3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
24 Please email comments or questions to the
public Ensembl
25 developers list at <http:
27 Questions may also be sent to the Ensembl help desk at
34 storage of Gene objects
41 -host =>
'ensembldb.ensembl.org',
48 $gene = $gene_adaptor->fetch_by_dbID(1234);
50 $gene = $gene_adaptor->fetch_by_stable_id(
'ENSG00000184129');
52 @genes = @{ $gene_adaptor->fetch_all_by_external_name(
'BRCA2') };
58 $slice_adaptor->fetch_by_region(
'chromosome',
'1', 1, 1000000 );
60 @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
64 This is a database aware adaptor
for the retrieval and storage of gene
71 package Bio::EnsEMBL::DBSQL::GeneAdaptor;
87 # Description: PROTECTED implementation of superclass abstract method.
88 # Returns the names, aliases of the tables to use for queries.
89 # Returntype : list of listrefs of strings
95 return ([
'gene',
'g'], [
'xref',
'x'], [
'external_db',
'exdb']);
101 # Description: PROTECTED implementation of superclass abstract method.
102 # Returns a list of columns to use for queries.
103 # Returntype : list of strings
111 my $created_date = $self->db()->dbc()->from_date_to_seconds(
"g.created_date");
112 my $modified_date = $self->db()->dbc()->from_date_to_seconds(
"g.modified_date");
114 return (
'g.gene_id',
'g.seq_region_id',
'g.seq_region_start',
'g.seq_region_end',
'g.seq_region_strand',
'g.analysis_id',
'g.biotype',
'g.display_xref_id',
'g.description',
'g.source',
'g.is_current',
'g.canonical_transcript_id',
'g.stable_id',
'g.version', $created_date, $modified_date,
'x.display_label',
'x.dbprimary_acc',
'x.description',
'x.version',
'exdb.db_name',
'exdb.status',
'exdb.db_release',
'exdb.db_display_name',
'x.info_type',
'x.info_text');
118 return ([
'xref',
"x.xref_id = g.display_xref_id"], [
'external_db',
"exdb.external_db_id = x.external_db_id"]);
123 Example : @gene_ids = @{$gene_adaptor->list_dbIDs()};
124 Description: Gets an array of
internal ids
for all genes in the current db
125 Arg[1] : (optional) Int. Pass a non-zero value
to have the ids sorted by the seq_region.
126 Returntype : Listref of Ints
134 my ($self, $ordered) = @_;
136 return $self->_list_dbIDs(
"gene", undef, $ordered);
139 =head2 list_stable_ids
141 Example : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
142 Description: Gets a listref of stable ids
for all genes in the current db
143 Returntype : reference to a list of strings
150 sub list_stable_ids {
153 return $self->_list_dbIDs(
"gene",
"stable_id");
156 sub list_seq_region_ids {
159 return $self->_list_seq_region_ids(
'gene');
162 =head2 fetch_by_display_label
164 Arg [1] : String $label - display label of gene to fetch
165 Example : my $gene = $geneAdaptor->fetch_by_display_label(
"BRCA2");
166 Description: Returns the gene which has the given display label or undef
if
167 there is none. If there are more than 1, the gene on the
168 reference slice is reported or
if none are on the reference,
169 the first one is reported.
177 sub fetch_by_display_label {
181 my $constraint =
"x.display_label = ? AND g.is_current = 1";
182 $self->bind_param_generic_fetch($label, SQL_VARCHAR);
183 my @genes = @{$self->generic_fetch($constraint)};
185 if (scalar(@genes) > 1) {
186 foreach my $gene_tmp (@genes) {
187 if ($gene_tmp->slice->is_reference) {
196 } elsif (scalar(@genes) == 1) {
201 } ## end sub fetch_by_display_label
203 =head2 fetch_all_by_display_label
205 Arg [1] : String $label - display label of genes to fetch
206 Example : my @genes = @{$geneAdaptor->fetch_all_by_display_label(
"PPP1R2P1")};
207 Description: Returns all genes which have the given display label or undef
if
216 sub fetch_all_by_display_label {
220 my $constraint =
"x.display_label = ? AND g.is_current = 1";
221 $self->bind_param_generic_fetch($label, SQL_VARCHAR);
222 my $genes = $self->generic_fetch($constraint);
227 =head2 fetch_by_stable_id
230 The stable ID of the gene to retrieve
231 Example : $gene = $gene_adaptor->fetch_by_stable_id(
'ENSG00000148944');
232 Description: Retrieves a gene
object from the database via its stable
id.
233 The gene will be retrieved in its native coordinate system (i.e.
234 in the coordinate system it is stored in the database). It may
235 be converted to a different coordinate system through a call to
236 transform() or transfer(). If the gene or
exon is not found
237 undef is returned instead.
239 Exceptions : if we cant get the gene in given coord system
# Fetch the current gene with the given stable ID. When the plain lookup finds
# nothing and the ID looks like "<stable_id>.<version>", retry through
# fetch_by_stable_id_version().
#
#   $stable_id - stable ID, optionally carrying a ".<version>" suffix
#
# Returns the gene, or undef when neither lookup matches.
sub fetch_by_stable_id {
  my ($self, $stable_id) = @_;

  my $constraint = "g.stable_id = ? AND g.is_current = 1";
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
  my ($gene) = @{$self->generic_fetch($constraint)};

  # If we didn't get anything back, see whether there's a version number in
  # the stable_id. rindex() returns -1 (a true value) when the ID contains no
  # '.', so the position has to be tested explicitly; a '.' at position 0
  # would leave an empty stable ID and is not treated as a separator either.
  if (!defined($gene)) {
    my $vindex = rindex($stable_id, '.');
    if ($vindex > 0) {
      $gene = $self->fetch_by_stable_id_version(substr($stable_id, 0, $vindex),
                                                substr($stable_id, $vindex + 1));
    }
  }

  return $gene;
}
262 =head2 fetch_by_stable_id_version
265 The stable ID of the gene to retrieve
266 Arg [2] : Integer $version
267 The version of the stable_id to retrieve
268 Example : $gene = $gene_adaptor->fetch_by_stable_id_version(
'ENSG00000148944', 14);
269 Description: Retrieves a gene
object from the database via its stable
id and version.
270 The gene will be retrieved in its native coordinate system (i.e.
271 in the coordinate system it is stored in the database). It may
272 be converted to a different coordinate system through a call to
273 transform() or transfer(). If the gene or
exon is not found
274 undef is returned instead.
276 Exceptions : if we cant get the gene in given coord system
# Look up the current gene matching both a stable ID and a version number.
#
#   $stable_id - stable ID of the gene
#   $version   - version of that stable ID; must consist of digits only
#
# Returns the first matching gene, undef when nothing matches, and nothing at
# all when $version is not numeric.
sub fetch_by_stable_id_version {
  my ($self, $stable_id, $version) = @_;

  # Non-numeric versions are rejected before any parameter is bound.
  if ($version !~ /^\d+$/) {
    return;
  }

  my $where = "g.stable_id = ? AND g.version = ? AND g.is_current = 1";

  # Bind order follows the placeholder order in $where.
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
  $self->bind_param_generic_fetch($version, SQL_INTEGER);

  my ($first_match) = @{ $self->generic_fetch($where) };

  return $first_match;
}
296 =head2 fetch_all_by_source
298 Arg [1] : String $source
300 The source of the gene to retrieve. You can have as an argument a reference
302 Example : $genes = $gene_adaptor->fetch_all_by_source(
'havana');
303 $genes = $gene_adaptor->fetch_all_by_source([
'ensembl',
'vega']);
304 Description: Retrieves an array reference of gene objects from the database via its source or sources.
305 The gene will be retrieved in its native coordinate system (i.e.
306 in the coordinate system it is stored in the database). It may
307 be converted to a different coordinate system through a call to
308 transform() or transfer(). If the gene or
exon is not found
309 undef is returned instead.
311 Exceptions : if we cant get the gene in given coord system
317 sub fetch_all_by_source {
318 my ($self, $source) = @_;
319 my @genes = @{$self->generic_fetch($self->source_constraint($source))};
323 =head2 source_constraint
325 Arg [1] : String $source
327 The source of the gene to retrieve. You can have as an argument a reference
329 Description: Used internally to generate a SQL constraint to restrict a
gene query by source
331 Exceptions : If source is not supplied
337 sub source_constraint {
338 my ($self, $sources, $inline_variables) = @_;
339 my $constraint =
"g.is_current = 1";
340 my $in_statement = $self->generate_in_constraint($sources,
'g.source', SQL_VARCHAR, $inline_variables);
341 $constraint .=
" and $in_statement";
345 =head2 count_all_by_source
347 Arg [1] : String $source
349 The source of the gene to retrieve. You can have as an argument a reference
351 Example : $cnt = $gene_adaptor->count_all_by_source(
'ensembl');
352 $cnt = $gene_adaptor->count_all_by_source([
'havana',
'vega']);
353 Description : Retrieves count of gene objects from the database via its source or sources.
# Count genes by source.
#
#   $source - a source name, or a reference to a list of source names
#
# Returns whatever generic_count() reports for the constraint built by
# source_constraint().
sub count_all_by_source {
  my ($self, $source) = @_;

  # Collect in list context so generic_count() receives exactly what
  # source_constraint() returns.
  my @count_args = $self->source_constraint($source);

  return $self->generic_count(@count_args);
}
365 =head2 fetch_all_by_biotype
367 Arg [1] : String $biotype
369 The biotype of the gene to retrieve. You can have as an argument a reference
370 to a list of biotypes
371 Example : $gene = $gene_adaptor->fetch_all_by_biotype(
'protein_coding');
372 $gene = $gene_adaptor->fetch_all_by_biotype([
'protein_coding',
'sRNA',
'miRNA']);
373 Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
374 The genes will be retrieved in its native coordinate system (i.e.
375 in the coordinate system it is stored in the database). It may
376 be converted to a different coordinate system through a call to
377 transform() or transfer(). If the gene or
exon is not found
378 undef is returned instead.
380 Exceptions : if we cant get the gene in given coord system
386 sub fetch_all_by_biotype {
387 my ($self, $biotype) = @_;
388 my @genes = @{$self->generic_fetch($self->biotype_constraint($biotype))};
392 =head2 biotype_constraint
394 Arg [1] : String $biotypes
396 The biotype of the gene to retrieve. You can have as an argument a reference
397 to a list of biotypes
398 Description: Used internally to generate a SQL constraint to restrict a gene query by biotype
400 Exceptions : If biotype is not supplied
406 sub biotype_constraint {
407 my ($self, $biotypes, $inline_variables) = @_;
408 my $constraint =
"g.is_current = 1";
409 my $in_statement = $self->generate_in_constraint($biotypes,
'g.biotype', SQL_VARCHAR, $inline_variables);
410 $constraint .=
" and $in_statement";
414 =head2 count_all_by_biotype
416 Arg [1] : String $biotype
418 The biotype of the gene to retrieve. You can have as an argument a reference
419 to a list of biotypes
420 Example : $cnt = $gene_adaptor->count_all_by_biotype(
'protein_coding');
421 $cnt = $gene_adaptor->count_all_by_biotype([
'protein_coding',
'sRNA',
'miRNA']);
422 Description : Retrieves count of gene objects from the database via its biotype or biotypes.
# Count genes by biotype.
#
#   $biotype - a biotype, or a reference to a list of biotypes
#
# Returns whatever generic_count() reports for the constraint built by
# biotype_constraint().
sub count_all_by_biotype {
  my ($self, $biotype) = @_;

  # Collect in list context so generic_count() receives exactly what
  # biotype_constraint() returns.
  my @count_args = $self->biotype_constraint($biotype);

  return $self->generic_count(@count_args);
}
436 my $constraint =
'g.biotype != "LRG_gene" and g.is_current = 1';
437 my @genes = @{$self->generic_fetch($constraint)};
441 =head2 fetch_all_versions_by_stable_id
443 Arg [1] : String $stable_id
444 The stable ID of the gene to retrieve
445 Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
447 Description : Similar to fetch_by_stable_id, but retrieves all versions of a
448 gene stored in the database.
450 Exceptions :
if we cant get the gene in given coord system
# Fetch every stored version of the gene with the given stable ID.
#
#   $stable_id - stable ID of the gene
#
# Returns the result of generic_fetch() for that stable ID.
sub fetch_all_versions_by_stable_id {
  my ($self, $stable_id) = @_;

  # The constraint carries no is_current filter, so non-current versions
  # are matched as well.
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

  return $self->generic_fetch("g.stable_id = ?");
}
464 =head2 fetch_by_exon_stable_id
467 The stable
id of an
exon of the gene to retrieve
468 Example : $gene = $gene_adptr->fetch_by_exon_stable_id(
'ENSE00000148944');
469 Description: Retrieves a gene
object from the database via an
exon stable
id.
470 The gene will be retrieved in its native coordinate system (i.e.
471 in the coordinate system it is stored in the database). It may
472 be converted to a different coordinate system through a call to
473 transform() or transfer(). If the gene or
exon is not found
474 undef is returned instead.
482 sub fetch_by_exon_stable_id {
483 my ($self, $stable_id, $version) = @_;
488 exon_transcript as et,
490 WHERE t.transcript_id = et.transcript_id
491 AND et.exon_id = e.exon_id
496 my $sth = $self->prepare($sql);
497 $sth->bind_param(1, $stable_id, SQL_VARCHAR);
500 my ($dbID) = $sth->fetchrow_array();
502 return undef
if (!defined($dbID));
504 my $gene = $self->fetch_by_dbID($dbID);
507 } ## end sub fetch_by_exon_stable_id
509 =head2 fetch_all_by_domain
511 Arg [1] : String $domain
512 The domain to fetch genes from
513 Example : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
514 Description: Retrieves a listref of genes whose translation contain interpro
515 domain $domain. The genes are returned in their native coord
516 system (i.e. the coord_system they are stored in). If the coord
517 system needs to be changed, then transform or transfer should be
518 called on the individual objects returned.
519 Returntype : list of Bio::EnsEMBL::Genes
526 sub fetch_all_by_domain {
527 my ($self, $domain) = @_;
529 throw(
"domain argument is required") unless ($domain);
531 my $sth = $self->prepare(
540 WHERE cs.species_id = ?
541 AND cs.coord_system_id = sr.coord_system_id
542 AND sr.seq_region_id = tr.seq_region_id
543 AND tr.is_current = 1
544 AND tr.transcript_id = tl.transcript_id
545 AND tl.translation_id = pf.translation_id
546 AND pf.hit_name = i.id
547 AND i.interpro_ac = ?
548 GROUP BY tr.gene_id));
550 $sth->bind_param(1, $self->species_id(), SQL_VARCHAR);
551 $sth->bind_param(2, $domain, SQL_VARCHAR);
555 my @array = @{$sth->fetchall_arrayref()};
558 my @gene_ids =
map { $_->[0] } @array;
560 return $self->fetch_all_by_dbID_list(\@gene_ids);
561 } ## end sub fetch_all_by_domain
563 =head2 fetch_all_by_Slice_and_external_dbname_link
566 The slice to fetch genes on.
567 Arg [2] : (optional)
string $logic_name
568 the logic name of the type of features to obtain
569 Arg [3] : (optional)
boolean $load_transcripts
570 if true, transcripts will be loaded immediately
571 rather than lazy loaded later.
573 Name of the external database to fetch the Genes by
574 Example : @genes = @{
575 $ga->fetch_all_by_Slice_and_external_dbname_link(
576 $slice, undef, undef,
"HGNC" ) };
577 Description: Overrides superclass method to optionally load
578 transcripts immediately rather than lazy-loading them
579 later. This is more efficient when there are a lot
580 of genes whose transcripts are going to be used. The
581 genes are then filtered to
return only those with
582 external database links of the type specified
583 Returntype : reference to list of genes
590 sub fetch_all_by_Slice_and_external_dbname_link {
591 my ($self, $slice, $logic_name, $load_transcripts, $db_name) = @_;
593 # Get the external_db_id(s) from the name.
594 my $dbentry_adaptor = $self->db()->get_DBEntryAdaptor();
595 my $external_db_ids = $dbentry_adaptor->get_external_db_ids($db_name, undef,
'ignore release');
597 if (scalar(@{$external_db_ids}) == 0) {
598 my $external_db_names = $dbentry_adaptor->get_distinct_external_dbs();
599 my $available = join(
"\n",
map {
"\t${_}"} @{$external_db_names});
600 warning sprintf(
"Could not find external database " .
"'%s' in the external_db table\n" .
"Available are:\n%s", $db_name, $available);
604 # Get the gene_ids for those with links.
607 foreach my $local_external_db_id (@{$external_db_ids}) {
608 my @linked_genes = $dbentry_adaptor->list_gene_ids_by_external_db_id($local_external_db_id);
609 $linked_genes{$_} = 1
for @linked_genes;
612 # Get all the genes on the slice and filter by the gene ids list
613 my $genes = $self->fetch_all_by_Slice($slice, $logic_name, $load_transcripts);
614 my $genes_passed = [ grep { exists $linked_genes{$_->dbID()} } @{$genes} ];
615 return $genes_passed;
616 } ## end sub fetch_all_by_Slice_and_external_dbname_link
618 =head2 fetch_all_by_Slice
621 The slice to fetch genes on.
622 Arg [2] : (optional)
string $logic_name
623 the logic name of the type of features to obtain
624 Arg [3] : (optional)
boolean $load_transcripts
625 if true, transcripts will be loaded immediately rather than
627 Arg [4] : (optional)
string $source
628 the source name of the features to obtain.
629 Arg [5] : (optional)
string biotype
630 the biotype of the features to obtain.
631 Example : @genes = @{$gene_adaptor->fetch_all_by_Slice()};
632 Description: Overrides superclass method to optionally load transcripts
633 immediately rather than lazy-loading them later. This
634 is more efficient when there are a lot of genes whose
635 transcripts are going to be used.
636 Returntype : reference to list of genes
638 Caller : Slice::get_all_Genes
# Fetches the current genes on $slice through the superclass's
# fetch_all_by_Slice_constraint(), optionally narrowed by $source and
# $biotype, and, when $load_transcripts is true and genes were found, attaches
# the genes' transcripts up front with add_Transcript().
# NOTE(review): the embedded line numbers show that several short lines of
# this body are not visible here (early returns, closing braces, some
# declarations); the comments below describe only the visible statements.
643 sub fetch_all_by_Slice {
644 my ($self, $slice, $logic_name, $load_transcripts, $source, $biotype) = @_;
646 my $constraint =
'g.is_current = 1';
# NOTE(review): $source is interpolated straight into the SQL constraint; a
# value containing a single quote would break or alter the query. Consider
# quoting it through the database handle or binding it.
648 if (defined($source)) {
649 $constraint .=
" and g.source = '$source'";
# The biotype filter is built with $inline_variables set, i.e. the values are
# requested inline in the constraint text rather than as bound parameters.
651 if (defined($biotype)) {
652 my $inline_variables = 1;
653 $constraint .=
" and ".$self->generate_in_constraint($biotype,
'g.biotype', SQL_VARCHAR, $inline_variables);
656 my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint, $logic_name);
658 # If there are 0 genes, still do lazy-loading.
659 if (!$load_transcripts || @$genes < 1) {
663 # Preload all of the transcripts now, instead of lazy loading later,
664 # faster than one query per transcript.
666 # First check if transcripts are already preloaded.
667 # FIXME: Should check all transcripts.
668 if (exists($genes->[0]->{
'_transcript_array'})) {
672 # Get extent of region spanned by transcripts.
# The extent is taken from the genes' own seq_region_start()/seq_region_end().
673 my ($min_start, $max_end);
674 foreach my $g (@$genes) {
675 if (!defined($min_start) || $g->seq_region_start() < $min_start) {
676 $min_start = $g->seq_region_start();
678 if (!defined($max_end) || $g->seq_region_end() > $max_end) {
679 $max_end = $g->seq_region_end();
# When the genes reach beyond $slice, a slice covering $min_start..$max_end on
# the same seq_region is fetched; presumably $slice itself is reused when the
# condition below holds (that branch is not visible here, TODO confirm).
685 if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
688 my $sa = $self->db()->get_SliceAdaptor();
689 $ext_slice = $sa->fetch_by_region($slice->coord_system->name(), $slice->seq_region_name(), $min_start, $max_end, $slice->strand(), $slice->coord_system->version());
692 # Associate transcript identifiers with genes.
# %g_hash maps gene dbID => gene object; its keys are joined into the IN (...)
# list of the lookup query below.
694 my %g_hash =
map { $_->dbID => $_ } @{$genes};
696 my $g_id_str = join(
',', keys(%g_hash));
698 my $sth = $self->prepare(
"SELECT gene_id, transcript_id " .
"FROM transcript " .
"WHERE gene_id IN ($g_id_str)");
703 $sth->bind_columns(\($g_id, $tr_id));
# %tr_g_hash maps transcript dbID => owning gene object.
707 while ($sth->fetch()) {
708 $tr_g_hash{$tr_id} = $g_hash{$g_id};
# Fetch the transcripts on the (possibly wider) slice, restricted to the
# transcript IDs collected above.
711 my $ta = $self->db()->get_TranscriptAdaptor();
712 my $transcripts = $ta->fetch_all_by_Slice($ext_slice, 1, undef, sprintf(
"t.transcript_id IN (%s)", join(
',', sort { $a <=> $b } keys(%tr_g_hash))));
714 # Move transcripts onto gene slice, and add them to genes.
715 foreach my $tr (@{$transcripts}) {
716 if (!exists($tr_g_hash{$tr->dbID()})) { next }
# `!=` compares the two slice objects numerically, i.e. by identity unless the
# slice class overloads the operator (not visible here).
719 if ($slice != $ext_slice) {
720 $new_tr = $tr->transfer($slice);
721 if (!defined($new_tr)) {
722 throw(
"Unexpected. " .
"Transcript could not be transfered onto Gene slice.");
728 $tr_g_hash{$tr->dbID()}->add_Transcript($new_tr);
732 } ## end sub fetch_all_by_Slice
734 =head2 count_all_by_Slice
737 The slice to count genes on.
738 Arg [2] : (optional) biotype(s)
string or arrayref of strings
739 the biotype of the features to count.
740 Arg [3] : (optional)
string $source
741 the source name of the features to count.
742 Example : $cnt = $gene_adaptor->count_all_by_Slice();
743 Description: Method to count genes on a given slice, filtering by biotype and source
# Count the current genes on a slice, optionally filtered by biotype and
# source.
#
#   $slice   - the slice to count genes on
#   $biotype - (optional) biotype, or arrayref of biotypes, to restrict to
#   $source  - (optional) source name to restrict to
#
# Returns the result of count_by_Slice_constraint() for the built constraint.
sub count_all_by_Slice {
  my ($self, $slice, $biotype, $source) = @_;

  my $constraint = 'g.is_current = 1';
  if (defined($source)) {
    # Let the database handle quote the value instead of wrapping it in
    # literal quotes by hand, so a source name containing a quote or
    # backslash cannot break or alter the generated SQL.
    my $quoted_source = $self->db()->dbc()->db_handle()->quote($source);
    $constraint .= " and g.source = $quoted_source";
  }
  if (defined($biotype)) {
    $constraint .= " and " . $self->biotype_constraint($biotype);
  }

  return $self->count_by_Slice_constraint($slice, $constraint);
}
764 =head2 fetch_by_transcript_id
766 Arg [1] : Int $trans_id
767 Unique database identifier
for the
transcript whose gene should
768 be retrieved. The gene is returned in its native coord
769 system (i.e. the coord_system it is stored in). If the coord
770 system needs to be changed, then transform or transfer should
771 be called on the returned
object. undef is returned
if the
772 gene or
transcript is not found in the database.
773 Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
774 Description: Retrieves a gene from the database via the database identifier
775 of one of its transcripts.
783 sub fetch_by_transcript_id {
784 my ($self, $trans_id) = @_;
786 # this is a cheap SQL call
787 my $sth = $self->prepare(
791 WHERE tr.transcript_id = ?
794 $sth->bind_param(1, $trans_id, SQL_INTEGER);
797 my ($geneid) = $sth->fetchrow_array();
801 return undef
if (!defined $geneid);
803 my $gene = $self->fetch_by_dbID($geneid);
807 =head2 fetch_by_transcript_stable_id
809 Arg [1] :
string $trans_stable_id
810 transcript stable ID whose gene should be retrieved
811 Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
813 Description: Retrieves a gene from the database via the stable ID of one of
822 sub fetch_by_transcript_stable_id {
823 my ($self, $trans_stable_id) = @_;
825 my $sth = $self->prepare(
833 $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
836 my ($geneid) = $sth->fetchrow_array();
839 return undef
if (!defined $geneid);
841 my $gene = $self->fetch_by_dbID($geneid);
845 =head2 fetch_by_translation_stable_id
847 Arg [1] : String $translation_stable_id
848 The stable
id of a translation of the gene to be obtained
849 Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id
851 Description: Retrieves a gene via the stable
id of one of its translations.
859 sub fetch_by_translation_stable_id {
860 my ($self, $translation_stable_id) = @_;
862 my $sth = $self->prepare(
867 WHERE tl.stable_id = ?
868 AND tr.transcript_id = tl.transcript_id
869 AND tr.is_current = 1
872 $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
875 my ($geneid) = $sth->fetchrow_array();
877 if (!defined $geneid) {
880 return $self->fetch_by_dbID($geneid);
883 =head2 fetch_all_by_external_name
885 Arg [1] : String $external_name
886 The external identifier
for the gene to be obtained
887 Arg [2] : (optional) String $external_db_name
888 The name of the external database from which the
889 identifier originates.
890 Arg [3] : Boolean
override. Force SQL regex matching
for users
891 who really
do want to find all
'NM%'
892 Example : @genes = @{$gene_adaptor->fetch_all_by_external_name(
'BRCA2')}
893 @many_genes = @{$gene_adaptor->fetch_all_by_external_name(
'BRCA%')}
894 Description: Retrieves a list of genes with an external database
895 identifier $external_name. The genes returned are in
896 their native coordinate system, i.e. in the coordinate
897 system they are stored in the database in. If another
898 coordinate system is required then the Gene::transfer or
899 Gene::transform method can be used.
900 SQL wildcards % and _ are supported in the $external_name,
901 but their use is somewhat restricted
for performance reasons.
902 Users that really
do want % and _ in the first three characters
903 should use argument 3 to prevent optimisations
906 Caller : goview, general
# Fetch the genes linked to an external identifier.
#
#   $external_name    - external identifier of the genes to fetch
#   $external_db_name - (optional) name of the external database the
#                       identifier comes from
#   $override         - (optional) passed through unchanged to
#                       list_gene_ids_by_extids()
#
# Returns an arrayref of genes: those on a reference slice first, then the
# rest, each group in the order the IDs came back from the DBEntryAdaptor.
sub fetch_all_by_external_name {
  my ($self, $external_name, $external_db_name, $override) = @_;

  my $entryAdaptor = $self->db->get_DBEntryAdaptor();

  my @ids = $entryAdaptor->list_gene_ids_by_extids($external_name, $external_db_name, $override);

  my %genes_by_dbIDs = map { $_->dbID(), $_ } @{$self->fetch_all_by_dbID_list(\@ids)};

  # Walk the IDs in their original order. An ID for which no gene was fetched
  # is skipped rather than left as an undef entry, which would make the
  # ->slice() call below die.
  my (@reference, @non_reference);
  foreach my $id (@ids) {
    my $gene = $genes_by_dbIDs{$id};
    next if !defined($gene);

    # One is_reference() call per gene decides which group it joins.
    if ($gene->slice()->is_reference()) {
      push @reference, $gene;
    }
    else {
      push @non_reference, $gene;
    }
  }

  return [ @reference, @non_reference ];
}
926 =head2 fetch_all_by_description
928 Arg [1] : String of description
929 Example : $gene_list = $gene_adaptor->fetch_all_by_description(
'RNA%');
930 Description: Fetches genes by their textual description. Fully supports SQL
931 wildcards, since getting an exact hit is unlikely.
# Fetch genes whose description matches a SQL LIKE pattern.
#
#   $description - pattern to match against g.description; SQL wildcards
#                  are interpreted by LIKE
#
# Returns the result of generic_fetch() for that pattern.
sub fetch_all_by_description {
  my ($self, $description) = @_;

  # The pattern is bound as a parameter rather than placed in the SQL text.
  $self->bind_param_generic_fetch($description, SQL_VARCHAR);

  return $self->generic_fetch("g.description LIKE ?");
}
944 =head2 fetch_all_by_GOTerm
947 The GO term
for which genes should be fetched.
950 $gene_adaptor->fetch_all_by_GOTerm(
951 $go_adaptor->fetch_by_accession(
'GO:0030326') ) };
953 Description : Retrieves a list of genes that are associated with
954 the given GO term, or with any of its descendent
955 GO terms. The genes returned are in their native
956 coordinate system, i.e. in the coordinate system
957 in which they are stored in the database. If
958 another coordinate system is required then the
959 Gene::transfer or Gene::transform method can be
963 Exceptions : Throws if argument is not a GO term
969 sub fetch_all_by_GOTerm {
970 my ($self, $term) = @_;
972 assert_ref($term,
'Bio::EnsEMBL::OntologyTerm');
973 if ($term->ontology() ne
'GO') {
974 throw(
'Argument is not a GO term');
977 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
980 foreach my $accession (
map { $_->accession() } ($term, @{$term->descendants()})) {
981 my @ids = $entryAdaptor->list_gene_ids_by_extids($accession,
'GO');
982 foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
985 my @result = @{$self->fetch_all_by_dbID_list([sort { $a <=> $b } keys(%unique_dbIDs)])};
990 =head2 fetch_all_by_ontology_linkage_type
992 Arg [1] : (optional)
string $db_name
993 The database name to search
for. Defaults to GO
994 Arg [2] :
string $linkage_type
995 Linkage type to search
for e.g. IMP
997 Example: my $genes = $gene_adaptor->fetch_all_by_ontology_linkage_type(
'GO',
'IMP');
998 my $genes = $gene_adaptor->fetch_all_by_ontology_linkage_type(undef,
'IMP');
1000 Description : Retrieves a list of genes that are associated with
1001 the given ontology linkage type. The genes returned
1002 are in their native coordinate system, i.e. in the
1003 coordinate system in which they are stored in the database.
1005 Exceptions : Throws
if a linkage type is not given
1011 sub fetch_all_by_ontology_linkage_type {
1012 my ($self, $db_name, $linkage_type) = @_;
1013 $db_name =
'GO' if ! defined $db_name;
1014 throw "No linkage type given" if ! defined $linkage_type;
1016 my $dbentry_adaptor = $self->db->get_DBEntryAdaptor();
1017 my $external_db_ids = $dbentry_adaptor->get_external_db_ids($db_name, undef,
'ignore release');
1018 if (scalar(@{$external_db_ids}) == 0) {
1019 warning sprintf(
"Could not find external database '%s' in the external_db table", $db_name);
1023 # Get the gene_ids for those with links.
1025 foreach my $local_external_db_id (@{$external_db_ids}) {
1026 my @gene_ids = $dbentry_adaptor->list_gene_ids_by_external_db_id($local_external_db_id, $linkage_type);
1027 $unique_dbIDs{$_} = 1
for @gene_ids;
1030 # Get all the genes and return
1031 return $self->fetch_all_by_dbID_list([keys %unique_dbIDs]);
1034 =head2 fetch_all_by_GOTerm_accession
1037 The GO term
accession for which genes should be
1043 @{ $gene_adaptor->fetch_all_by_GOTerm_accession(
1046 Description : Retrieves a list of genes that are associated with
1047 the given GO term, or with any of its descendent
1048 GO terms. The genes returned are in their native
1049 coordinate system, i.e. in the coordinate system
1050 in which they are stored in the database. If
1051 another coordinate system is required then the
1052 Gene::transfer or Gene::transform method can be
1056 Exceptions : Throws if argument is not a GO term
accession
1062 sub fetch_all_by_GOTerm_accession {
1063 my ($self, $accession) = @_;
1065 if ($accession !~ /^GO:/) {
1066 throw(
'Argument is not a GO term accession');
1071 my $term = $goAdaptor->fetch_by_accession($accession);
1073 return $self->fetch_all_by_GOTerm($term);
1076 =head2 fetch_all_alt_alleles
1079 The gene to fetch alternative alleles
for
1080 Arg [2] : Boolean (optional)
1081 Ask the method to warn
about any gene without an alt allele
1082 group. Defaults to
false
1083 Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) };
1084 foreach my $alt_gene (@alt_genes) {
1085 print
"Alternate allele: " . $alt_gene->stable_id() .
"\n" ;
1087 Description: Retrieves genes which are alternate alleles to a provided gene.
1088 Alternate alleles in Ensembl are genes which are similar and are
1089 on an alternative haplotype of the same region. There are not
1090 currently very many of these. This method will
return a
1091 reference to an empty list
if no alternative alleles are found.
1093 Exceptions :
throw if incorrect arg provided
1094 warning
if gene arg does not have an entry in an alt allele and
if
1095 the warn flag is
true
1096 Caller : Gene::get_all_alt_alleles
1101 sub fetch_all_alt_alleles {
1106 if (!ref($gene) || !$gene->isa(
'Bio::EnsEMBL::Gene')) {
1107 throw(
'Bio::EnsEMBL::Gene argument is required');
1110 my $gene_id = $gene->
dbID();
1113 warning(
'Cannot retrieve alternate alleles for gene without dbID');
1117 my $aaga = $self->db->get_adaptor(
'AltAlleleGroup');
1118 my $aag = $aaga->fetch_by_gene_id($gene->dbID);
1121 warning(
"Supplied gene has no alternative alleles");
1125 # query for all alternative genes. do not filter
1126 # the representative but do filter this gene out
1127 return $aag->get_all_Genes(undef, [$gene]);
1128 } ## end sub fetch_all_alt_alleles
1133 Description: Used to determine whether a given Gene is the representative
1134 Gene of an alt allele group. If it does not have an alternative
1135 allele that is more representative, then
this ID will be said to
1137 Returntype : Boolean - True
for yes or no alternatives
1142 my ($self, $gene_id) = @_;
1143 my $aag = $self->db->get_adaptor(
'AltAlleleGroup')->fetch_by_gene_id($gene_id);
1144 if (defined($aag)) {
1145 if ($aag->rep_Gene_id == $gene_id) {
1153 throw(
"Unhandled circumstance in GeneAdaptor->is_ref");
1156 =head2 store_alt_alleles
1159 Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes
1160 Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]);
1161 Description: This method creates a group of alternative alleles (i.e. locus)
1162 from a set of genes. The genes should be genes from alternate
1163 haplotypes which are similar. The genes must already be stored
1164 in
this database. WARNING - now that more fine-grained support
1165 for alt_alleles has been implemented,
this method is rather coarse.
1166 Consider working directly with AltAlleleGroup and
1167 AltAlleleGroupAdaptor.
1168 Returntype :
int alt_allele_group_id or undef
if no alt_alleles were stored
1169 Exceptions :
throw on incorrect arguments
1170 throw on sql error (e.g. duplicate unique
id)
# Builds one alt allele group from the supplied genes and stores it.
#   Arg [1]    : arrayref of Bio::EnsEMBL::Gene objects (already stored)
#   Returntype : whatever AltAlleleGroupAdaptor::store() returns, or nothing
#                when the input is rejected with a warning
#   Exceptions : throw when Arg [1] is not an array reference
sub store_alt_alleles {
  my ($self, $genes) = @_;

  warning("Unsupported. Switch to using AltAlleleGroupAdaptor::store() and AltAlleleGroups");

  # "!ref($genes) eq 'ARRAY'" negated ref() first and then compared the
  # resulting boolean with 'ARRAY', so it was never true. Test the reference
  # type directly.
  if (ref($genes) ne 'ARRAY') {
    throw('List reference of Bio::EnsEMBL::Gene argument expected.');
  }

  my $num_genes = scalar(@$genes);
  if ($num_genes < 2) {
    # An empty list has no first gene to name in the message.
    my $gene_label = $num_genes ? $genes->[0]->dbID() : 'none';
    warning('At least 2 genes must be provided to construct alternative alleles (gene id: ' . $gene_label . '). Ignoring.');
    return;
  }

  # Each member is a [ gene dbID, { type flags } ] pair; a gene on a reference
  # slice is flagged as the representative of the group.
  my $allele_list = [];
  foreach my $gene (@$genes) {
    my %type;
    if ($gene->slice->is_reference()) {
      $type{'IS_REPRESENTATIVE'} = 1;
    }
    push @$allele_list, [ $gene->dbID, \%type ];
  }

  my $aag = Bio::EnsEMBL::AltAlleleGroup->new(
    -MEMBERS => $allele_list,
  );

  # Exactly one representative is required for the group to be stored.
  if (scalar( @{$aag->get_all_members_with_type('IS_REPRESENTATIVE')} ) != 1) {
    warning('Inappropriate number of alternative alleles on the reference sequence. Ignoring.');
    return;
  }

  my $aaga = $self->db->get_adaptor('AltAlleleGroup');
  return $aaga->store($aag);
} ## end sub store_alt_alleles
1219 The gene to store in the database
1220 Arg [2] : ignore_release in xrefs [
default 1] set to 0 to use release
info
1221 in external database references
1222 Arg [3] : prevent coordinate recalculation
if you are persisting
1223 transcripts with
this gene
1224 Arg [4] : prevent copying supporting features across exons
1225 increased speed
for lost accuracy
1226 Example : $gene_adaptor->store($gene);
1227 Description: Stores a gene in the database.
1228 Returntype : the database identifier (dbID) of the newly stored gene
1230 $gene does not have an analysis
object
# Persists a gene together with its xrefs, transcripts and attributes, then
# records the new dbID and this adaptor on the gene object that was passed in.
1237 my ($self, $gene, $ignore_release, $skip_recalculating_coordinates, $skip_exon_sf) = @_;
1239 if (!ref $gene || !$gene->isa(
'Bio::EnsEMBL::Gene')) {
1240 throw(
"Must store a gene object, not a $gene");
# Release information in xrefs is ignored unless the caller says otherwise.
1242 if (!defined($ignore_release)) {
1243 $ignore_release = 1;
1245 my $db = $self->db();
# A gene already stored in this database is not inserted again.
1247 if ($gene->is_stored($db)) {
1248 return $gene->
dbID();
1251 # ensure coords are correct before storing
1252 $gene->recalculate_coordinates();
1254 my $analysis = $gene->analysis();
1255 throw(
"Genes must have an analysis object.")
if (!defined($analysis));
# Use the analysis dbID when it is already stored, otherwise store it first.
1258 if ($analysis->is_stored($db)) {
1259 $analysis_id = $analysis->dbID();
1261 $analysis_id = $db->get_AnalysisAdaptor->store($analysis);
1264 my $type = $gene->get_Biotype->name;
1266 # default to is_current = 1 if this attribute is not set
1267 my $is_current = $gene->is_current;
1268 $is_current = 1 unless (defined($is_current));
# Keep handles on the caller's gene and transcripts: $gene is reassigned from
# _pre_store() below, and the originals are updated at the end.
1270 my $original = $gene;
1271 my $original_transcripts = $gene->get_all_Transcripts();
1275 ($gene, $seq_region_id) = $self->_pre_store($gene);
1287 canonical_transcript_id
# stable_id and version are only written when the gene has a stable ID.
1293 if (defined($gene->stable_id)) {
1294 push @columns,
'stable_id',
'version';
# Dates are converted with the dbc's from_seconds_to_date() and collected as
# "canned" values, which are joined into the statement text below.
1296 my $created = $self->db->dbc->from_seconds_to_date($gene->created_date());
1297 my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date());
1300 push @canned_columns,
'created_date';
1301 push @canned_values, $created;
1304 push @canned_columns,
'modified_date';
1305 push @canned_values, $modified;
# One "?" placeholder per regular column, followed by the canned values verbatim.
1310 my $columns = join(
', ', @columns, @canned_columns);
1311 my $values = join(
', ', (
'?') x @columns, @canned_values);
1312 my $store_gene_sql = qq(
1313 INSERT INTO gene ( $columns ) VALUES ( $values )
1316 my $sth = $self->prepare($store_gene_sql);
1317 $sth->bind_param(1, $type, SQL_VARCHAR);
1318 $sth->bind_param(2, $analysis_id, SQL_INTEGER);
1319 $sth->bind_param(3, $seq_region_id, SQL_INTEGER);
1320 $sth->bind_param(4, $gene->start(), SQL_INTEGER);
1321 $sth->bind_param(5, $gene->end(), SQL_INTEGER);
1322 $sth->bind_param(6, $gene->strand(), SQL_TINYINT);
1323 $sth->bind_param(7, $gene->description(), SQL_LONGVARCHAR);
1324 $sth->bind_param(8, $gene->source(), SQL_VARCHAR);
1325 $sth->bind_param(9, $is_current, SQL_TINYINT);
1327 # Canonical transcript ID will be updated later.
1328 # Set it to zero for now.
1329 $sth->bind_param(10, 0, SQL_TINYINT);
# Placeholders 11 and 12 exist only when stable_id/version were pushed above.
1332 if (defined($gene->stable_id)) {
1334 $sth->bind_param(11, $gene->stable_id, SQL_VARCHAR);
1335 $sth->bind_param(12, $gene->version, SQL_INTEGER);
1341 my $gene_dbID = $self->last_insert_id(
'gene_id', undef,
'gene');
1343 # store the dbentries associated with this gene
1344 my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
1346 foreach my $dbe (@{$gene->get_all_DBEntries}) {
1347 $dbEntryAdaptor->store($dbe, $gene_dbID,
"Gene", $ignore_release);
1350 # We allow transcripts not to share equal exons and instead have
1351 # copies. For the database we still want sharing though, to have
1352 # easier time with stable ids. So we need to have a step to merge
1353 # exons together before store.
# Exons with the same hashkey() are swapped for the one already seen.
1356 foreach my $trans (@{$gene->get_all_Transcripts}) {
1357 foreach my $e (@{$trans->get_all_Exons}) {
1358 my $key = $e->hashkey();
1359 if (exists $exons{$key}) {
1360 $trans->swap_exons($e, $exons{$key}, $skip_exon_sf);
1367 my $transcript_adaptor = $db->get_TranscriptAdaptor();
1369 my $transcripts = $gene->get_all_Transcripts();
1371 my $new_canonical_transcript_id;
# Index loop: transcript $i of the gene being stored is paired with the
# caller's original transcript at the same index.
1372 for (my $i = 0; $i < @$transcripts; $i++) {
1373 my $new = $transcripts->[$i];
1374 my $old = $original_transcripts->[$i];
# Store, then re-fetch so $new is the database-backed object.
1376 my $new_dbID = $transcript_adaptor->store($new, $gene_dbID, $analysis_id, $skip_recalculating_coordinates);
1377 $new = $transcript_adaptor->fetch_by_dbID($new_dbID);
# The first transcript flagged canonical supplies the canonical transcript ID.
1380 if (!defined($new_canonical_transcript_id) && $new->is_canonical()) {
1381 $new_canonical_transcript_id = $new->dbID();
1384 # update the original transcripts since we may have made copies of
1385 # them by transforming the gene
1386 $old->dbID($new->dbID());
1387 $old->adaptor($new->adaptor());
# NOTE(review): assumes $old has a translation whenever $new does - confirm.
1389 if ($new->translation) {
1390 $old->translation->dbID($new->translation()->dbID);
1391 $old->translation->adaptor($new->translation()->adaptor);
1396 if (defined($new_canonical_transcript_id)) {
1397 # Now the canonical transcript has been stored, so update the
1398 # canonical_transcript_id of this gene with the new dbID.
1399 my $sth = $self->prepare(
1402 SET canonical_transcript_id = ?
1406 $sth->bind_param(1, $new_canonical_transcript_id, SQL_INTEGER);
1407 $sth->bind_param(2, $gene_dbID, SQL_INTEGER);
1412 my $transcript_adaptor = $db->get_TranscriptAdaptor();
1413 $transcript_adaptor->update_canonical_attribute($new_canonical_transcript_id);
1416 # update gene to point to display xref if it is set
1417 if (my $display_xref = $gene->display_xref) {
1419 if ($display_xref->is_stored($db)) {
1420 $dxref_id = $display_xref->dbID();
# NOTE(review): presumably exists() yields the dbID of a matching stored xref,
# or undef when there is none - confirm against DBEntryAdaptor.
1422 $dxref_id = $dbEntryAdaptor->exists($display_xref);
1425 if (defined($dxref_id)) {
1426 my $sth = $self->prepare(
"UPDATE gene SET display_xref_id = ? WHERE gene_id = ?");
1427 $sth->bind_param(1, $dxref_id, SQL_INTEGER);
1428 $sth->bind_param(2, $gene_dbID, SQL_INTEGER);
1431 $display_xref->dbID($dxref_id);
1432 $display_xref->adaptor($dbEntryAdaptor);
# The next two calls repeat the two assignments directly above.
1433 $display_xref->dbID($dxref_id);
1434 $display_xref->adaptor($dbEntryAdaptor);
1436 warning(
"Display_xref " . $display_xref->dbname() .
":" . $display_xref->display_id() .
" is not stored in database.\n" .
"Not storing relationship to this gene.");
# Clear dbID and adaptor on the xref after the warning.
1437 $display_xref->dbID(undef);
1438 $display_xref->adaptor(undef);
1442 # store gene attributes if there are any
1443 my $attr_adaptor = $db->get_AttributeAdaptor();
1444 $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes);
1446 # set the adaptor and dbID on the original passed in gene not the
1448 $original->adaptor($self);
1449 $original->dbID($gene_dbID);
1457 the gene to remove from the database
1458 Example : $gene_adaptor->remove($gene);
1459 Description: Removes a gene completely from the database. All associated
1460 transcripts, exons, stable_identifiers, descriptions, etc.
1461 are removed as well. Use with caution!
1463 Exceptions :
throw on incorrect arguments
1464 warning
if gene is not stored in
this database
# Deletes a gene along with its object xrefs, alt_allele rows, attributes and
# transcripts, then clears the adaptor on the gene object.
1474 if (!ref($gene) || !$gene->isa(
'Bio::EnsEMBL::Gene')) {
1475 throw(
"Bio::EnsEMBL::Gene argument expected.");
# Only genes stored in this adaptor's database can be removed.
1478 if (!$gene->is_stored($self->db())) {
1479 warning(
"Cannot remove gene " . $gene->dbID() .
". Is not stored in " .
"this database.");
1483 # remove all object xrefs associated with this gene
1485 my $dbe_adaptor = $self->db()->get_DBEntryAdaptor();
1486 foreach my $dbe (@{$gene->get_all_DBEntries()}) {
1487 $dbe_adaptor->remove_from_object($dbe, $gene,
'Gene');
1490 # remove all alternative allele entries associated with this gene
1491 my $sth = $self->prepare(
"DELETE FROM alt_allele WHERE gene_id = ?");
1492 $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
1496 # remove the attributes associated with this gene
1497 my $attrib_adaptor = $self->db->get_AttributeAdaptor;
1498 $attrib_adaptor->remove_from_Gene($gene);
1500 # remove all of the transcripts associated with this gene
1501 my $transcriptAdaptor = $self->db->get_TranscriptAdaptor();
1502 foreach my $trans (@{$gene->get_all_Transcripts()}) {
1503 $transcriptAdaptor->remove($trans);
1506 # remove this gene from the database
# $sth is reused for the final delete of the gene row itself.
1508 $sth = $self->prepare(
"DELETE FROM gene WHERE gene_id = ? ");
1509 $sth->bind_param(1, $gene->dbID, SQL_INTEGER);
1513 # unset the gene identifier and adaptor thereby flagging it as unstored
1516 $gene->adaptor(undef);
1521 =head2 get_Interpro_by_geneid
1523 Arg [1] : String $gene_stable_id
1524 The stable ID of the gene to obtain
1526 $gene_adaptor->get_Interpro_by_geneid(
1527 $gene->stable_id() ) };
1528 Description: Gets interpro
accession numbers by gene stable
id. A hack really
1529 - we should have a much more structured system than this.
1530 Returntype : listref of strings (Interpro_acc:description)
# Collects "accession:second column" strings for the Interpro entries reached
# from a gene stable ID through its current transcripts and translations.
1537 sub get_Interpro_by_geneid {
1538 my ($self, $gene_stable_id) = @_;
1541 SELECT i.interpro_ac,
1549 WHERE g.stable_id = ?
1550 AND t.gene_id = g.gene_id
1551 AND t.is_current = 1
1552 AND tl.transcript_id = t.transcript_id
1553 AND tl.translation_id = pf.translation_id
1554 AND i.id = pf.hit_name
1555 AND i.interpro_ac = x.dbprimary_acc);
1557 my $sth = $self->prepare($sql);
# The stable ID is bound as a placeholder, not interpolated into the SQL.
1559 $sth->bind_param(1, $gene_stable_id, SQL_VARCHAR);
1565 while ((my $arr = $sth->fetchrow_arrayref())) {
# Rows whose first column is already flagged in %h are skipped.
# NOTE(review): presumably %h records accessions already emitted - confirm.
1566 if ($h{$arr->[0]}) { next; }
# Output format is "<column 0>:<column 1>".
1568 my $string = $arr->[0] .
":" . $arr->[1];
1569 push(@out, $string);
1573 } ## end sub get_Interpro_by_geneid
1579 Example : $gene_adaptor->update($gene);
1580 Description: Updates the type, analysis, display_xref, is_current and
1581 description of a gene in the database.
# Writes the gene's current field values back to its row in the gene table and
# then refreshes the canonical-transcript attribute.
1590 my ($self, $gene) = @_;
1593 if (!defined $gene || !ref $gene || !$gene->isa(
'Bio::EnsEMBL::Gene')) {
1594 throw(
"Must update a gene object, not a $gene");
1597 # Get old canonical transcript id
1598 my $sth = $self->prepare(
"SELECT canonical_transcript_id FROM gene WHERE gene_id=?");
1599 $sth->execute($gene->dbID());
1600 my ($old_canonical_transcript_id) = $sth->fetchrow_array();
1603 my $update_gene_sql = qq(
1608 display_xref_id = ?,
1611 canonical_transcript_id = ?,
1616 my $display_xref = $gene->display_xref();
1617 my $display_xref_id;
# Only a display xref that already has a dbID is written; otherwise undef is bound.
1619 if ($display_xref && $display_xref->dbID()) {
1620 $display_xref_id = $display_xref->dbID();
1622 $display_xref_id = undef;
1625 $sth = $self->prepare($update_gene_sql);
1627 $sth->bind_param(1, $gene->stable_id(), SQL_VARCHAR);
1628 $sth->bind_param(2, $gene->get_Biotype->name, SQL_VARCHAR);
1629 $sth->bind_param(3, $gene->analysis->dbID(), SQL_INTEGER);
1630 $sth->bind_param(4, $display_xref_id, SQL_INTEGER);
1631 $sth->bind_param(5, $gene->description(), SQL_VARCHAR);
1632 $sth->bind_param(6, $gene->is_current(), SQL_TINYINT);
# Placeholder 7 is the canonical transcript dbID, or 0 when the gene has none.
1634 if (defined($gene->canonical_transcript())) {
1635 $sth->bind_param(7, $gene->canonical_transcript()->dbID(), SQL_INTEGER);
1637 $sth->bind_param(7, 0, SQL_INTEGER);
1639 $sth->bind_param(8, $gene->version(), SQL_TINYINT);
1640 $sth->bind_param(9, $gene->dbID(), SQL_INTEGER);
# Pass both the new and the previously stored canonical transcript IDs to the
# transcript adaptor.
1644 if (defined($gene->canonical_transcript())) {
1645 my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
1646 $transcript_adaptor->update_canonical_attribute($gene->canonical_transcript()->dbID(), $old_canonical_transcript_id);
1652 =head2 update_coords
1656 Example : $gene_adaptor->update_coords($gene);
1657 Description: In the
event of a
transcript being removed, coordinates
for the Gene
1658 need to be reset, but update() does not do this. update_coords
1661 Exceptions : thrown if the $gene is not supplied
# Recalculates the gene's coordinates and writes the new start and end to the
# database row identified by the gene's dbID.
1667 my ($self, $gene) = @_;
1668 throw(
'Must have a gene to update in order to update it') unless ($gene);
# Recompute the coordinates on the object before reading them for the update.
1669 $gene->recalculate_coordinates;
1670 my $update_sql = qq(
1672 SET seq_region_start = ?,
1676 my $sth = $self->prepare($update_sql);
# Bind order: start, end, then the dbID.
1677 $sth->bind_param(1, $gene->seq_region_start);
1678 $sth->bind_param(2, $gene->seq_region_end);
1679 $sth->bind_param(3, $gene->dbID);
1685 # Arg [1] : StatementHandle $sth
1686 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper
1687 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice
1688 # Description: PROTECTED implementation of abstract superclass method.
1689 # responsible for the creation of Genes
1690 # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system
# Turns the rows of an executed statement handle into Gene objects, optionally
# remapping them with $mapper and converting them to $dest_slice coordinates.
1695 sub _objs_from_sth {
1696 my ($self, $sth, $mapper, $dest_slice) = @_;
1699 # This code is ugly because an attempt has been made to remove as many
1700 # function calls as possible for speed purposes. Thus many caches and
1701 # a fair bit of gymnastics is used.
1704 my $sa = $self->db()->get_SliceAdaptor();
1705 my $aa = $self->db()->get_AnalysisAdaptor();
1706 my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();
1715 $gene_id, $seq_region_id, $seq_region_start,
1716 $seq_region_end, $seq_region_strand, $analysis_id,
1717 $biotype, $display_xref_id, $gene_description,
1718 $source, $is_current,
1719 $canonical_transcript_id, $stable_id, $version,
1720 $created_date, $modified_date, $xref_display_label,
1721 $xref_primary_acc, $xref_description, $xref_version,
1722 $external_db, $external_status, $external_release,
1723 $external_db_name, $info_type, $info_text
# Bind every result column to a variable so that each fetch() fills them in place.
1726 $sth->bind_columns(\(
1727 $gene_id, $seq_region_id, $seq_region_start,
1728 $seq_region_end, $seq_region_strand, $analysis_id,
1729 $biotype, $display_xref_id, $gene_description,
1730 $source, $is_current,
1731 $canonical_transcript_id, $stable_id, $version,
1732 $created_date, $modified_date, $xref_display_label,
1733 $xref_primary_acc, $xref_description, $xref_version,
1734 $external_db, $external_status, $external_release,
1735 $external_db_name, $info_type, $info_text
1738 my $dest_slice_start;
1740 my $dest_slice_strand;
1741 my $dest_slice_length;
1743 my $dest_slice_sr_name;
1744 my $dest_slice_sr_id;
# Read the destination slice's properties once, ahead of the row loop.
1748 $dest_slice_start = $dest_slice->start();
1749 $dest_slice_end = $dest_slice->end();
1750 $dest_slice_strand = $dest_slice->strand();
1751 $dest_slice_length = $dest_slice->length();
1752 $dest_slice_cs = $dest_slice->coord_system();
1753 $dest_slice_sr_name = $dest_slice->seq_region_name();
1754 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
1755 $asma = $self->db->get_AssemblyMapperAdaptor();
1758 FEATURE:
while($sth->fetch()) {
1760 #get the analysis object
# The "||=" already caches the fetched analysis; the following assignment
# stores the same value again.
1761 my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
1762 $analysis_hash{$analysis_id} = $analysis;
1764 #need to get the internal_seq_region, if present
1765 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
# Slices are cached under "ID:<seq_region_id>".
1766 my $slice = $slice_hash{
"ID:".$seq_region_id};
1769 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
1770 $slice_hash{
"ID:".$seq_region_id} = $slice;
1771 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
1772 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
1775 #obtain a mapper if none was defined, but a dest_seq_region was
1776 if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
1777 $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
1780 my $sr_name = $sr_name_hash{$seq_region_id};
1781 my $sr_cs = $sr_cs_hash{$seq_region_id};
1784 # remap the feature coordinates to another coord system
1785 # if a mapper was provided
# A ChainedAssemblyMapper with a destination slice goes through map(); the
# other call below uses fastmap().
1790 if (defined $dest_slice && $mapper->isa(
'Bio::EnsEMBL::ChainedAssemblyMapper') ) {
1791 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
1792 $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
1795 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
1796 $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
1799 #skip features that map to gaps or coord system boundaries
1800 next FEATURE
if (!defined($seq_region_id));
1802 #get a slice in the coord system we just mapped to
1803 $slice = $slice_hash{
"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
1807 # If a destination slice was provided convert the coords.
1809 if (defined($dest_slice)) {
1810 my $seq_region_len = $dest_slice->seq_region_length();
# Forward-strand slice: shift start and end so they are relative to the slice start.
1812 if ( $dest_slice_strand == 1 ) {
1813 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
1814 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
1816 if ( $dest_slice->is_circular ) {
1817 # Handle circular chromosomes.
1819 if ( $seq_region_start > $seq_region_end ) {
1820 # Looking at a feature overlapping the chromosome origin.
1822 if ( $seq_region_end > $dest_slice_start ) {
1823 # Looking at the region in the beginning of the chromosome
1824 $seq_region_start -= $seq_region_len;
1826 if ( $seq_region_end < 0 ) {
1827 $seq_region_end += $seq_region_len;
1830 if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
1831 # Looking at the region overlapping the chromosome
1832 # origin and a feature which is at the beginning of the
1834 $seq_region_start += $seq_region_len;
1835 $seq_region_end += $seq_region_len;
# Other strand: new start and end are measured back from the slice end.
1841 my $start = $dest_slice_end - $seq_region_end + 1;
1842 my $end = $dest_slice_end - $seq_region_start + 1;
1844 if ($dest_slice->is_circular()) {
1846 if ($dest_slice_start > $dest_slice_end) {
1847 # slice spans origin or replication
1849 if ($seq_region_start >= $dest_slice_start) {
1850 $end += $seq_region_len;
1851 $start += $seq_region_len
if $seq_region_end > $dest_slice_start;
1853 } elsif ($seq_region_start <= $dest_slice_end) {
1855 } elsif ($seq_region_end >= $dest_slice_start) {
1856 $start += $seq_region_len;
1857 $end += $seq_region_len;
1859 } elsif ($seq_region_end <= $dest_slice_end) {
1860 $end += $seq_region_len
if $end < 0;
1862 } elsif ($seq_region_start > $seq_region_end) {
1863 $end += $seq_region_len;
1868 if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
1870 } elsif ($seq_region_start > $seq_region_end) {
1871 if ($seq_region_start <= $dest_slice_end) {
1872 $start -= $seq_region_len;
1873 } elsif ($seq_region_end >= $dest_slice_start) {
1874 $end += $seq_region_len;
# Adopt the flipped coordinates and invert the strand.
1880 $seq_region_start = $start;
1881 $seq_region_end = $end;
1882 $seq_region_strand *= -1;
1884 } ## end
else [
if ( $dest_slice_strand...)]
1886 # Throw away features off the end of the requested slice or on
1887 # different seq_region.
1888 if ($seq_region_end < 1
1889 || $seq_region_start > $dest_slice_length
1890 || ($dest_slice_sr_id != $seq_region_id)) {
# Features that survive the filter are attached to the destination slice.
1893 $slice = $dest_slice;
# Display xref fields are only used when the row carries a display label.
1898 if (defined $xref_display_label) {
1900 'dbID' => $display_xref_id,
1901 'adaptor' => $dbEntryAdaptor,
1902 'display_id' => $xref_display_label,
1903 'primary_id' => $xref_primary_acc,
1904 'version' => $xref_version,
1905 'description' => $xref_description,
1906 'release' => $external_release,
1907 'dbname' => $external_db,
1908 'db_display_name' => $external_db_name,
1909 'info_type' => $info_type,
1910 'info_text' => $info_text
1912 $display_xref->status($external_status);
1915 # Finally, create the new Gene.
1918 $self->_create_feature_fast(
1919 'Bio::EnsEMBL::Gene', {
1920 'analysis' => $analysis,
1921 'biotype' => $biotype,
1922 'start' => $seq_region_start,
1923 'end' => $seq_region_end,
1924 'strand' => $seq_region_strand,
1928 'stable_id' => $stable_id,
1929 'version' => $version,
1930 'created_date' => $created_date || undef,
1931 'modified_date' => $modified_date || undef,
1932 'description' => $gene_description,
1933 'external_name' => undef, # will use display_id
1935 'external_db' => $external_db,
1936 'external_status' => $external_status,
1937 'display_xref' => $display_xref,
1938 'source' => $source,
1939 'is_current' => $is_current,
1940 'canonical_transcript_id' => $canonical_transcript_id}));
1942 } ## end
while ($sth->fetch())
1945 } ## end sub _objs_from_sth
1947 =head2 cache_gene_seq_mappings
1949 Example : $gene_adaptor->cache_gene_seq_mappings();
1950 Description: caches all the assembly mappings needed
for genes
1955 : New experimental code
# Registers the assembly mappings between the sequence-level coordinate system
# and every coordinate system that holds gene features.
1959 sub cache_gene_seq_mappings {
1962 # get the sequence level to map to
# NOTE(review): the concatenated SQL has no space between the closing quote of
# the LIKE pattern and "AND" - confirm the server accepts it.
1964 my $sql =
'SELECT name ' .
'FROM coord_system ' .
'WHERE attrib like "%%sequence_level%%"' .
'AND species_id = ?';
1966 my $sth = $self->prepare($sql);
1967 $sth->bind_param(1, $self->species_id(), SQL_INTEGER);
# The query selects a single column; its value is the coord system name.
1970 my $sequence_level = $sth->fetchrow_array();
1974 my $csa = $self->db->get_CoordSystemAdaptor();
1975 my $ama = $self->db->get_AssemblyMapperAdaptor();
1977 my $cs1 = $csa->fetch_by_name($sequence_level);
1979 # get the coord systems to map to (those holding gene features)
1981 my $mcc = $self->db->get_MetaCoordContainerAdaptor();
1982 my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type(
'gene');
# Fetch the mapper for each pairing and register all of its mappings.
1984 foreach my $cs2 (@$csnew) {
1985 my $am = $ama->fetch_by_CoordSystems($cs1, $cs2);
1986 $am->register_all();
1989 } ## end sub cache_gene_seq_mappings
1991 =head2 fetch_all_by_exon_supporting_evidence
1993 Arg [1] : String $hit_name
1994 Name of supporting feature
1995 Arg [2] : String $feature_type
1996 one of
"dna_align_feature" or
"protein_align_feature"
1997 Arg [3] : (optional) Bio::EnsEMBL::Analysis
1998 Example : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence(
1999 'XYZ',
'dna_align_feature');
2000 Description: Gets all the genes with transcripts with exons which have a
2001 specified hit on a particular type of feature. Optionally filter
2004 Exceptions : If feature_type is not of correct type.
# Finds current genes that have a transcript with an exon supported by the
# given hit.
#   Arg [1]    : String $hit_name - hit name of the supporting feature
#   Arg [2]    : String $feature_type - "dna_align_feature" or
#                "protein_align_feature"
#   Arg [3]    : (optional) analysis object; restricts hits to its dbID
#   Returntype : reference to a list of the genes fetched by dbID
#   Exceptions : throw when $feature_type is not one of the two table names
sub fetch_all_by_exon_supporting_evidence {
  my ($self, $hit_name, $feature_type, $analysis) = @_;

  # $feature_type is interpolated into the SQL as a table name, so only the
  # two exact names may pass. The earlier pattern
  # /(dna)|(protein)_align_feature/ was unanchored and its alternation split
  # the whole expression, so any string containing "dna" was accepted.
  if (!defined($feature_type) || $feature_type !~ /\A(?:dna|protein)_align_feature\z/) {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # Extra FROM/WHERE fragments are only needed when filtering by analysis;
  # empty strings keep the interpolation below free of undef warnings.
  my ($anal_from, $anal_where) = ('', '');
  if ($analysis) {
    $anal_from  = ", analysis a ";
    $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? ";
  }

  my $sql = qq(
      SELECT DISTINCT(g.gene_id)
        FROM gene g,
             transcript t,
             exon_transcript et,
             supporting_feature sf,
             $feature_type f
             $anal_from
       WHERE g.gene_id = t.gene_id
         AND g.is_current = 1
         AND t.transcript_id = et.transcript_id
         AND et.exon_id = sf.exon_id
         AND sf.feature_id = f.${feature_type}_id
         AND sf.feature_type = ?
         AND f.hit_name = ?
         $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param(1, $feature_type, SQL_VARCHAR);
  $sth->bind_param(2, $hit_name, SQL_VARCHAR);
  # The third placeholder exists only when $anal_where was added above.
  $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

  $sth->execute();

  my @genes;
  while (my $id = $sth->fetchrow_array) {
    my $gene = $self->fetch_by_dbID($id);
    push(@genes, $gene) if $gene;
  }

  return \@genes;
} ## end sub fetch_all_by_exon_supporting_evidence
2059 =head2 fetch_all_by_transcript_supporting_evidence
2061 Arg [1] : String $hit_name
2062 Name of supporting feature
2063 Arg [2] : String $feature_type
2064 one of
"dna_align_feature" or
"protein_align_feature"
2065 Arg [3] : (optional) Bio::EnsEMBL::Analysis
2066 Example : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence(
'XYZ',
'dna_align_feature');
2067 Description: Gets all the genes with transcripts with evidence
for a
2068 specified hit on a particular type of feature. Optionally filter
2071 Exceptions : If feature_type is not of correct type.
# Finds current genes that have a transcript supported by the given hit.
#   Arg [1]    : String $hit_name - hit name of the supporting feature
#   Arg [2]    : String $feature_type - "dna_align_feature" or
#                "protein_align_feature"
#   Arg [3]    : (optional) analysis object; restricts hits to its dbID
#   Returntype : reference to a list of the genes fetched by dbID
#   Exceptions : throw when $feature_type is not one of the two table names
sub fetch_all_by_transcript_supporting_evidence {
  my ($self, $hit_name, $feature_type, $analysis) = @_;

  # $feature_type is interpolated into the SQL as a table name, so only the
  # two exact names may pass. The earlier pattern
  # /(dna)|(protein)_align_feature/ was unanchored and its alternation split
  # the whole expression, so any string containing "dna" was accepted.
  if (!defined($feature_type) || $feature_type !~ /\A(?:dna|protein)_align_feature\z/) {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # Extra FROM/WHERE fragments are only needed when filtering by analysis;
  # empty strings keep the interpolation below free of undef warnings.
  my ($anal_from, $anal_where) = ('', '');
  if ($analysis) {
    $anal_from  = ", analysis a ";
    $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? ";
  }

  my $sql = qq(
      SELECT DISTINCT(g.gene_id)
        FROM gene g,
             transcript t,
             transcript_supporting_feature sf,
             $feature_type f
             $anal_from
       WHERE g.gene_id = t.gene_id
         AND g.is_current = 1
         AND t.transcript_id = sf.transcript_id
         AND sf.feature_id = f.${feature_type}_id
         AND sf.feature_type = ?
         AND f.hit_name = ?
         $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param(1, $feature_type, SQL_VARCHAR);
  $sth->bind_param(2, $hit_name, SQL_VARCHAR);
  # The third placeholder exists only when $anal_where was added above.
  $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

  $sth->execute();

  my @genes;
  while (my $id = $sth->fetchrow_array) {
    my $gene = $self->fetch_by_dbID($id);
    push(@genes, $gene) if $gene;
  }

  return \@genes;
} ## end sub fetch_all_by_transcript_supporting_evidence
2125 return ' ORDER BY g.gene_id'