3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
44 # print gene information
45 print(
"gene start:end:strand is "
46 . join(
":",
map { $gene->$_ } qw(start end strand) )
49 # set some additional attributes
50 $gene->stable_id(
'ENSG000001');
51 $gene->description(
'This is the gene description');
55 A representation of a
Gene within the Ensembl system. A gene is a set of one or
56 more alternative transcripts.
62 package Bio::EnsEMBL::Gene;
76 use constant SEQUENCE_ONTOLOGY => {
84 int - start position of the gene
86 int - end position of the gene
88 int - 1,-1 the strand the gene is on
92 string - the stable identifier of
this gene
94 int - the version of the stable identifier of
this gene
95 Arg [-EXTERNAL_NAME] :
96 string - the external database name associated with
this gene
98 string - the name of the database the external name is from
99 Arg [-EXTERNAL_STATUS]:
100 string - the status of the external identifier
103 to label
this gene when it is displayed.
105 Listref of Bio::EnsEMBL::Transcripts -
this gene
's transcripts
107 string - the date the gene was created
108 Arg [-MODIFIED_DATE]:
109 string - the date the gene was last modified
111 string - the genes description
113 string - the biotype e.g. "protein_coding"
115 string - the genes source, e.g. "ensembl"
117 Boolean - specifies if this is the current version of the gene
118 Arg [-CANONICAL_TRANSCRIPT]:
119 Bio::EnsEMBL::Transcript - the canonical transcript of this gene
120 Arg [-CANONICAL_TRANSCRIPT_ID]:
121 integer - the canonical transcript dbID of this gene, if the
122 transcript object itself is not available.
124 Example : $gene = Bio::EnsEMBL::Gene->new(...);
125 Description: Creates a new gene object
126 Returntype : Bio::EnsEMBL::Gene
136 my $class = ref($caller) || $caller;
137 my $self = $class->SUPER::new(@_);
139 $stable_id, $version,
140 $external_name, $type,
141 $external_db, $external_status,
142 $display_xref, $description,
143 $transcripts, $created_date,
144 $modified_date, $confidence,
147 $canonical_transcript_id, $canonical_transcript
150 'STABLE_ID
', 'VERSION
',
151 'EXTERNAL_NAME
', 'TYPE
',
152 'EXTERNAL_DB
', 'EXTERNAL_STATUS
',
153 'DISPLAY_XREF
', 'DESCRIPTION
',
154 'TRANSCRIPTS
', 'CREATED_DATE
',
155 'MODIFIED_DATE
', 'CONFIDENCE
',
158 'CANONICAL_TRANSCRIPT_ID
', 'CANONICAL_TRANSCRIPT
'
165 $self->{'_transcript_array
'} = $transcripts;
166 $self->recalculate_coordinates();
169 $self->stable_id($stable_id);
170 $self->{'created_date
'} = $created_date;
171 $self->{'modified_date
'} = $modified_date;
173 $self->external_name($external_name) if ( defined $external_name );
174 $self->external_db($external_db) if ( defined $external_db );
175 $self->external_status($external_status)
176 if ( defined $external_status );
177 $self->display_xref($display_xref) if ( defined $display_xref );
179 $self->{'biotype
'} = $biotype || $type;
181 $self->description($description);
182 $self->source($source);
185 if ( !defined($version) ) { $version = 1 }
186 $self->{'version
'} = $version;
188 # default to is_current
189 $is_current = 1 unless (defined($is_current));
190 $self->{'is_current
'} = $is_current;
192 # Add the canonical transcript if we were given one, otherwise add the
193 # canonical transcript internal ID if we were given one.
194 if ( defined($canonical_transcript) ) {
195 $self->canonical_transcript($canonical_transcript);
196 } elsif ( defined($canonical_transcript_id) ) {
197 $self->{'canonical_transcript_id
'} = $canonical_transcript_id;
207 Arg [1] : (optional) String - the external name to set
208 Example : $gene->external_name('BRCA2
');
209 Description: Getter/setter for attribute external_name.
210 Returntype : String or undef
220 $self->{'external_name
'} = shift if (@_);
222 if (defined $self->{'external_name
'}) {
223 return $self->{'external_name
'};
226 my $display_xref = $self->display_xref();
228 if (defined $display_xref) {
229 return $display_xref->display_id();
239 Arg [1] : (optional) String - the source to set
240 Example : $gene->source('ensembl
');
241 Description: Getter/setter for attribute source
251 $self->{'source
'} = shift if( @_ );
252 return ( $self->{'source
'} || "ensembl" );
258 Arg [1] : (optional) String - name of external db to set
259 Example : $gene->external_db('HGNC
');
260 Description: Getter/setter for attribute external_db. The db is the one that
261 belongs to the external_name.
272 $self->{'external_db
'} = shift if( @_ );
274 if( exists $self->{'external_db
'} ) {
275 return $self->{'external_db
'};
278 my $display_xref = $self->display_xref();
280 if( defined $display_xref ) {
281 return $display_xref->dbname()
288 =head2 external_status
290 Arg [1] : (optional) String - status of the external db
291 Example : $gene->external_status('KNOWNXREF
');
292 Description: Getter/setter for attribute external_status. The status of
293 the external db of the one that belongs to the external_name.
301 sub external_status {
304 $self->{'_ext_status
'} = shift if ( @_ );
305 return $self->{'_ext_status
'} if exists $self->{'_ext_status
'};
307 my $display_xref = $self->display_xref();
309 if( defined $display_xref ) {
310 return $display_xref->status()
319 Arg [1] : (optional) String - the description to set
320 Example : $gene->description('This is the gene\
's description');
321 Description: Getter/setter
for gene description
331 $self->{
'description'} = shift
if( @_ );
332 return $self->{
'description'};
339 Example :
if ($geneA->equals($geneB)) { ... }
340 Description : Compares two genes
for equality.
341 The test
for equality goes through the following list
342 and terminates at the first
true match:
345 then the genes are *not* equal.
346 2. If the biotypes differ, then the genes are *not*
348 3. If both genes have stable IDs: if these are the
349 same, the genes are equal, otherwise not.
350 4. If both genes have the same number of transcripts
351 and if these are (when compared pair-wise sorted by
352 start-position and length) the same, then they are
353 equal, otherwise not.
355 Return type : Boolean (0, 1)
357 Exceptions : Thrown if a non-gene is passed as the argument.
362 my ( $self, $gene ) = @_;
364 if ( !defined($gene) ) {
return 0 }
365 if ( $self eq $gene ) {
return 1 }
367 assert_ref( $gene,
'Bio::EnsEMBL::Gene' );
369 my $feature_equals = $self->SUPER::equals($gene);
370 if ( defined($feature_equals) && $feature_equals == 0 ) {
374 if ( $self->get_Biotype->name ne $gene->get_Biotype->name ) { # compare against the other gene, not self
378 if ( defined( $self->stable_id() ) && defined( $gene->stable_id() ) )
380 if ( $self->stable_id() eq $gene->stable_id() ) {
return 1 }
384 my @self_transcripts = sort {
385 $a->start() <=> $b->start() ||
386 $a->length() <=> $b->length()
387 } @{ $self->get_all_Transcripts() };
388 my @gene_transcripts = sort {
389 $a->start() <=> $b->start() ||
390 $a->length() <=> $b->length()
391 } @{ $gene->get_all_Transcripts() };
393 if ( scalar(@self_transcripts) != scalar(@gene_transcripts) ) {
397 while (@self_transcripts) {
398 my $self_transcript = shift(@self_transcripts);
399 my $gene_transcript = shift(@gene_transcripts);
401 if ( !$self_transcript->equals($gene_transcript) ) {
409 =head2 canonical_transcript
412 Example : $gene->canonical_transcript($canonical_transcript);
413 Description: Getter/setter
for the canonical_transcript
415 Exceptions : Throws
if argument is not a
transcript object.
421 sub canonical_transcript {
422 my ( $self, $transcript ) = @_;
424 if ( defined($transcript) ) {
425 # We're attaching a new canonical transcript.
427 assert_ref( $transcript,
'Bio::EnsEMBL::Transcript' );
429 # If there's already a canonical transcript, make sure it doesn't
430 # think it's still canonical.
431 if ( defined( $self->{
'canonical_transcript'} ) ) {
432 $self->{
'canonical_transcript'}->is_canonical(0);
435 $self->{
'canonical_transcript'} = $transcript;
436 $self->{
'canonical_transcript_id'} = $transcript->dbID();
438 $transcript->is_canonical(1);
440 } elsif ( !defined( $self->{
'canonical_transcript'} )
441 && defined( $self->{
'canonical_transcript_id'} )
442 && $self->{
'canonical_transcript_id'} != 0 )
444 # We have not attached a canonical transcript, but we have the dbID
447 if ( defined( $self->adaptor() ) ) {
448 my $transcript_adaptor =
449 $self->adaptor()->db()->get_TranscriptAdaptor();
451 my $canonical_transcript =
452 $transcript_adaptor->fetch_by_dbID(
453 $self->{
'canonical_transcript_id'} );
455 if ( defined($canonical_transcript) ) {
457 $self->canonical_transcript($canonical_transcript);
461 warning(
"Gene has no adaptor "
462 .
"when trying to fetch canonical transcript." );
465 } ## end elsif ( !defined( $self->...))
467 return $self->{
'canonical_transcript'};
468 } ## end sub canonical_transcript
471 =head2 get_all_Attributes
473 Arg [1] : (optional) String $attrib_code
474 The code of the attribute type to retrieve values
for
475 Example : my ($author) = @{ $gene->get_all_Attributes(
'author') };
476 my @gene_attributes = @{ $gene->get_all_Attributes };
477 Description: Gets a list of Attributes of
this gene.
478 Optionally just get Attributes
for given code.
480 Exceptions : warning
if gene does not have attached adaptor and attempts lazy
487 sub get_all_Attributes {
489 my $attrib_code = shift;
491 if ( ! exists $self->{
'attributes' } ) {
492 if (!$self->adaptor() ) {
496 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
497 $self->{
'attributes'} = $attribute_adaptor->fetch_all_by_Gene($self);
500 if ( defined $attrib_code ) {
501 my @results = grep { uc($_->code()) eq uc($attrib_code) }
502 @{$self->{
'attributes'}};
505 return $self->{
'attributes'};
510 =head2 add_Attributes
514 Example : my $attrib = Bio::EnsEMBL::Attribute->new(...);
515 $gene->add_Attributes($attrib);
516 Description: Adds an Attribute to the Gene. If you add an attribute before
517 you retrieve any from database, lazy loading will be disabled.
519 Exceptions : throw on incorrect arguments
529 if( ! exists $self->{'attributes
'} ) {
530 $self->{'attributes
'} = [];
533 for my $attrib ( @attribs ) {
534 if( ! $attrib->isa( "Bio::EnsEMBL::Attribute" )) {
535 throw( "Argument to add_Attribute has to be an Bio::EnsEMBL::Attribute" );
537 push( @{$self->{'attributes
'}}, $attrib );
546 Arg [1] : Bio::EnsEMBL::DBEntry $dbe
547 The dbEntry to be added
548 Example : my $dbe = Bio::EnsEMBL::DBEntry->new(...);
549 $gene->add_DBEntry($dbe);
550 Description: Associates a DBEntry with this gene. Note that adding DBEntries
551 will prevent future lazy-loading of DBEntries for this gene
552 (see get_all_DBEntries).
554 Exceptions : thrown on incorrect argument type
565 throw('Expected DBEntry argument
');
568 $self->{'dbentries
'} ||= [];
569 push @{$self->{'dbentries
'}}, $dbe;
573 =head2 get_all_DBEntries
575 Arg [1] : (optional) String, external database name,
576 SQL wildcard characters (_ and %) can be used to
579 Arg [2] : (optional) String, external_db type, can be one of
580 ('ARRAY
','ALT_TRANS
','ALT_GENE
','MISC
','LIT
','PRIMARY_DB_SYNONYM
','ENSEMBL
'),
581 SQL wildcard characters (_ and %) can be used to
584 Example : my @dbentries = @{ $gene->get_all_DBEntries() };
585 @dbentries = @{ $gene->get_all_DBEntries('Uniprot%
') };
586 @dbentries = @{ $gene->get_all_DBEntries('%
', 'ENSEMBL
') };
588 Description: Retrieves DBEntries (xrefs) for this gene. This does
589 *not* include DBEntries that are associated with the
590 transcripts and corresponding translations of this
591 gene (see get_all_DBLinks()).
593 This method will attempt to lazy-load DBEntries
594 from a database if an adaptor is available and no
595 DBEntries are present on the gene (i.e. they have not
596 already been added or loaded).
598 Return type: Listref of Bio::EnsEMBL::DBEntry objects
600 Caller : get_all_DBLinks, GeneAdaptor::store
605 sub get_all_DBEntries {
606 my ( $self, $db_name_exp, $ex_db_type ) = @_;
608 my $cache_name = 'dbentries
';
610 if ( defined($db_name_exp) ) {
611 $cache_name .= $db_name_exp;
614 if ( defined($ex_db_type) ) {
615 $cache_name .= $ex_db_type;
618 # if not cached, retrieve all of the xrefs for this gene
619 if ( !defined( $self->{$cache_name} ) && defined( $self->adaptor() ) )
621 $self->{$cache_name} =
622 $self->adaptor()->db()->get_DBEntryAdaptor()
623 ->fetch_all_by_Gene( $self, $db_name_exp, $ex_db_type );
626 $self->{$cache_name} ||= [];
628 return $self->{$cache_name};
629 } ## end sub get_all_DBEntries
631 =head2 get_all_object_xrefs
633 Arg [1] : (optional) String, external database name
635 Arg [2] : (optional) String, external_db type
637 Example : @oxrefs = @{ $gene->get_all_object_xrefs() };
639 Description: Retrieves xrefs for this gene. This does *not*
640 include xrefs that are associated with the
641 transcripts or corresponding translations of this
642 gene (see get_all_xrefs()).
644 This method will attempt to lazy-load xrefs from a
645 database if an adaptor is available and no xrefs are
646 present on the gene (i.e. they have not already been
649 NB: This method is an alias for the
650 get_all_DBEntries() method.
652 Return type: Listref of Bio::EnsEMBL::DBEntry objects
658 sub get_all_object_xrefs {
660 return $self->get_all_DBEntries(@_);
663 =head2 get_all_DBLinks
665 Arg [1] : String database name (optional)
666 SQL wildcard characters (_ and %) can be used to
669 Arg [2] : (optional) String, external database type, can be one of
670 ('ARRAY
','ALT_TRANS
','ALT_GENE
','MISC
','LIT
','PRIMARY_DB_SYNONYM
','ENSEMBL
'),
671 SQL wildcard characters (_ and %) can be used to
674 Example : @dblinks = @{ $gene->get_all_DBLinks() };
675 @dblinks = @{ $gene->get_all_DBLinks('Uniprot%
') };
676 @dblinks = @{ $gene->get_all_DBLinks('%
', 'ENSEMBL
') };
678 Description: Retrieves *all* related DBEntries for this gene. This
679 includes all DBEntries that are associated with the
680 transcripts and corresponding translations of this
683 If you only want to retrieve the DBEntries
684 associated with the gene (and not the transcript
685 and translations) then you should use the
686 get_all_DBEntries() call instead.
688 Note: Each entry may be listed more than once. No
689 uniqueness checks are done. Also if you put in an
690 incorrect external database name no checks are done
691 to see if this exists, you will just get an empty
694 Return type: Listref of Bio::EnsEMBL::DBEntry objects
701 sub get_all_DBLinks {
702 my ( $self, $db_name_exp, $ex_db_type ) = @_;
705 @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
707 # Add all of the transcript and translation xrefs to the return list.
708 foreach my $transcript ( @{ $self->get_all_Transcripts() } ) {
710 @{$transcript->get_all_DBLinks( $db_name_exp, $ex_db_type ) }
719 Arg [1] : String database name (optional)
720 SQL wildcard characters (_ and %) can be used to
723 Example : @xrefs = @{ $gene->get_all_xrefs() };
724 @xrefs = @{ $gene->get_all_xrefs('Uniprot%
') };
726 Description: Retrieves *all* related xrefs for this gene. This
727 includes all xrefs that are associated with the
728 transcripts and corresponding translations of this
731 If you want to retrieve the xrefs associated
732 with only the gene (and not the transcript
733 or translations) then you should use the
734 get_all_object_xrefs() method instead.
736 Note: Each entry may be listed more than once. No
737 uniqueness checks are done. Also if you put in an
738 incorrect external database name no checks are done
739 to see if this exists, you will just get an empty
742 NB: This method is an alias for the
743 get_all_DBLinks() method.
745 Return type: Listref of Bio::EnsEMBL::DBEntry objects
753 return $self->get_all_DBLinks(@_);
758 Example : my @exons = @{ $gene->get_all_Exons };
759 Description: Returns a set of all the exons associated with this gene.
760 Returntype : Listref of Bio::EnsEMBL::Exon objects
774 foreach my $trans ( @{$self->get_all_Transcripts} ) {
775 foreach my $e ( @{$trans->get_all_Exons} ) {
776 $h{$e->start()."-".$e->end()."-".$e->strand()."-".$e->phase()."-".$e->end_phase()} = $e;
780 push @out, values %h;
785 =head2 get_all_Introns
788 Example : my @introns = @{$gene->get_all_Introns()};
789 Description: Returns a listref of the introns in this gene in order.
790 i.e. the first intron in the listref is the 5prime most intron in
792 Returntype : listref to Bio::EnsEMBL::Intron objects
799 sub get_all_Introns {
806 foreach my $trans ( @{$self->get_all_Transcripts} ) {
807 my @exons = @{ $trans->get_all_Exons() };
808 for (my $i = 0; $i < scalar(@exons) - 1; $i++) {
809 my $intron = new Bio::EnsEMBL::Intron($exons[$i], $exons[$i+1]);
810 push (@introns, $intron);
818 =head2 get_all_homologous_Genes
820 Arg[1] : String The compara synonym to use when looking for a database in the
821 registry. If not provided we will use the very first compara database
823 Description: Queries the Ensembl Compara database and retrieves all
824 Genes from other species that are orthologous.
825 REQUIRES properly setup Registry conf file. Meaning that
826 one of the aliases for each core db has to be "Genus species"
827 e.g. "Homo sapiens" (as in the name column in genome_db table
828 in the compara database).
830 The data is cached in this Object for faster re-retrieval.
831 Returntype : listref [
833 Bio::EnsEMBL::Compara::Homology,
834 string $species # needed as cannot get spp from Gene
842 sub get_all_homologous_Genes {
843 my ($self, $db_synonym) = @_;
845 #Look for DBAdaptors which have a group of compara; these are compara DBAs.
847 my %args = (-GROUP => 'compara
');
848 $args{-SPECIES} = $db_synonym if $db_synonym;
849 my ($compara_dba) = @{Bio::EnsEMBL::Registry->get_all_DBAdaptors(%args)};
850 unless( $compara_dba ) {
851 throw("No compara found in Bio::EnsEMBL::Registry. Please fully populate the Registry or construct a Bio::EnsEMBL::Compara::DBSQL::DBAdaptor");
853 my $compara_species = $compara_dba->species();
854 if( exists( $self->{'homologues
'}->{$compara_species} ) ){
855 return $self->{'homologues
'}->{$compara_species};
857 $self->{'homologues
'}->{$compara_species} = [];
859 # Get the compara 'member
' corresponding to self
860 my $member_adaptor = $compara_dba->get_adaptor('GeneMember
');
861 my $query_member = $member_adaptor->fetch_by_Gene($self);
862 unless( $query_member ){ return $self->{'homologues
'}->{$compara_species} };
864 # Get the compara 'homologies
' corresponding to 'member
'
865 my $homology_adaptor = $compara_dba->get_adaptor('Homology
');
866 my @homolos = @{$homology_adaptor->fetch_all_by_Member($query_member)};
867 unless( scalar(@homolos) ){ return $self->{'homologues
'}->{$compara_species} };
869 # Get the ensembl 'genes
' corresponding to 'homologies
'
870 foreach my $homolo( @homolos ){
871 foreach my $member( @{$homolo->get_all_GeneMembers} ){
872 my $hstable_id = $member->stable_id;
873 next if ($hstable_id eq $query_member->stable_id); # Ignore self
875 eval { $hgene = $member->get_Gene;} ;
877 # Something up with DB. Create a new gene is best we can do
878 $hgene = Bio::EnsEMBL::Gene->new
879 ( -stable_id=>$hstable_id,
880 -description=>$member->description, );
882 my $hspecies = $member->genome_db->name;
883 push @{$self->{'homologues
'}->{$compara_species}}, [$hgene,$homolo,$hspecies];
886 return $self->{'homologues
'}->{$compara_species};
889 =head2 _clear_homologues
891 Description: Removes any cached homologues from the Gene which could have been
892 fetched from the C<get_all_homologous_Genes()> call.
899 sub _clear_homologues {
901 delete $self->{homologues};
904 =head2 add_Transcript
906 Arg [1] : Bio::EnsEMBL::Transcript $trans
907 The transcript to add to the gene
908 Example : my $transcript = Bio::EnsEMBL::Transcript->new(...);
909 $gene->add_Transcript($transcript);
910 Description: Adds another Transcript to the set of alternatively
911 spliced Transcripts of this gene. If it shares exons
912 with another Transcript, these should be object-identical.
921 my ($self, $trans) = @_;
923 if( !ref $trans || ! $trans->isa("Bio::EnsEMBL::Transcript") ) {
924 throw("$trans is not a Bio::EnsEMBL::Transcript!");
927 $self->{'_transcript_array
'} ||= [];
928 push(@{$self->{'_transcript_array
'}},$trans);
930 $self->recalculate_coordinates();
933 sub remove_Transcript {
934 my ($self,$trans) = @_;
935 if( !ref $trans || ! $trans->isa("Bio::EnsEMBL::Transcript") ) {
936 throw("$trans is not a Bio::EnsEMBL::Transcript!");
938 # Clean transcript from live data
939 $self->get_all_Transcripts; # force lazy load.
940 my $array = $self->{_transcript_array};
941 my $db_id = $trans->dbID;
942 @$array = grep { $_->dbID != $db_id } @$array;
943 # Recalculate and store new gene coordinates
944 $self->adaptor->update_coords($self);
948 =head2 get_all_Transcripts
950 Example : my @transcripts = @{ $gene->get_all_Transcripts };
951 Description: Returns the Transcripts in this gene.
952 Returntype : Listref of Bio::EnsEMBL::Transcript objects
953 Warning : This method returns the internal transcript array
954 used by this object. Avoid any modification
955 of this array. We class use of shift and
956 reassignment of the loop variable when iterating
957 this array as modification.
959 Dereferencing the structure as shown in the example is
960 a safe way of using this data structure.
967 sub get_all_Transcripts {
970 if( ! exists $self->{'_transcript_array
'} ) {
971 if( defined $self->adaptor() ) {
972 my $ta = $self->adaptor()->db()->get_TranscriptAdaptor();
973 my $transcripts = $ta->fetch_all_by_Gene( $self );
974 $self->{'_transcript_array
'} = $transcripts;
978 if (defined $self->{'_transcript_array
'}) {
979 @array_copy = @{ $self->{'_transcript_array
'} } ;
986 =head2 get_all_alt_alleles
988 Example : my @alt_genes = @{ $gene->get_all_alt_alleles };
989 foreach my $alt_gene (@alt_genes) {
990 print "Alternate allele: " . $alt_gene->stable_id() . "\n";
992 Description: Returns a listref of Gene objects that represent this Gene on
993 an alternative haplotype. Empty list if there is no such
994 Gene (eg there is no overlapping haplotype).
995 Returntype : listref of Bio::EnsEMBL::Gene objects
1002 sub get_all_alt_alleles {
1004 my $result = $self->adaptor()->fetch_all_alt_alleles( $self );
1011 Arg [1] : (optional) Int
1012 A version number for the stable_id
1013 Example : $gene->version(2);
1014 Description: Getter/setter for version number
1024 $self->{'version
'} = shift if(@_);
1025 return $self->{'version
'};
1031 Arg [1] : (optional) String - the stable ID to set
1032 Example : $gene->stable_id("ENSG0000000001");
1033 Description: Getter/setter for stable id for this gene.
1043 $self->{'stable_id
'} = shift if(@_);
1044 return $self->{'stable_id
'};
1047 =head2 stable_id_version
1049 Arg [1] : (optional) String - the stable ID with version to set
1050 Example : $gene->stable_id_version("ENSG0000000001.3");
1051 Description: Getter/setter for stable id with version for this gene.
1059 sub stable_id_version {
1061 if(my $stable_id = shift) {
1062 # See if there's an embedded period, assume that
's a
1063 # version, might not work for some species but you
1064 # should use ->stable_id() and version() if you're worried
1066 my $vindex = rindex($stable_id,
'.');
1067 # Set the stable_id and version pair depending on if
1068 # we found a version delimiter in the stable_id
1069 ($self->{stable_id}, $self->{version}) = ($vindex > 0 ?
1070 (substr($stable_id,0,$vindex), substr($stable_id,$vindex+1)) :
1073 return $self->{stable_id} . ($self->{version} ?
".$self->{version}" :
'');
1078 Arg [1] : Boolean $is_current
1079 Example : $gene->is_current(1)
1080 Description: Getter/setter
for is_current state of
this gene.
1090 $self->{
'is_current'} = shift
if (@_);
1091 return $self->{
'is_current'};
1097 Arg [1] : (optional) String - created date to set (as a UNIX time
int)
1098 Example : $gene->created_date(
'1141948800');
1099 Description: Getter/setter
for attribute created_date
1109 $self->{
'created_date'} = shift
if ( @_ );
1110 return $self->{
'created_date'};
1114 =head2 modified_date
1116 Arg [1] : (optional) String - modified date to set (as a UNIX time
int)
1117 Example : $gene->modified_date(
'1141948800');
1118 Description: Getter/setter
for attribute modified_date
1128 $self->{
'modified_date'} = shift
if ( @_ );
1129 return $self->{
'modified_date'};
1135 Arg [1] : String - coordinate system name to transform to
1136 Arg [2] : String - coordinate system version
1137 Example : my $new_gene = $gene->transform(
'supercontig');
1138 Description: Moves
this gene to the given coordinate system. If
this gene has
1139 Transcripts attached, they move as well.
1141 Exceptions :
throw on wrong parameters
1150 my $new_gene = $self->SUPER::transform(@_);
1152 if ( !defined($new_gene) ) {
1153 # check if this gene projects at all to requested coord system,
1154 # if not we are done.
1155 my @segments = @{ $self->
project(@_) };
1162 # If you are transforming the gene then make sure the transcripts and exons are loaded
1165 foreach my $tran (@{$self->get_all_Transcripts}){
1166 $tran->get_all_Exons();
1169 if( exists $self->{
'_transcript_array'} ) {
1170 my @new_transcripts;
1171 my ( $strand, $slice );
1172 my $low_start = POSIX::INT_MAX;
1173 my $hi_end = POSIX::INT_MIN;
1174 for my $old_transcript ( @{$self->{
'_transcript_array'}} ) {
1175 my $new_transcript = $old_transcript->transform( @_ );
1176 # this can fail if gene transform failed
1178 return undef unless $new_transcript;
1180 if( ! defined $new_gene ) {
1181 if( $new_transcript->start() < $low_start ) {
1182 $low_start = $new_transcript->start();
1184 if( $new_transcript->end() > $hi_end ) {
1185 $hi_end = $new_transcript->end();
1187 $slice = $new_transcript->slice();
1188 $strand = $new_transcript->strand();
1190 push( @new_transcripts, $new_transcript );
1193 if( ! defined $new_gene ) {
1194 %$new_gene = %$self;
1195 bless $new_gene, ref( $self );
1197 $new_gene->start( $low_start );
1198 $new_gene->end( $hi_end );
1199 $new_gene->strand( $strand );
1200 $new_gene->slice( $slice );
1203 $new_gene->{
'_transcript_array'} = \@new_transcripts;
1206 if(exists $self->{attributes}) {
1207 $new_gene->{attributes} = [@{$self->{attributes}}];
1217 Example : my $new_gene = $gene->transfer($slice);
1218 Description: Moves
this Gene to given target slice coordinates. If Transcripts
1219 are attached they are moved as well. Returns a
new gene.
1230 my $new_gene = $self->SUPER::transfer( @_ );
1231 return undef unless $new_gene;
1233 if( exists $self->{
'_transcript_array'} ) {
1234 my @new_transcripts;
1235 for my $old_transcript ( @{$self->{
'_transcript_array'}} ) {
1236 my $new_transcript = $old_transcript->
transfer( @_ );
1237 push( @new_transcripts, $new_transcript );
1239 $new_gene->{
'_transcript_array'} = \@new_transcripts;
1242 if(exists $self->{attributes}) {
1243 $new_gene->{attributes} = [@{$self->{attributes}}];
1253 Example : $gene->display_xref($db_entry);
1254 Description: Getter/setter display_xref
for this gene.
1264 $self->{
'display_xref'} = shift
if(@_);
1265 return $self->{
'display_xref'};
1272 Description: This method returns a
string that is considered to be
1273 the
'display' identifier. For genes
this is (depending on
1274 availability and in
this order) the stable Id, the dbID or an
1278 Caller : web drawing code
1285 return $self->{
'stable_id'} || $self->dbID ||
'';
1289 =head2 recalculate_coordinates
1291 Example : $gene->recalculate_coordinates;
1292 Description: Called when
transcript added to the gene, tries to adapt the
1293 coords
for the gene.
1301 sub recalculate_coordinates {
1304 my $transcripts = $self->get_all_Transcripts();
1306 return if(!$transcripts || !@$transcripts);
1308 my ( $slice, $start, $end, $strand );
1309 $slice = $transcripts->[0]->slice();
1310 $strand = $transcripts->[0]->strand();
1311 $start = $transcripts->[0]->start();
1312 $end = $transcripts->[0]->end();
1314 my $transsplicing = 0;
1316 for my $t ( @$transcripts ) {
1317 if( $t->start() < $start ) {
1318 $start = $t->start();
1321 if( $t->end() > $end ) {
1325 if( $t->slice()->name() ne $slice->name() ) {
1326 throw(
"Transcripts with different slices not allowed on one Gene" );
1329 if( $t->strand() != $strand ) {
1333 if( $transsplicing ) {
1334 warning(
"Gene contained trans splicing event" );
1337 $self->start( $start );
1339 $self->strand( $strand );
1340 $self->slice( $slice );
1344 =head2 get_all_DASFactories
1346 Example : $dasref = $prot->get_all_DASFactories
1347 Description: Retrieves a listref of registered DAS objects
1348 TODO: Abstract to a DBLinkContainer obj
1349 Returntype : [ DAS_objects ]
1356 sub get_all_DASFactories {
1358 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
1362 =head2 get_all_DAS_Features
1364 Example : $features = $prot->get_all_DAS_Features;
1365 Description: Retrieves a hash reference to a hash of DAS feature
1366 sets, keyed by the DSN, NOTE the values of
this hash
1367 are an anonymous array containing:
1368 (1) a pointer to an array of features
1369 (2) a pointer to the DAS stylesheet
1370 Returntype : hashref of Bio::SeqFeatures
1377 sub get_all_DAS_Features{
1378 my ($self, @args) = @_;
1379 my $slice = $self->feature_Slice;
1380 return $self->SUPER::get_all_DAS_Features($slice);
1386 Arg [1] : Boolean $load_xrefs
1387 Load (or don
't load) xrefs. Default is to load xrefs.
1388 Example : $gene->load();
1389 Description : The Ensembl API makes extensive use of
1390 lazy-loading. Under some circumstances (e.g.,
1391 when copying genes between databases), all data of
1392 an object needs to be fully loaded. This method
1393 loads the parts of the object that are usually
1394 lazy-loaded. It will also call the equivalent
1395 method on all the transcripts of the gene.
1401 my ( $self, $load_xrefs ) = @_;
1403 if ( !defined($load_xrefs) ) { $load_xrefs = 1 }
1405 foreach my $transcript ( @{ $self->get_all_Transcripts() } ) {
1406 $transcript->load($load_xrefs);
1410 $self->get_all_Attributes();
1412 $self->canonical_transcript();
1415 $self->get_all_DBEntries();
1419 =head2 flush_Transcripts
1421 Description : Empties out caches and unsets fields of this Gene.
1422 Beware of further actions without adding some new transcripts.
1423 Example : $gene->flush_Transcripts();
1427 sub flush_Transcripts {
1429 $self->{'_transcript_array
'} = [];
1430 $self->{'canonical_transcript_id
'} = undef;
1431 $self->{'canonical_transcript
'} = undef;
1437 Description: getter setter for the gene attribute is_ref
1438 Arg [1] : (optional) 1 or 0
1444 my ( $self, $is_ref) = @_;
1446 if(defined($is_ref)){
1447 $self->{'is_ref
'} = $is_ref;
1450 $self->{'is_ref
'} = $self->adaptor->is_ref($self->dbID);
1452 return $self->{'is_ref
'};
1455 =head2 summary_as_hash
1457 Example : $gene_summary = $gene->summary_as_hash();
1458 Description : Extends Feature::summary_as_hash
1459 Retrieves a summary of this Gene object.
1461 Returns : hashref of arrays of descriptive strings
1462 Status : Intended for internal use
1465 sub summary_as_hash {
1467 my $summary_ref = $self->SUPER::summary_as_hash;
1468 $summary_ref->{'description
'} = $self->description;
1469 $summary_ref->{'biotype
'} = $self->get_Biotype->name;
1470 $summary_ref->{'Name
'} = $self->external_name if $self->external_name;
1471 $summary_ref->{'logic_name
'} = $self->analysis->logic_name() if defined $self->analysis();
1472 $summary_ref->{'source
'} = $self->source();
1473 $summary_ref->{'gene_id
'} = $summary_ref->{'id'};
1475 ## Will only work for merged species
1476 my $havana_gene = $self->havana_gene();
1477 $summary_ref->{'havana_gene
'} = $havana_gene->display_id() if defined $havana_gene;
1478 $summary_ref->{'havana_version
'} = $havana_gene->version() if defined $havana_gene;
1480 ## Stable identifier of the parent gene this gene was projected from
1481 my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_g");
1482 if (@{$proj_parent_attributes}) {
1483 $summary_ref->{'projection_parent_gene
'} = $proj_parent_attributes->[0]->value;
1485 return $summary_ref;
1490 Example : $havana_gene = $gene->havana_gene();
1491 Description : Locates the corresponding havana gene
1492 Returns : Bio::EnsEMBL::DBEntry
1497 my @otts = @{ $self->get_all_DBEntries('Vega_gene
') };
1499 foreach my $xref (@otts) {
1500 if ($xref->display_id() =~ /OTT/) {
1510 Example : my $biotype = $gene->get_Biotype;
1511 Description: Returns the Biotype object of this gene.
1512 When no biotype exists, defaults to 'protein_coding
'.
1513 When used to set to a biotype that does not exist in
1514 the biotype table, a biotype object is created with
1515 the provided argument as name and object_type gene.
1516 Returntype : Bio::EnsEMBL::Biotype
1524 # have a biotype object, return it
1526 return $self->{'biotype
'};
1529 # biotype is first set as a string retrieved from the gene table
1530 # there is no biotype object in the gene object, retrieve it using the biotype string
1531 # if no string, default to protein_coding. this is legacy behaviour and should probably be revisited
1532 my $biotype_name = $self->{'biotype
'} // 'protein_coding
';
1534 return $self->set_Biotype( $biotype_name );
1539 Arg [1] : String - the biotype name to set
1540 Example : my $biotype = $gene->set_Biotype('protein_coding
');
1541 Description: Sets the Biotype of this gene to the provided biotype name.
1542 Returns the Biotype object of this gene.
1543 When no biotype exists, defaults to 'protein_coding
' name.
1544 When setting a biotype that does not exist in
1545 the biotype table, a biotype object is created with
1546 the provided argument as name and object_type gene.
1547 Returntype : Bio::EnsEMBL::Biotype
1548 Exceptions : If no argument provided
1553 my ( $self, $name ) = @_;
1555 throw('No argument provided
') unless defined $name;
1557 # retrieve biotype object from the biotype adaptor
1558 if( defined $self->adaptor() ) {
1559 my $ba = $self->adaptor()->db()->get_BiotypeAdaptor();
1560 $self->{'biotype
'} = $ba->fetch_by_name_object_type( $name, 'gene
' );
1562 # if $self->adaptor is unavailable, create a new biotype object containing name and object_type only
1564 $self->{'biotype
'} = Bio::EnsEMBL::Biotype->new(
1566 -OBJECT_TYPE => 'gene
',
1570 return $self->{'biotype
'} ;
1574 Arg [1] : (optional) String - the biotype to set
1575 Example : $gene->biotype("protein_coding");
1576 Description: Getter/setter for the attribute biotype name.
1577 Recommended to use instead for a getter:
1578 $biotype = $gene->get_Biotype;
1580 $biotype = $gene->set_Biotype("protein_coding");
1581 The String biotype name can then be retrieved by
1582 calling name on the Biotype object:
1583 $biotype_name = $biotype->name;
1591 my ( $self, $biotype_name) = @_;
1593 # Setter? set_Biotype()
1594 if (defined $biotype_name) {
1595 return $self->set_Biotype($biotype_name)->name;
1598 # Getter? get_Biotype()
1599 return $self->get_Biotype->name;