3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 Questions may also be sent to the Ensembl help desk at
43 -ANALYSIS => $analysis,
44 -STABLE_ID =>
'ENSE000000123',
48 # seq() returns a Bio::Seq
49 my $seq = $exon->seq->seq();
51 # Peptide only makes sense within transcript context
52 my $pep = $exon->peptide($transcript)->seq();
54 # Normal feature operations can be performed:
55 $exon = $exon->transform(
'clone');
56 $exon->move( $new_start, $new_end, $new_strand );
57 print $exon->slice->seq_region_name();
61 This is a
class which represents an
exon which is part of a
transcript.
73 use Bio::Seq; # exons have to have sequences...
83 use constant SEQUENCE_ONTOLOGY => {
90 Arg [-SLICE]: Bio::EnsEMBL::Slice - Represents the sequence that
this
91 feature is on. The coordinates of the created feature are
92 relative to the start of the slice.
93 Arg [-START]: The start coordinate of
this feature relative to the start
94 of the slice it is sitting on. Coordinates start at 1 and
96 Arg [-END] : The end coordinate of
this feature relative to the start of
97 the slice it is sitting on. Coordinates start at 1 and are
99 Arg [-STRAND]: The orientation of
this feature. Valid values are 1,-1,0.
100 Arg [-SEQNAME] : (optional) A seqname to be used instead of the
default name
101 of the slice. Useful
for features that
do not have an
102 attached slice such as protein features.
103 Arg [-dbID] : (optional)
internal database
id
105 Arg [-PHASE] : the phase.
106 Arg [-END_PHASE]: the end phase
107 Arg [-STABLE_ID]: (optional) the stable
id of the
exon
108 Arg [-VERSION] : (optional) the version
109 Arg [-CREATED_DATE] : (optional) the created date
110 Arg [-MODIFIED_DATE]: (optional) the last modified date
113 Description: create an
Exon object
115 Exceptions :
if phase is not valid (i.e. not one of 0, 1, 2 or -1)
124 $class = ref $class || $class;
126 my $self = $class->SUPER::new( @_ );
128 my ( $phase, $end_phase, $stable_id, $version, $created_date,
129 $modified_date, $is_current, $is_constitutive )
131 "PHASE",
"END_PHASE",
132 "STABLE_ID",
"VERSION",
133 "CREATED_DATE",
"MODIFIED_DATE",
134 "IS_CURRENT",
"IS_CONSTITUTIVE"
139 if ( defined($phase) ) { # make sure phase is valid.
140 $self->phase($phase);
143 $self->{
'end_phase'} = $end_phase;
144 $self->{
'stable_id'} = $stable_id;
145 $self->{
'created_date'} = $created_date;
146 $self->{
'modified_date'} = $modified_date;
149 if ( !defined($version) ) { $version = 1 }
150 $self->{
'version'} = $version;
153 if ( !defined($is_current) ) { $is_current = 1 }
154 $self->{
'is_current'} = $is_current;
156 # Default is_constitutive
157 if ( !defined($is_constitutive) ) { $is_constitutive = 0 }
158 $self->{
'is_constitutive'} = $is_constitutive;
166 Arg [1] : (optional)
int $end_phase
167 Example : $end_phase = $feat->end_phase;
168 Description: Gets/Sets the end phase of the
exon.
169 end_phase = number of bases from the last incomplete codon of
171 Usually, end_phase = (phase + exon_length)%3
172 but end_phase could be -1
if the
exon is half-coding and its 3
175 Exceptions : warning
if end_phase is called without an argument and the
185 $self->{
'end_phase'} = shift;
188 if ( !defined( $self->{
'end_phase'} ) ) {
190 $stable_id = $self->stable_id
if defined $self->stable_id;
191 warning(
"No end phase set in Exon ".$stable_id.
". You must set it explicitly.");
194 return $self->{
'end_phase'};
200 Arg [1] : (optional)
int $phase
201 Example : my $phase = $exon->phase;
203 Description: Gets/Sets the phase of the
exon.
205 Exceptions :
throws if phase is not (0, 1, 2 or -1).
210 Get or set the phase of the Exon, which tells the
211 translation machinery, which makes a peptide from
212 the DNA, where to start.
214 The Ensembl phase convention can be thought of as
215 "the number of bases of the first codon which are
216 on the previous exon". It is therefore 0, 1 or 2
217 (or -1
if the
exon is non-coding). In ascii art,
218 with alternate codons represented by B<###> and
221 Previous Exon Intron This Exon
222 ...------------- -------------...
225 ...#+++###+++### 0 +++###+++###+...
226 ...+++###+++###+ 1 ++###+++###++...
227 ...++###+++###++ 2 +###+++###+++...
229 Here is another explanation from Ewan:
231 Phase means the place where the intron lands
232 inside the codon - 0 between codons, 1 between
233 the 1st and second base, 2 between the second and
234 3rd base. Exons therefore have a start phase and
235 a end phase, but introns have just one phase.
240 my ($self,$value) = @_;
242 if (defined($value)) {
243 # Value must be 0,1,2, or -1 for non-coding
244 if ($value =~ /^(-1|0|1|2)$/) {
245 #print STDERR "Setting phase to $value\n";
246 $self->{
'phase'} = $value;
248 throw(
"Bad value ($value) for exon phase. Should only be" .
252 return $self->{
'phase'};
259 Example : $frame = $exon->frame
260 Description: Gets the frame of
this exon
262 Exceptions : thrown
if an arg is passed
263 thrown
if frame cannot be calculated due to a bad phase value
270 my ($self,$value) = @_;
272 if( defined $value ) {
273 throw(
"Cannot set frame. Deduced from seq_start and phase");
276 # frame is mod 3 of the translation point
278 if( $self->phase == -1 ) {
279 return '.'; # gff convention
for no frame
info
281 if( $self->phase == 0 ) {
282 return $self->start%3;
285 if( $self->phase == 1 ) {
286 return ($self->start+2)%3;
289 if( $self->phase == 2 ) {
290 return ($self->start+1)%3;
293 throw(
"bad phase in exon ".$self->phase);
300 Arg [1] :
int $start (optional)
301 Example : $start = $exon->start();
302 Description: Getter/Setter
for the start of
this exon. The superclass
303 implementation is overridden to flush the
internal sequence
304 cache
if this value is altered
314 # if an arg was provided, flush the internal sequence cache
315 delete $self->{
'_seq_cache'}
if(@_);
316 return $self->SUPER::start(@_);
322 Arg [1] :
int $end (optional)
323 Example : $end = $exon->end();
324 Description: Getter/Setter
for the end of
this exon. The superclass
325 implementation is overridden to flush the
internal sequence
326 cache
if this value is altered
336 # if an arg was provided, flush the internal sequence cache
337 delete $self->{
'_seq_cache'}
if(@_);
338 return $self->SUPER::end(@_);
344 Arg [1] :
int $strand (optional)
345 Example : $strand = $exon->strand();
346 Description: Getter/Setter
for the strand of
this exon. The superclass
347 implementation is overridden to flush the
internal sequence
348 cache
if this value is altered
358 # if an arg was provided, flush the internal sequence cache
359 delete $self->{
'_seq_cache'}
if(@_);
360 return $self->SUPER::strand(@_);
366 The
transcript for which cDNA coordinates should be
368 Example : $cdna_start = $exon->cdna_start($transcript);
369 Description : Returns the start position of the
exon in cDNA
371 Since an
exon may be part of one or more transcripts,
372 the relevant
transcript must be given as argument to
374 Return type : Integer
375 Exceptions : Throws
if the given argument is not a
transcript.
376 Throws
if the first part of the
exon maps into a gap.
377 Throws
if the
exon can not be mapped at all.
384 my ($self, $transcript) = @_;
385 assert_ref($transcript,
'Bio::EnsEMBL::Transcript',
'transcript');
387 my $id = $transcript->dbID();
389 if(defined $id && exists $self->{cdna_start}->{$id}) {
390 return $self->{cdna_start}->{$id};
394 my @coords = $transcript->genomic2cdna($self->seq_region_start(), $self->seq_region_end(), $self->strand());
395 if(@coords && !$coords[0]->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
396 $cdna_start = $coords[0]->start();
399 throw "First part of exon maps into gap";
402 throw "Can not map exon";
406 $self->{cdna_start}->{$id} = $cdna_start;
410 } ## end sub cdna_start
415 The
transcript for which cDNA coordinates should be
417 Example : $cdna_end = $exon->cdna_end($transcript);
418 Description : Returns the end position of the
exon in cDNA
420 Since an
exon may be part of one or more transcripts,
421 the relevant
transcript must be given as argument to
423 Return type : Integer
424 Exceptions : Throws
if the given argument is not a
transcript.
425 Throws
if the last part of the
exon maps into a gap.
426 Throws
if the
exon can not be mapped at all.
433 my ($self, $transcript) = @_;
434 assert_ref($transcript,
'Bio::EnsEMBL::Transcript',
'transcript');
436 my $id = $transcript->dbID();
438 if(defined $id && exists $self->{cdna_end}->{$id}) {
439 return $self->{cdna_end}->{$id};
443 my @coords = $transcript->genomic2cdna($self->seq_region_start(), $self->seq_region_end(), $self->strand());
444 if(@coords && !$coords[-1]->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
445 $cdna_end = $coords[-1]->end();
448 throw "Last part of exon maps into gap";
451 throw "Can not map exon";
455 $self->{cdna_end}->{$id} = $cdna_end;
459 } ## end sub cdna_end
461 =head2 cdna_coding_start
464 The
transcript for which cDNA coordinates should be
466 Example : $cdna_coding_start = $exon->cdna_coding_start($transcript);
467 Description : Returns the start position of the coding region of the
468 exon in cDNA coordinates. Returns undef
if the whole
470 Since an
exon may be part of one or more transcripts,
471 the relevant
transcript must be given as argument to
473 Return type : Integer or undef
474 Exceptions : Throws
if the given argument is not a
transcript.
# Returns the cDNA coordinate at which the coding region begins within this
# exon for the given transcript, or undef when no part of the exon is coding.
#
# Arg [1]    : $transcript - checked with assert_ref to be a
#              Bio::EnsEMBL::Transcript.
# Returntype : Integer or undef.
# Caching    : the result is stored per transcript dbID in
#              $self->{cdna_coding_start}.  An undef result is also recorded
#              in $self->{cdna_coding_end}, since an exon with no coding
#              start has no coding end either.
sub cdna_coding_start {
  my ($self, $transcript) = @_;
  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');

  my $id = $transcript->dbID();

  # Serve a previously computed answer (which may legitimately be undef).
  if (defined $id && exists $self->{cdna_coding_start}->{$id}) {
    return $self->{cdna_coding_start}->{$id};
  }

  # Stays undef for a non-coding transcript or a wholly non-coding exon.
  my $cdna_coding_start;
  my $transcript_coding_start = $transcript->cdna_coding_start();

  if (defined $transcript_coding_start) {
    my $cdna_start = $self->cdna_start($transcript);

    if ($transcript_coding_start < $cdna_start) {
      # Coding region starts upstream of this exon.  The exon is coding
      # from its first base unless the coding region also ends upstream.
      if ($transcript->cdna_coding_end() >= $cdna_start) {
        $cdna_coding_start = $cdna_start;
      }
    }
    elsif ($transcript_coding_start <= $self->cdna_end($transcript)) {
      # Coding region starts within this exon.
      $cdna_coding_start = $transcript_coding_start;
    }
    # Otherwise the coding region starts downstream of this exon.
  }

  # The cache is only ever read with a defined dbID, so only write it then;
  # this also avoids using undef as a hash key.
  if (defined $id) {
    $self->{cdna_coding_start}->{$id} = $cdna_coding_start;
    $self->{cdna_coding_end}->{$id}   = undef
      if !defined $cdna_coding_start;
  }

  return $cdna_coding_start;
} ## end sub cdna_coding_start
532 =head2 cdna_coding_end
535 The
transcript for which cDNA coordinates should be
537 Example : $cdna_coding_end = $exon->cdna_coding_end($transcript);
538 Description : Returns the end position of the coding region of the
539 exon in cDNA coordinates. Returns undef
if the whole
541 Since an
exon may be part of one or more transcripts,
542 the relevant
transcript must be given as argument to
544 Return type : Integer or undef
545 Exceptions : Throws
if the given argument is not a
transcript.
# Returns the cDNA coordinate at which the coding region ends within this
# exon for the given transcript, or undef when no part of the exon is coding.
#
# Arg [1]    : $transcript - checked with assert_ref to be a
#              Bio::EnsEMBL::Transcript.
# Returntype : Integer or undef.
# Caching    : the result is stored per transcript dbID in
#              $self->{cdna_coding_end}.  An undef result is also recorded
#              in $self->{cdna_coding_start}.
sub cdna_coding_end {
  my ($self, $transcript) = @_;
  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');

  my $id = $transcript->dbID();

  # Serve a previously computed answer (which may legitimately be undef).
  if (defined $id && exists $self->{cdna_coding_end}->{$id}) {
    return $self->{cdna_coding_end}->{$id};
  }

  # Stays undef for a non-coding transcript or a wholly non-coding exon.
  my $cdna_coding_end;
  my $transcript_coding_end = $transcript->cdna_coding_end();

  if (defined $transcript_coding_end) {
    my $cdna_end = $self->cdna_end($transcript);

    if ($transcript_coding_end > $cdna_end) {
      # Coding region ends downstream of this exon.  The exon is coding up
      # to its last base unless the coding region also starts downstream.
      if ($transcript->cdna_coding_start() <= $cdna_end) {
        $cdna_coding_end = $cdna_end;
      }
    }
    elsif ($transcript_coding_end >= $self->cdna_start($transcript)) {
      # Coding region ends within this exon.
      $cdna_coding_end = $transcript_coding_end;
    }
    # Otherwise the coding region ends upstream of this exon.
  }

  # The cache is only ever read with a defined dbID, so only write it then;
  # this also avoids using undef as a hash key.
  if (defined $id) {
    $self->{cdna_coding_end}->{$id}   = $cdna_coding_end;
    $self->{cdna_coding_start}->{$id} = undef
      if !defined $cdna_coding_end;
  }

  return $cdna_coding_end;
} ## end sub cdna_coding_end
604 =head2 coding_region_start
607 Example : $coding_region_start =
609 Description : Returns the start position of the coding region
610 of the
exon in slice-relative coordinates on the
611 forward strand. Returns undef
if the whole
exon is
613 Since an
exon may be part of one or more transcripts,
614 the relevant
transcript must be given as argument to
616 Return type : Integer or undef
617 Exceptions : Throws
if the given argument is not a
transcript.
623 # The implementation of this method is analogous to the implementation
624 # of cdna_coding_start().
# Returns the start of the coding part of this exon, in the same coordinates
# as $self->start(), for the given transcript; undef when no part of the exon
# is coding.
#
# Arg [1]    : $transcript - checked with assert_ref to be a
#              Bio::EnsEMBL::Transcript.
# Returntype : Integer or undef.
# Caching    : the result is stored per transcript dbID in
#              $self->{coding_region_start}.  An undef result is also
#              recorded in $self->{coding_region_end}.
sub coding_region_start {
  my ($self, $transcript) = @_;
  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');

  my $id = $transcript->dbID();

  # Serve a previously computed answer (which may legitimately be undef).
  if (defined $id && exists $self->{coding_region_start}->{$id}) {
    return $self->{coding_region_start}->{$id};
  }

  # Stays undef for a non-coding transcript or a wholly non-coding exon.
  my $coding_region_start;
  my $transcript_coding_start = $transcript->coding_region_start();

  if (defined $transcript_coding_start) {
    my $start = $self->start();

    if ($transcript_coding_start < $start) {
      # Coding region starts upstream of this exon.  The exon is coding
      # from its start unless the coding region also ends upstream.
      if ($transcript->coding_region_end() >= $start) {
        $coding_region_start = $start;
      }
    }
    elsif ($transcript_coding_start <= $self->end()) {
      # Coding region starts within this exon.
      $coding_region_start = $transcript_coding_start;
    }
    # Otherwise the coding region starts downstream of this exon.
  }

  # The cache is only ever read with a defined dbID, so only write it then;
  # this also avoids using undef as a hash key.
  if (defined $id) {
    $self->{coding_region_start}->{$id} = $coding_region_start;
    $self->{coding_region_end}->{$id}   = undef
      if !defined $coding_region_start;
  }

  return $coding_region_start;
} ## end sub coding_region_start
679 =head2 coding_region_end
682 Example : $coding_region_end =
684 Description : Returns the end position of the coding region of
685 the
exon in slice-relative coordinates on the
686 forward strand. Returns undef
if the whole
exon is
688 Since an
exon may be part of one or more transcripts,
689 the relevant
transcript must be given as argument to
691 Return type : Integer or undef
692 Exceptions : Throws
if the given argument is not a
transcript.
698 # The implementation of this method is analogous to the implementation
699 # of cdna_coding_end().
# Returns the end of the coding part of this exon, in the same coordinates
# as $self->end(), for the given transcript; undef when no part of the exon
# is coding.
#
# Arg [1]    : $transcript - checked with assert_ref to be a
#              Bio::EnsEMBL::Transcript.
# Returntype : Integer or undef.
# Caching    : the result is stored per transcript dbID in
#              $self->{coding_region_end}.  An undef result is also recorded
#              in $self->{coding_region_start}.
sub coding_region_end {
  my ($self, $transcript) = @_;
  assert_ref($transcript, 'Bio::EnsEMBL::Transcript', 'transcript');

  my $id = $transcript->dbID();

  # Serve a previously computed answer (which may legitimately be undef).
  if (defined $id && exists $self->{coding_region_end}->{$id}) {
    return $self->{coding_region_end}->{$id};
  }

  # Stays undef for a non-coding transcript or a wholly non-coding exon.
  my $coding_region_end;
  my $transcript_coding_end = $transcript->coding_region_end();

  if (defined $transcript_coding_end) {
    my $end = $self->end();

    if ($transcript_coding_end > $end) {
      # Coding region ends downstream of this exon.  The exon is coding up
      # to its end unless the coding region also starts downstream.
      if ($transcript->coding_region_start() <= $end) {
        $coding_region_end = $end;
      }
    }
    elsif ($transcript_coding_end >= $self->start()) {
      # Coding region ends within this exon.
      $coding_region_end = $transcript_coding_end;
    }
    # Otherwise the coding region ends upstream of this exon.
  }

  # The cache is only ever read with a defined dbID, so only write it then;
  # this also avoids using undef as a hash key.
  if (defined $id) {
    $self->{coding_region_end}->{$id}   = $coding_region_end;
    $self->{coding_region_start}->{$id} = undef
      if !defined $coding_region_end;
  }

  return $coding_region_end;
} ## end sub coding_region_end
758 Example : $rank = $exon->rank($transcript);
759 Description : Returns the rank of the
exon relative to
761 Since an
exon may be part of one or more transcripts,
762 the relevant
transcript must be given as argument to
764 Return type : Integer
765 Exceptions : Throws
if the given argument is not a
transcript.
773 my ($self, $transcript) = @_;
774 assert_ref($transcript,
'Bio::EnsEMBL::Transcript',
'transcript');
776 my $rank = $transcript->exon_rank($self);
784 Example : $slice = $exon->
slice();
785 Description: Getter/Setter
for the slice
this exon is on. The superclass
786 implementation is overridden to flush the
internal sequence
787 cache
if this value is altered
796 my ( $self, $slice ) = @_;
798 if ( defined($slice) ) {
799 # If a new slice was provided, flush the internal sequence cache and
800 # transfer all supporting evidence to the new slice.
802 delete $self->{
'_seq_cache'};
804 if ( exists( $self->{
'_supporting_evidence'} ) ) {
807 for my $old_feature ( @{ $self->{
'_supporting_evidence'} } ) {
811 if ( defined( $old_feature->slice() ) ) {
812 $new_feature = $old_feature->transfer($slice);
814 # If the old feature does not have a slice, assume transfer is
816 $new_feature = $old_feature;
819 push( @new_features, $new_feature );
822 $self->{
'_supporting_evidence'} = \@new_features;
825 return $self->SUPER::slice($slice);
827 return $self->SUPER::slice(undef);
829 return $self->SUPER::slice();
836 Example :
if ($exonA->equals($exonB)) { ... }
837 Description : Compares two exons
for equality.
838 The test
for equality goes through the following list
839 and terminates at the first
true match:
842 then the exons are *not* equal.
843 2. If both exons have stable IDs: if these are the
844 same, the exons are equal, otherwise not.
845 3. If the exons have the same start, end, strand, and
846 phase, then they are equal, otherwise not.
848 Return type : Boolean (0, 1)
850 Exceptions : Thrown if a non-exon is passed as the argument.
855 my ( $self, $exon ) = @_;
857 if ( !defined($exon) ) {
return 0 }
858 if ( $self eq $exon ) {
return 1 }
860 assert_ref( $exon,
'Bio::EnsEMBL::Exon' );
862 my $feature_equals = $self->SUPER::equals($exon);
863 if ( defined($feature_equals) && $feature_equals == 0 ) {
867 if ( defined( $self->stable_id() ) && defined( $exon->stable_id() ) )
869 if ( $self->stable_id() eq $exon->stable_id() ) {
877 if ( $self->start() == $exon->start() &&
878 $self->end() == $exon->end() &&
879 $self->strand() == $exon->strand() &&
880 $self->phase() == $exon->phase() &&
881 $self->end_phase() == $exon->end_phase() )
893 Arg [3] : (optional)
int strand
895 Description: Sets the start, end and strand in one call rather than in
896 3 separate calls to the start(), end() and strand() methods.
897 This is for convenience and for speed when this needs to be
898 done within a tight loop. This overrides the superclass
899 move() method so that the internal sequence cache can be
900 flushed if the
exon is moved.
902 Exceptions : Thrown if invalid arguments are provided
910 # flush the internal sequence cache
911 delete $self->{
'_seq_cache'};
912 return $self->SUPER::move(@_);
918 Arg 1 : String $coordinate_system_name
919 Arg [2] : String $coordinate_system_version
920 Description: moves
this exon to the given coordinate system. If
this exon has
921 attached supporting evidence, they move as well.
923 Exceptions : wrong parameters
932 my $new_exon = $self->SUPER::transform( @_ );
933 if (not defined $new_exon or
934 $new_exon->length != $self->length) {
938 if( exists $self->{
'_supporting_evidence'} ) {
940 for my $old_feature ( @{$self->{
'_supporting_evidence'}} ) {
941 my $new_feature = $old_feature->
transform( @_ );
942 if (defined $new_feature) {
943 push( @new_features, $new_feature );
946 $new_exon->{
'_supporting_evidence'} = \@new_features;
949 #dont want to share the same sequence cache
950 delete $new_exon->{
'_seq_cache'};
960 Description: Moves
this Exon to given target slice coordinates. If Features
961 are attached they are moved as well. Returns a
new exon.
972 my $new_exon = $self->SUPER::transfer( @_ );
973 return undef unless $new_exon;
975 if( exists $self->{
'_supporting_evidence'} ) {
977 for my $old_feature ( @{$self->{
'_supporting_evidence'}} ) {
978 my $new_feature = $old_feature->
transfer( @_ );
979 push( @new_features, $new_feature );
981 $new_exon->{
'_supporting_evidence'} = \@new_features;
984 #dont want to share the same sequence cache
985 delete $new_exon->{
'_seq_cache'};
991 =head2 add_supporting_features
994 Example : $exon->add_supporting_features(@features);
995 Description: Adds a list of supporting features to
this exon.
996 Duplicate features are not added.
997 If supporting features are added manually in
this
998 way, prior to calling get_all_supporting_features then the
999 get_all_supporting_features call will not retrieve supporting
1000 features from the database.
1002 Exceptions :
throw if any of the features are not Feature
1003 throw if any of the features are not in the same coordinate
# Appends the given features to this exon's supporting-evidence list
# ($self->{_supporting_evidence}), skipping any feature object that is
# already in the list.
#
# Arg [1..N] : @features - objects that must be Bio::EnsEMBL::Feature
#              instances (tested with isa).
# Returntype : none.
# Exceptions : throws if a feature is not a Bio::EnsEMBL::Feature;
#              throws if both the exon and the feature have a slice and the
#              slice names differ.
sub add_supporting_features {
  my ($self, @features) = @_;

  return unless @features;

  $self->{_supporting_evidence} ||= [];
  my $evidence = $self->{_supporting_evidence};

  FEATURE:
  foreach my $feature (@features) {
    unless ($feature && $feature->isa("Bio::EnsEMBL::Feature")) {
      throw("Supporting feat [$feature] not a " .
            "Bio::EnsEMBL::Feature");
    }

    # Slices are only compared when both sides actually have one.
    if (   defined $self->slice()
        && defined $feature->slice()
        && $self->slice()->name() ne $feature->slice()->name())
    {
      throw("Supporting feat not in same coord system as exon\n" .
            "exon is attached to [".$self->slice()->name()."]\n" .
            "feat is attached to [".$feature->slice()->name()."]");
    }

    # Check whether this very object has been added already; references
    # are compared numerically, i.e. by identity.
    foreach my $added_feature (@{$evidence}) {
      next FEATURE if $feature == $added_feature;
    }

    # No duplicate was found, add the feature.
    push(@{$evidence}, $feature);
  }

  return;
}
1045 =head2 flush_supporting_features
1047 Example : $exon->flush_supporting_features;
1048 Description : Removes all supporting evidence from the
exon.
1049 Return type : (Empty) listref
# Discards all supporting evidence held by this exon by replacing
# $self->{'_supporting_evidence'} with a fresh empty array reference.
#
# Returntype : the new (empty) listref.
sub flush_supporting_features {
  my $self = shift;

  my $empty = [];
  $self->{'_supporting_evidence'} = $empty;

  return $empty;
}
1062 =head2 get_all_supporting_features
1065 Example : @evidence = @{$exon->get_all_supporting_features()};
1066 Description: Retrieves any supporting features added manually by
1067 calls to add_supporting_features. If no features have been
1068 added manually and
this exon is in a database (i.e. it has
1069 an adaptor), fetch from the database
# Returns the supporting features of this exon as a listref.
#
# If the '_supporting_evidence' key does not exist yet and the exon has an
# adaptor, the features are fetched through the adaptor's
# SupportingFeatureAdaptor and stored on the exon.  When nothing is stored
# (or the stored value is false) an empty listref is returned.
#
# Returntype : listref.
sub get_all_supporting_features {
  my $self = shift;

  # Only go to the database when nothing has been set, manually or by an
  # earlier fetch.
  if (!exists $self->{_supporting_evidence} && $self->adaptor) {
    my $sfa = $self->adaptor->db->get_SupportingFeatureAdaptor();
    $self->{_supporting_evidence} = $sfa->fetch_all_by_Exon($self);
  }

  return $self->{_supporting_evidence} || [];
}
1091 =head2 find_supporting_evidence
1093 # This method is only for genebuild backwards compatibility.
1094 # Avoid using it if possible
1097 The list of features to search
for supporting (i.e. overlapping)
1099 Arg [2] : (optional)
boolean $sorted
1100 Used to speed up the calculation of overlapping features.
1101 Should be set to
true if the list of features is sorted in
1102 ascending order on their start coordinates.
1103 Example : $exon->find_supporting_evidence(\@features);
1104 Description: Looks through all the similarity features and
1105 stores as supporting features any feature
1106 that overlaps with an
exon.
1110 Status : Medium Risk
# Scans a list of features and registers, via add_supporting_features(),
# every feature that overlaps this exon on the same strand and whose
# entire_seq() name equals the name of this exon's slice.  Features that
# report sub_SeqFeature are searched recursively through their sub-features
# instead of being tested themselves.
#
# Arg [1]    : $features - arrayref of feature objects.
# Arg [2]    : $sorted   - (optional) pass 1 when the list is sorted by
#              ascending start; scanning then stops at the first feature
#              starting beyond the end of the exon.
# Returntype : none.
sub find_supporting_evidence {
  my ($self, $features, $sorted) = @_;

  # The recursive call below passes no $sorted, so test definedness first
  # rather than comparing undef numerically.
  my $is_sorted = defined $sorted && $sorted == 1;

  foreach my $f (@$features) {
    # Return if we have a sorted feature array and are past the exon.
    if ($is_sorted && $f->start > $self->end) {
      return;
    }

    if ($f->sub_SeqFeature) {
      my @subf = $f->sub_SeqFeature;
      $self->find_supporting_evidence(\@subf);
      next;
    }

    next unless $f->entire_seq()->name eq $self->slice()->name;

    if (   $f->end >= $self->start
        && $f->start <= $self->end
        && $f->strand == $self->strand)
    {
      $self->add_supporting_features($f);
    }
  }

  return;
}
1140 Arg [1] :
string $stable_id
1142 Description: get/set
for attribute stable_id
1152 $self->{
'stable_id'} = shift
if( @_ );
1153 return $self->{
'stable_id'};
1159 Arg [1] :
string $created_date
1161 Description: get/set
for attribute created_date
1171 $self->{
'created_date'} = shift
if ( @_ );
1172 return $self->{
'created_date'};
1176 =head2 modified_date
1178 Arg [1] :
string $modified_date
1180 Description: get/set
for attribute modified_date
1190 $self->{
'modified_date'} = shift
if ( @_ );
1191 return $self->{
'modified_date'};
1197 Arg [1] :
string $version
1199 Description: get/set
for attribute version
1209 $self->{
'version'} = shift
if( @_ );
1210 return $self->{
'version'};
1213 =head2 stable_id_version
1215 Arg [1] : (optional) String - the stable ID with version to set
1216 Example : $exon->stable_id_version("ENSE0000000001.3");
1217 Description: Getter/setter
for stable
id with version
for this exon.
# Getter/setter for the stable ID combined with its version, written as
# "<stable_id>.<version>".
#
# Arg [1]    : (optional) string.  When a true value is given it is split at
#              the LAST period: the part before becomes $self->{stable_id}
#              and the part after becomes $self->{version}.  Without a
#              period (or with one only at position 0) the whole string is
#              the stable ID and the version is reset to undef.
# Returntype : string - the stable ID, followed by ".<version>" when the
#              version is true.  An unset stable ID contributes an empty
#              string.
sub stable_id_version {
  my $self = shift;

  if (my $stable_id = shift) {
    # See if there's an embedded period, assume that's a
    # version, might not work for some species but you
    # should use ->stable_id() and version() if you're worried
    my $vindex = rindex($stable_id, '.');

    if ($vindex > 0) {
      $self->{stable_id} = substr($stable_id, 0, $vindex);
      $self->{version}   = substr($stable_id, $vindex + 1);
    }
    else {
      # No version delimiter found.
      $self->{stable_id} = $stable_id;
      $self->{version}   = undef;
    }
  }

  # Avoid concatenating undef when no stable ID has been set.
  my $id = defined $self->{stable_id} ? $self->{stable_id} : '';

  return $id . ($self->{version} ? ".$self->{version}" : '');
}
1244 Arg [1] : Boolean $is_current
1245 Example : $exon->is_current(1)
1246 Description: Getter/setter
for is_current state of
this exon.
1255 my ( $self, $value ) = @_;
1257 if ( defined($value) ) {
1258 $self->{
'is_current'} = $value;
1260 return $self->{
'is_current'};
1263 =head2 is_constitutive
1265 Arg [1] : Boolean $is_constitutive
1266 Example : $exon->is_constitutive(0)
1267 Description: Getter/setter
for is_constitutive state of
this exon.
# Getter/setter for the is_constitutive flag of this exon.
#
# Arg [1]    : (optional) $value - new flag value; ignored when undef, so
#              the flag cannot be reset to undef through this method.
# Returntype : the current value of $self->{'is_constitutive'}.
sub is_constitutive {
  my ( $self, $value ) = @_;

  $self->{'is_constitutive'} = $value if defined $value;

  return $self->{'is_constitutive'};
}
1287 Example : $exon->is_coding()
1288 Description: Says
if the
exon is within the translation or not
1297 my ( $self, $transcript) = @_;
1299 if (!$transcript) {
throw(
"Transcript parameter is required for " . __PACKAGE__ .
"->is_coding()."); }
1301 if (!$transcript->translate) {
return 0; }
1303 # coding region overlaps start of exon
1304 if ($transcript->coding_region_start <= $self->start && $self->start <= $transcript->coding_region_end) {
return 1; }
1306 # coding region overlaps end of exon
1307 if ($transcript->coding_region_end >= $self->end && $self->end >= $transcript->coding_region_start) {
return 1; }
1309 # to handle cases where transcript coding region can fall within the exon start and exon end, eg: if it is one exon transcript
1310 if ($transcript->coding_region_start >= $self->start && $transcript->coding_region_end <= $self->end ) {
return 1; }
1315 =head2 adjust_start_end
1317 Arg 1 :
int $start_adjustment
1318 Arg 2 :
int $end_adjustment
1320 Description: Returns a new Exon whose start and end coordinates are shifted by the given adjustments
1323 Caller : Transcript->get_all_translateable_Exons()
# Builds a shifted copy of this exon; the visible statements only modify
# $new_exon, never $self.
1328 sub adjust_start_end {
1329 my ( $self, $start_adjust, $end_adjust ) = @_;
# Shallow copy: every key/value pair of this exon's hash is copied into
# $new_exon, so referenced objects are shared between the two exons.
# NOTE(review): the statement that creates $new_exon is not visible in
# this excerpt.
1332 %{$new_exon} = %{$self};
1334 #invalidate the sequence cache
1335 delete $new_exon->{
'_seq_cache'};
# Strand 1: each adjustment is added to its own coordinate.
1337 if( $self->strand() == 1 ) {
1338 $new_exon->
start( $self->start() + $start_adjust );
1339 $new_exon->end( $self->end() + $end_adjust )
# Presumably the branch for any other strand (the branch keyword is not
# visible here): the adjustments swap ends and are subtracted.
1341 $new_exon->start( $self->start() - $end_adjust );
1342 $new_exon->end( $self->end() - $start_adjust )
1352 Example : my $pep_str = $exon->peptide($transcript)->
seq;
1353 Description: Retrieves the portion of the transcript's peptide
1355 because outside of the context of a
transcript it is not
1356 possible to correctly determine the translation. Note that
1357 an entire amino acid will be present at the
exon boundaries
1358 even
if only a partial codon is present. Therefore the
1359 concatenation of all of the peptides of a transcript's exons
1360 is not the same as a transcript's translation because the
1361 summation may contain duplicated amino acids at splice sites.
1362 In the
case that
this exon is entirely UTR, a Bio::Seq
object
1363 with an empty sequence
string is returned.
1364 Returntype : Bio::Seq
1365 Exceptions : thrown
if transcript argument is not provided
# Reject anything that is not a Bio::EnsEMBL::Transcript object.
1375 unless($tr && ref($tr) && $tr->isa(
'Bio::EnsEMBL::Transcript')) {
1376 throw(
"transcript arg must be Bio::EnsEMBL:::Transcript not [$tr]");
1379 #convert exons coordinates to peptide coordinates
# The exon is first transferred onto the transcript's slice; the throw
# below is the report for a transfer that did not succeed.
1380 my $tmp_exon = $self->transfer($tr->slice);
1382 throw(
"Couldn't transfer exon to transcript's slice");
# Map the transferred exon's start/end/strand into peptide coordinates.
1386 $tr->genomic2pep($tmp_exon->start, $tmp_exon->end, $tmp_exon->strand);
# Keep only the results that are Bio::EnsEMBL::Mapper::Coordinate objects.
1389 @coords = grep {$_->isa(
'Bio::EnsEMBL::Mapper::Coordinate')} @coords;
1391 #if this is UTR then the peptide will be empty string
# More than one mapped piece: try to collapse them into a single range.
1395 if(scalar(@coords) > 1) {
1396 my $coord = $self->_merge_ajoining_coords(\@coords);
# Error raised when the pieces do not form one continuous peptide range;
# both stable ids are included to help locate the mismatch.
1401 my ($e_id, $tr_id) = ($self->stable_id(), $tr->stable_id());
1402 throw(
"Error. Exon maps to multiple locations in peptide and those".
1403 " locations are not continuous." .
1404 " Is this exon [$e_id] a member of this transcript [$tr_id]?");
# Exactly one mapped piece: cut that range out of the translated peptide.
# NOTE(review): as an elsif this branch is skipped whenever the branch
# above ran - confirm how a successfully merged coord yields a peptide.
1407 elsif(scalar(@coords) == 1) {
1409 my $pep = $tr->translate;
1411 #bioperl doesn't give back residues for incomplete codons
1412 #make sure we don't subseq too far...
# Clamp the end to the peptide length, then keep start from exceeding end.
1414 $end = ($c->end > $pep->length) ? $pep->length : $c->end;
1415 $start = ($c->start < $end) ? $c->start : $end;
# NOTE(review): translate() is called a second time here although $pep
# already holds its result.
1416 $pep_str = $tr->translate->subseq($start, $end);
# Wrap the peptide string in a protein Bio::Seq whose id is this exon's
# display_id.
1420 Bio::Seq->new( -seq => $pep_str,
1421 -moltype =>
'protein',
1422 -alphabet =>
'protein',
1423 -
id => $self->display_id );
1426 =head2 _merge_ajoining_coords
1430 Description : Merges coords which are adjoining or overlapping
1432 Exceptions : Exception if the coords cannot be condensed into one location
1434 Status : Development
# Folds a list of coordinate objects into one span that begins at the
# first element.
# $coords is an array reference; its first element is removed with shift,
# so the caller's array is modified.
1438 sub _merge_ajoining_coords {
1439 my ($self, $coords) = @_;
1442 my $coord = shift @{$coords};
1443 my $start = $coord->
start();
1444 my $last_end = $coord->end();
# Walk the remaining coords. One that starts no later than $last_end + 1
# touches or overlaps the running span, so the span end moves to its end.
# NOTE(review): $last_end is overwritten rather than maximised, so a coord
# nested inside the running span would shrink it - confirm that inputs
# arrive ordered and non-nested.
1445 foreach my $other_coord (@{$coords}) {
1446 if( ($last_end + 1) >= $other_coord->start() ) {
1447 $last_end = $other_coord->end();
# Values for the merged coordinate: id, strand and rank come from the
# first coord, the range is $start .. $last_end.
# NOTE(review): the call these arguments belong to is not visible in this
# excerpt.
1460 $coord->id(), $start, $last_end, $coord->strand(), $coord->rank());
1470 Example : my $seq_str = $exon->seq->seq;
1471 Description: Retrieves the dna sequence of
this Exon.
1472 Returned in a Bio::Seq
object. Note that the sequence may
1473 include UTRs (or even be entirely UTR).
1474 Returntype : Bio::Seq or undef
1475 Exceptions : warning
if argument passed,
1476 warning if exon does not have attached slice
1477 warning
if exon strand is not defined (or 0)
1484 my ( $self, $arg ) = @_;
# A passed-in argument triggers a warning but is still used: the value of
# its seq() method replaces the cached sequence.
1486 if ( defined $arg ) {
1487 warning(
"seq setting on Exon not supported currently");
1488 $self->{
'_seq_cache'} = $arg->seq();
# Build the sequence only when nothing is cached yet.
1491 if ( !defined( $self->{
'_seq_cache'} ) ) {
# Without a slice there is nothing to read the sequence from.
1494 if ( !defined $self->slice() ) {
1495 warning(
"Cannot retrieve seq for exon without slice\n");
# A false strand (0 or undef) gives no orientation to read in.
1499 if ( !$self->strand() ) {
1500 warning(
"Cannot retrieve seq for unstranded exon\n");
# Circular regions: an exon may span the origin, in which case it is read
# as two pieces that are then joined.
1504 if ($self->slice->is_circular() ) {
1505 if ( $self->slice->start > $self->slice->end) {
1506 # Normally exons overlapping chromosome origin will have negative feature start, but slice will be from 1 .. length
1507 # But in case you got an exon attached to a sub slice try this
1508 my $mid_point = $self->slice()->seq_region_length() - $self->slice()->start() + 1;
1509 my $seq1 = $self->slice()->subseq( $self->start(), $mid_point, $self->strand() );
1511 my $seq2 = $self->slice()->subseq( $mid_point + 1, $self->end(), $self->strand() );
# Join in reading order: piece 1 first on the forward strand, piece 2
# first otherwise.
1513 $seq = $self->strand() > 0 ?
"$seq1$seq2" :
"$seq2$seq1";
1514 } elsif ( $self->start < 0 || $self->start > $self->end) {
1515 # Normally exons overlapping chromosome origin will be 0 based, and can have negative start
1516 # But if you go via sub_Slice it gives you chromosome based coordinates, i.e it will have start greater than end
# NOTE(review): $start_point is assigned but not used in any of the
# visible lines.
1517 my $start_point = $self->slice->seq_region_length + $self->slice->start;
1518 my $mid_point = $self->slice->seq_region_length;
1519 my $seq1 = $self->slice->subseq( $self->start, $mid_point, $self->strand);
1520 my $seq2 = $self->slice->subseq(1, $self->end, $self->strand );
1521 $seq = $self->strand > 0 ?
"$seq1$seq2" :
"$seq2$seq1";
1523 # End this is the case for genes not overlapping the origin
1524 $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() );
# Presumably the non-circular case (the branch keyword is not visible
# here): one subseq() call covers the whole exon.
1527 $seq = $self->slice()->subseq( $self->start(), $self->end(), $self->strand() );
# Remember the string so later calls skip the slice lookup.
1530 $self->{
'_seq_cache'} = $seq;
1531 } ## end
if ( !defined( $self->...))
# The cached string is wrapped in a Bio::Seq with a dna alphabet and this
# exon's display_id as its id.
1534 Bio::Seq->new( -seq => $self->{
'_seq_cache'},
1535 -
id => $self->display_id,
1537 -alphabet =>
'dna' );
=head2 hashkey

  Arg [1]    : none
  Example    : if(exists $hash{$exon->hashkey}) { do_something(); }
  Description: Returns a unique hashkey that can be used to uniquely identify
               this exon.  Exons are considered to be identical if they share
               the same seq_region, start, end, strand, phase, end_phase.
               Note that this will consider two exons on different slices
               to be different, even if they actually are not.
  Returntype : string formatted as slice_name-start-end-strand-phase-end_phase
  Exceptions : thrown if not all the necessary attributes needed to generate
               a unique hash value are set

=cut

sub hashkey {
  my $self = shift;

  my $slice      = $self->{'slice'};
  my $slice_name = ($slice) ? $slice->name() : undef;

  if(!defined($slice_name)) {
    throw('Slice must be set to generate correct hashkey.');
  }

  # Every remaining component is mandatory.  A missing start used to
  # produce only a warning and a malformed key; it now throws like the
  # other attributes, as the documentation states.
  my @key_parts = ($slice_name);
  foreach my $attribute (qw(start end strand phase end_phase)) {
    my $value = $self->{$attribute};
    if(!defined($value)) {
      throw("$attribute attribute must be defined to generate correct hashkey.");
    }
    push @key_parts, $value;
  }

  return join('-', @key_parts);
}
=head2 display_id

  Arg [1]    : none
  Example    : print $exon->display_id();
  Description: This method returns a string that is considered to be
               the 'display' identifier. For exons this is (depending on
               availability and in this order) the stable Id, the dbID or an
               empty string.
  Returntype : string
  Exceptions : none
  Caller     : web drawing code

=cut

sub display_id {
  my ($self) = @_;

  # Fall through the candidates in order of preference; dbID is only
  # consulted when no true stable id is stored.
  my $identifier = $self->{'stable_id'};
  $identifier ||= $self->dbID;
  $identifier ||= '';

  return $identifier;
}
1622 Example : $exon->load();
1623 Description : The Ensembl API makes extensive use of
1624 lazy-loading. Under some circumstances (e.g.,
1625 when copying genes between databases), all data of
1626 an
object needs to be fully loaded. This method
1627 loads the parts of the
object that are usually
# Called for its side effect only; the return value is discarded.
# Presumably this forces the supporting features to be fetched now rather
# than on first use - confirm against get_all_supporting_features().
1638 $self->get_all_supporting_features();
=head2 summary_as_hash

  Example       : $exon_summary = $exon->summary_as_hash();
  Description   : Retrieves a summary of this Exon.
                  Starts from the hash built by the parent class
                  (SUPER::summary_as_hash) and adds the exon-specific
                  entries constitutive, ensembl_phase, ensembl_end_phase
                  and exon_id (a copy of the inherited id entry).
  Returns       : hashref of descriptive strings
  Status        : Intended for internal use

=cut

sub summary_as_hash {
  my ($self) = @_;

  my $summary = $self->SUPER::summary_as_hash;

  # Summary key => accessor supplying its value, in the order they are
  # filled in.
  my @exon_entries = (
    [ 'constitutive',      'is_constitutive' ],
    [ 'ensembl_phase',     'phase' ],
    [ 'ensembl_end_phase', 'end_phase' ],
  );
  foreach my $entry (@exon_entries) {
    my ( $key, $accessor ) = @{$entry};
    $summary->{$key} = $self->$accessor();
  }

  # Expose the inherited id under an exon-specific name as well.
  $summary->{'exon_id'} = $summary->{'id'};

  return $summary;
}