ensembl-hive  2.8.1
Transcript.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::Transcript - object representing an Ensembl transcript
34 
35 =head1 SYNOPSIS
36 
37 Creation:
38 
39  my $tran = new Bio::EnsEMBL::Transcript();
40  my $tran = new Bio::EnsEMBL::Transcript( -EXONS => \@exons );
41 
42 Manipulation:
43 
44  # Returns an array of Exon objects
45  my @exons = @{ $tran->get_all_Exons() };
46 
47  # Returns the peptide translation of the exons as a Bio::Seq
48  if ( $tran->translation() ) {
49  my $pep = $tran->translate();
50  } else {
51  print "Transcript ", $tran->stable_id(), " is non-coding\n";
52  }
53 
54 =head1 DESCRIPTION
55 
56 A representation of a transcript within the Ensembl system. A transcript
57 consists of a set of Exons and (possibly) a Translation which defines the
58 coding and non-coding regions of the exons.
59 
60 =cut
61 
62 package Bio::EnsEMBL::Transcript;
63 
64 use strict;
65 
75 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
76 use Bio::EnsEMBL::Utils::Exception qw(warning throw );
77 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
78 
79 use parent qw(Bio::EnsEMBL::Feature);
80 
81 use constant SEQUENCE_ONTOLOGY => {
82  acc => 'SO:0000673',
83  term => 'transcript',
84 };
85 
86 =head2 new
87 
88  Arg [-EXONS] :
89  reference to list of Bio::EnsEMBL::Exon objects - exons which make up
90  this transcript
91  Arg [-STABLE_ID] :
92  string - the stable identifier of this transcript
93  Arg [-VERSION] :
94  int - the version of the stable identifier of this transcript
95  Arg [-EXTERNAL_NAME] :
96  string - the external database name associated with this transcript
97  Arg [-EXTERNAL_DB] :
98  string - the name of the database the external name is from
99  Arg [-EXTERNAL_STATUS]:
100  string - the status of the external identifier
101  Arg [-DISPLAY_XREF]:
102  Bio::EnsEMBL::DBEntry - The external database entry that is used
103  to label this transcript when it is displayed.
104  Arg [-CREATED_DATE]:
105  string - the date the transcript was created
106  Arg [-MODIFIED_DATE]:
107  string - the date the transcript was last modified
108  Arg [-DESCRIPTION]:
109  string - the transcripts description
110  Arg [-BIOTYPE]:
111  string - the biotype e.g. "protein_coding"
112  Arg [-IS_CURRENT]:
113  Boolean - specifies if this is the current version of the transcript
114  Arg [-SOURCE]:
115  string - the transcript source, e.g. "ensembl"
116 
117  Example : $tran = new Bio::EnsEMBL::Transcript(-EXONS => \@exons);
118  Description: Constructor. Instantiates a Transcript object.
119  Returntype : Bio::EnsEMBL::Transcript
120  Exceptions : throw on bad arguments
121  Caller : general
122  Status : Stable
123 
124 =cut
125 
126 sub new {
127  my $proto = shift;
128 
129  my $class = ref($proto) || $proto;
130 
131  my $self = $class->SUPER::new(@_);
132 
133  my (
134  $exons, $stable_id, $version,
135  $external_name, $external_db, $external_status,
136  $display_xref, $created_date, $modified_date,
137  $description, $biotype, $confidence,
138  $external_db_name, $is_current,
139  $source
140  );
141 
142  (
143  $exons, $stable_id, $version,
144  $external_name, $external_db, $external_status,
145  $display_xref, $created_date, $modified_date,
146  $description, $biotype, $confidence,
147  $external_db_name, $is_current,
148  $source
149  )
150  = rearrange( [
151  'EXONS', 'STABLE_ID',
152  'VERSION', 'EXTERNAL_NAME',
153  'EXTERNAL_DB', 'EXTERNAL_STATUS',
154  'DISPLAY_XREF', 'CREATED_DATE',
155  'MODIFIED_DATE', 'DESCRIPTION',
156  'BIOTYPE', 'CONFIDENCE',
157  'EXTERNAL_DB_NAME',
158  'IS_CURRENT', 'SOURCE'
159  ],
160  @_
161  );
162 
163  if ($exons) {
164  $self->{'_trans_exon_array'} = $exons;
165  $self->recalculate_coordinates();
166  }
167 
168  $self->stable_id($stable_id);
169  $self->{'created_date'} = $created_date;
170  $self->{'modified_date'} = $modified_date;
171  $self->external_name($external_name) if ( defined $external_name );
172  $self->external_db($external_db) if ( defined $external_db );
173  $self->external_status($external_status)
174  if ( defined $external_status );
175  $self->display_xref($display_xref) if ( defined $display_xref );
176  $self->edits_enabled(1);
177 
178  $self->description($description);
179 
180  $self->{'biotype'} = $biotype;
181 
182  $self->source($source);
183 
184  # Default version
185  if ( !defined($version) ) { $version = 1 }
186  $self->{'version'} = $version;
187 
188  # default is_current
189  $is_current = 1 unless ( defined($is_current) );
190  $self->{'is_current'} = $is_current;
191 
192  return $self;
193 } ## end sub new
194 
195 =head2 get_all_DBLinks
196 
197  Arg [1] : String database name (optional)
198  SQL wildcard characters (_ and %) can be used to
199  specify patterns.
200 
201  Arg [2] : (optional) String, external database type, can be one of
202  ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
203  SQL wildcard characters (_ and %) can be used to
204  specify patterns.
205 
206  Example : my @dblinks = @{ $transcript->get_all_DBLinks() };
207  @dblinks = @{ $transcript->get_all_DBLinks('Uniprot%') };}
208  @dblinks = @{ $transcript->get_all_DBLinks('%', 'ENSEMBL') };
209 
210  Description: Retrieves *all* related DBEntries for this
211  transcript. This includes all DBEntries that are
212  associated with the corresponding translation.
213 
214  If you only want to retrieve the DBEntries associated
215  with the transcript (and not the translation) then
216  you should use the get_all_DBEntries() call instead.
217 
218  Note: Each entry may be listed more than once. No
219  uniqueness checks are done. Also if you put in an
220  incorrect external database name no checks are done
221  to see if this exists, you will just get an empty
222  list.
223 
224  Return type: Listref of Bio::EnsEMBL::DBEntry objects
225  Exceptions : none
226  Caller : general
227  Status : Stable
228 
229 =cut
230 
231 sub get_all_DBLinks {
232  my ( $self, $db_name_exp, $ex_db_type ) = @_;
233 
234  my @links =
235  @{ $self->get_all_DBEntries( $db_name_exp, $ex_db_type ) };
236 
237  # Add all of the transcript and translation xrefs to the return list.
238  my $translation = $self->translation();
239  if ( defined($translation) ) {
240  push( @links,
241  @{$translation->get_all_DBEntries( $db_name_exp, $ex_db_type ) }
242  );
243  }
244 
245  @links = sort { _compare_xrefs() } @links;
246 
247  return \@links;
248 }
249 
250 =head2 get_all_xrefs
251 
252  Arg [1] : String database name (optional)
253  SQL wildcard characters (_ and %) can be used to
254  specify patterns.
255 
256  Example : @xrefs = @{ $transcript->get_all_xrefs() };
257  @xrefs = @{ $transcript->get_all_xrefs('Uniprot%') };
258 
259  Description: Retrieves *all* related xrefs for this transcript.
260  This includes all xrefs that are associated with the
261  corresponding translation of this transcript.
262 
263  If you want to retrieve the xrefs associated with
264  only the transcript (and not the translation) then
265  you should use the get_all_object_xrefs() method
266  instead.
267 
268  Note: Each entry may be listed more than once. No
269  uniqueness checks are done. Also if you put in an
270  incorrect external database name no checks are done
271  to see if this exists, you will just get an empty
272  list.
273 
274  NB: This method is an alias for the
275  get_all_DBLinks() method.
276 
277  Return type: Listref of Bio::EnsEMBL::DBEntry objects
278 
279  Status : Stable
280 
281 =cut
282 
283 sub get_all_xrefs {
284  my $self = shift;
285  return $self->get_all_DBLinks(@_);
286 }
287 
288 =head2 get_all_DBEntries
289 
290  Arg [1] : (optional) String, external database name,
291  SQL wildcard characters (_ and %) can be used to
292  specify patterns.
293 
294  Arg [2] : (optional) String, external database type, can be one of
295  ('ARRAY','ALT_TRANS','ALT_GENE','MISC','LIT','PRIMARY_DB_SYNONYM','ENSEMBL'),
296  SQL wildcard characters (_ and %) can be used to
297  specify patterns.
298 
299  Example : my @dbentries = @{ $transcript->get_all_DBEntries() };
300  @dbentries = @{ $transcript->get_all_DBEntries('Uniprot%') };}
301  @dbentries = @{ $transcript->get_all_DBEntries('%', 'ENSEMBL') };}
302 
303  Description: Retrieves DBEntries (xrefs) for this transcript.
304  This does *not* include the corresponding
305  translations DBEntries (see get_all_DBLinks()).
306 
307  This method will attempt to lazy-load DBEntries
308  from a database if an adaptor is available and no
309  DBEntries are present on the transcript (i.e. they
310  have not already been added or loaded).
311 
312  Returntype : Listref of Bio::EnsEMBL::DBEntry objects
313  Exceptions : none
314  Caller : get_all_DBLinks, TranscriptAdaptor::store
315  Status : Stable
316 
317 =cut
318 
319 sub get_all_DBEntries {
320  my ( $self, $ex_db_exp, $ex_db_type ) = @_;
321 
322  my $cache_name = 'dbentries';
323 
324  if ( defined($ex_db_exp) ) {
325  $cache_name .= $ex_db_exp;
326  }
327 
328  if ( defined($ex_db_type) ) {
329  $cache_name .= $ex_db_type;
330  }
331 
332  # if not cached, retrieve all of the xrefs for this transcript
333  if ( !defined( $self->{$cache_name} ) && defined( $self->adaptor() ) )
334  {
335  $self->{$cache_name} =
336  $self->adaptor()->db()->get_DBEntryAdaptor()
337  ->fetch_all_by_Transcript( $self, $ex_db_exp, $ex_db_type );
338  }
339 
340  $self->{$cache_name} ||= [];
341 
342  return $self->{$cache_name};
343 } ## end sub get_all_DBEntries
344 
345 =head2 get_all_object_xrefs
346 
347  Arg [1] : (optional) String, external database name
348 
349  Arg [2] : (optional) String, external_db type
350 
351  Example : @oxrefs = @{ $transcript->get_all_object_xrefs() };
352 
353  Description: Retrieves xrefs for this transcript. This does
354  *not* include xrefs that are associated with the
355  corresponding translations of this transcript (see
356  get_all_xrefs()).
357 
358  This method will attempt to lazy-load xrefs from a
359  database if an adaptor is available and no xrefs are
360  present on the transcript (i.e. they have not already
361  been added or loaded).
362 
363  NB: This method is an alias for the
364  get_all_DBentries() method.
365 
366  Return type: Listref of Bio::EnsEMBL::DBEntry objects
367 
368  Status : Stable
369 
370 =cut
371 
372 sub get_all_object_xrefs {
373  my $self = shift;
374  return $self->get_all_DBEntries(@_);
375 }
376 
377 =head2 add_DBEntry
378 
379  Arg [1] : Bio::EnsEMBL::DBEntry $dbe
380  The dbEntry to be added
381  Example : my $dbe = Bio::EnsEMBL::DBEntery->new(...);
382  $transcript->add_DBEntry($dbe);
383  Description: Associates a DBEntry with this transcript. Note that adding
384  DBEntries will prevent future lazy-loading of DBEntries for this
385  gene (see get_all_DBEntries).
386  Returntype : none
387  Exceptions : thrown on incorrect argument type
388  Caller : general
389  Status : Stable
390 
391 =cut
392 
393 sub add_DBEntry {
394  my $self = shift;
395  my $dbe = shift;
396 
397  unless($dbe && ref($dbe) && $dbe->isa('Bio::EnsEMBL::DBEntry')) {
398  throw('Expected DBEntry argument');
399  }
400 
401  $self->{'dbentries'} ||= [];
402  push @{$self->{'dbentries'}}, $dbe;
403 }
404 
405 
406 =head2 get_all_supporting_features
407 
408  Example : my @evidence = @{ $transcript->get_all_supporting_features };
409  Description: Retrieves any supporting features added manually by
410  calls to add_supporting_features.
411  Returntype : Listref of Bio::EnsEMBL::FeaturePair objects
412  Exceptions : none
413  Caller : general
414  Status : Stable
415 
416 =cut
417 
418 sub get_all_supporting_features {
419  my $self = shift;
420 
421  if( !exists $self->{_supporting_evidence} ) {
422  if($self->adaptor) {
423  my $tsfa = $self->adaptor->db->get_TranscriptSupportingFeatureAdaptor();
424  $self->{_supporting_evidence} = $tsfa->fetch_all_by_Transcript($self);
425  }
426  }
427 
428  return $self->{_supporting_evidence} || [];
429 }
430 
431 
432 =head2 add_supporting_features
433 
434  Arg [1-N] : Bio::EnsEMBL::FeaturePair $feature
435  The supporting features to add
436  Example : $transcript->add_supporting_features(@features);
437  Description: Adds a list of supporting features to this Transcript.
438  The added features can be retieved by
439  get_all_supporting_features().
440  Returntype : none
441  Exceptions : throw if any of the features are not FeaturePairs
442  throw if any of the features are not in the same coordinate
443  system as the Transcript
444  Caller : general
445  Status : Stable
446 
447 =cut
448 
449 sub add_supporting_features {
450  my ($self, @features) = @_;
451 
452  return unless @features;
453 
454  $self->{_supporting_evidence} ||= [];
455 
456  # check whether this feature object has been added already
457  FEATURE: foreach my $feature (@features) {
458 
459  if (!defined($feature) || ref($feature) eq "ARRAY") {
460  throw("Element in transcript supporting features array is undefined or is an ARRAY for " . $self->dbID);
461  }
462  if (!$feature || !$feature->isa("Bio::EnsEMBL::FeaturePair")) {
463  print "feature = " . $feature . "\n";
464  throw("Supporting feat [$feature] not a " .
465  "Bio::EnsEMBL::FeaturePair");
466  }
467 
468  if ((defined $self->slice() && defined $feature->slice())&&
469  ( $self->slice()->name() ne $feature->slice()->name())){
470  throw("Supporting feat not in same coord system as transcript\n" .
471  "transcript is attached to [".$self->slice()->name()."]\n" .
472  "feat is attached to [".$feature->slice()->name()."]");
473  }
474 
475  foreach my $added_feature ( @{ $self->{_supporting_evidence} } ){
476  # compare objects
477  if ( $feature == $added_feature ){
478  #this feature has already been added
479  next FEATURE;
480  }
481  }
482 
483  #no duplicate was found, add the feature
484  push(@{$self->{_supporting_evidence}}, $feature);
485  }
486 }
487 
488 
489 =head2 flush_supporting_features
490 
491  Example : $transcript->flush_supporting_features;
492  Description : Removes all supporting evidence from the transcript.
493  Return type : (Empty) listref
494  Exceptions : none
495  Caller : general
496  Status : Stable
497 
498 =cut
499 
500 sub flush_supporting_features {
501  my $self = shift;
502  $self->{'_supporting_evidence'} = [];
503 }
504 
505 
506 =head2 external_db
507 
508  Arg [1] : (optional) String - name of external db to set
509  Example : $transcript->external_db('HGNC');
510  Description: Getter/setter for attribute external_db. The db is the one that
511  belongs to the external_name.
512  Returntype : String
513  Exceptions : none
514  Caller : general
515  Status : Stable
516 
517 =cut
518 
519 sub external_db {
520  my ( $self, $ext_dbname ) = @_;
521 
522  if(defined $ext_dbname) {
523  return ( $self->{'external_db'} = $ext_dbname );
524  }
525 
526  if( exists $self->{'external_db'} ) {
527  return $self->{'external_db'};
528  }
529 
530  my $display_xref = $self->display_xref();
531 
532  if( defined $display_xref ) {
533  return $display_xref->dbname()
534  } else {
535  return undef;
536  }
537 }
538 
539 
540 =head2 external_status
541 
542  Arg [1] : (optional) String - status of the external db
543  Example : $transcript->external_status('KNOWNXREF');
544  Description: Getter/setter for attribute external_status. The status of
545  the external db of the one that belongs to the external_name.
546  Returntype : String
547  Exceptions : none
548  Caller : general
549  Status : Stable
550 
551 =cut
552 
553 sub external_status {
554  my ( $self, $ext_status ) = @_;
555 
556  if(defined $ext_status) {
557  return ( $self->{'external_status'} = $ext_status );
558  }
559 
560  if( exists $self->{'external_status'} ) {
561  return $self->{'external_status'};
562  }
563 
564  my $display_xref = $self->display_xref();
565 
566  if( defined $display_xref ) {
567  return $display_xref->status()
568  } else {
569  return undef;
570  }
571 }
572 
573 
574 =head2 external_name
575 
576  Arg [1] : (optional) String - the external name to set
577  Example : $transcript->external_name('BRCA2-001');
578  Description: Getter/setter for attribute external_name.
579  Returntype : String or undef
580  Exceptions : none
581  Caller : general
582  Status : Stable
583 
584 =cut
585 
586 sub external_name {
587  my ($self, $ext_name) = @_;
588 
589  if(defined $ext_name) {
590  return ( $self->{'external_name'} = $ext_name );
591  }
592 
593  if( exists $self->{'external_name'} ) {
594  return $self->{'external_name'};
595  }
596 
597  my $display_xref = $self->display_xref();
598 
599  if( defined $display_xref ) {
600  return $display_xref->display_id()
601  } else {
602  return undef;
603  }
604 }
605 
606 =head2 source
607 
608  Arg [1] : (optional) String - the source to set
609  Example : $transcript->source('ensembl');
610  Description: Getter/setter for attribute source
611  Returntype : String
612  Exceptions : none
613  Caller : general
614  Status : Stable
615 
616 =cut
617 
618 sub source {
619  my $self = shift;
620  $self->{'source'} = shift if( @_ );
621  return ( $self->{'source'} || "ensembl" );
622 }
623 
624 =head2 display_xref
625 
626  Arg [1] : (optional) Bio::EnsEMBL::DBEntry - the display xref to set
627  Example : $transcript->display_xref($db_entry);
628  Description: Getter/setter for display_xref for this transcript.
629  Returntype : Bio::EnsEMBL::DBEntry
630  Exceptions : none
631  Caller : general
632  Status : Stable
633 
634 =cut
635 
636 sub display_xref {
637  my $self = shift;
638  $self->{'display_xref'} = shift if(@_);
639  return $self->{'display_xref'};
640 }
641 
642 =head2 is_canonical
643 
644  Args [1] : (optional) Boolean is_canonical
645 
646  Example : if ($transcript->is_canonical()) { ... }
647 
648  Description : Returns true (non-zero) if the transcript is the
649  canonical transcript of its gene, false (0) if not. If the code
650  returns an undefined it is because its state is not currently
651  known. Internally the code will consult the database for this
652  value if it is unknown and the transcript has a dbID and an
653  attached adaptor
654 
655  Return type : Boolean
656 
657  Status : Stable
658 
659 =cut
660 
661 sub is_canonical {
662  my ( $self, $value ) = @_;
663 
664  #Shortcut call
665  return $self->{is_canonical} if defined $self->{is_canonical};
666 
667  if ( defined($value) ) {
668  $self->{is_canonical} = ( $value ? 1 : 0 );
669  }
670  else {
671  if(! defined $self->{is_canonical} && $self->dbID() && $self->adaptor()) {
672  $self->{is_canonical} = $self->adaptor()->is_Transcript_canonical($self);
673  }
674  }
675 
676  return $self->{is_canonical};
677 }
678 
679 =head2 translation
680 
681  Args : None
682  Example : if ( $transcript->translation() ) {
683  print( $transcript->translation()->stable_id(), "\n" );
684  } else {
685  print("Pseudogene\n");
686  }
687  Description: Getter/setter for the Translation object which
688  defines the CDS (and as a result the peptide encoded
689  by) this transcript. This function will return
690  undef if this transcript is a pseudogene, i.e. a
691  non-translating transcript such as an ncRNA. This
692  is the accepted method of determining whether a
693  transcript is a pseudogene or not.
694  Returntype : Bio::EnsEMBL::Translation
695  Exceptions : none
696  Caller : general
697  Status : Stable
698 
699 =cut
700 
701 sub translation {
702  my ( $self, $translation ) = @_;
703 
704  if ( defined($translation) ) {
705  assert_ref( $translation, 'Bio::EnsEMBL::Translation' );
706 
707  $self->{'translation'} = $translation;
708  $translation->transcript($self);
709 
710  $self->{'cdna_coding_start'} = undef;
711  $self->{'cdna_coding_end'} = undef;
712 
713  $self->{'coding_region_start'} = undef;
714  $self->{'coding_region_end'} = undef;
715 
716  $self->{'transcript_mapper'} = undef;
717 
718  } elsif ( @_ > 1 ) {
719  if ( defined( $self->{'translation'} ) ) {
720  # Removing existing translation
721 
722  $self->{'translation'}->transcript(undef);
723  delete( $self->{'translation'} );
724 
725  $self->{'cdna_coding_start'} = undef;
726  $self->{'cdna_coding_end'} = undef;
727 
728  $self->{'coding_region_start'} = undef;
729  $self->{'coding_region_end'} = undef;
730 
731  $self->{'transcript_mapper'} = undef;
732  }
733  } elsif ( !exists( $self->{'translation'} )
734  && defined( $self->adaptor() ) )
735  {
736  $self->{'translation'} =
737  $self->adaptor()->db()->get_TranslationAdaptor()
738  ->fetch_by_Transcript($self);
739  }
740 
741  return $self->{'translation'};
742 } ## end sub translation
743 
744 =head2 get_all_alternative_translations
745 
746  Args : None
747  Example :
748 
749  my @alt_translations =
750  @{ $transcript->get_all_alternative_translations() };
751 
752  Description: Fetches all alternative translations defined for this
753  transcript. The canonical translation is not returned.
754 
755  Returntype : Arrayref to Bio::EnsEMBL::Translation
756  Exceptions : None
757  Caller : General
758  Status : Stable
759 
760 =cut
761 
762 sub get_all_alternative_translations {
763  my ($self) = @_;
764 
765  if ( !exists( $self->{'alternative_translations'} )
766  && defined( $self->adaptor() ) )
767  {
768  my $pa = $self->adaptor()->db()->get_TranslationAdaptor();
769  my @translations =
770  @{ $pa->fetch_all_alternative_by_Transcript($self) };
771 
772  $self->{'alternative_translations'} = \@translations;
773  }
774 
775  return $self->{'alternative_translations'};
776 }
777 
778 =head2 add_alternative_translation
779 
780  Args : Bio::EnsEMBL::Translation $translation
781  Example :
782 
783  $transcript->add_alternative_translation($translation);
784 
785  Description: Adds an alternative translation to this transcript.
786  Returntype : None
787  Exceptions : None
788  Caller : General
789  Status : Stable
790 
791 =cut
792 
793 sub add_alternative_translation {
794  my ( $self, $translation ) = @_;
795 
796  if ( !( defined($translation)
797  && ref($translation)
798  && $translation->isa('Bio::EnsEMBL::Translation') ) )
799  {
800  throw("Bio::EnsEMBL::Translation argument expected.");
801  }
802 
803  # Load the existsing alternative translations from the database if
804  # they haven't already been loaded.
805  $self->get_all_alternative_translations();
806 
807  push( @{ $self->{'alternative_translations'} }, $translation );
808 }
809 
810 =head2 spliced_seq
811 
812  Args : soft_mask (opt)
813  if specified, will return a sequence where UTR regions are lowercased
814  Description: Retrieves all Exon sequences and concats them together.
815  No phase padding magic is done, even if phases do not align.
816  Returntype : Text
817  Exceptions : none
818  Caller : general
819  Status : Stable
820 
821 =cut
822 
823 sub spliced_seq {
824  my ( $self, $soft_mask ) = @_;
825 
826  my $seq_string = "";
827  for my $ex ( @{$self->get_all_Exons()} ) {
828  my $seq = $ex->seq();
829  if(!$seq) {
830  warning("Could not obtain seq for exon. Transcript sequence may not " .
831  "be correct.");
832  $seq_string .= 'N' x $ex->length();
833  } else {
834  my $exon_seq = $seq->seq();
835  if ($soft_mask) {
836  my $padstr;
837  if (!defined ($ex->coding_region_start($self))) {
838  $exon_seq = lc($exon_seq);
839  }
840 
841  if ($ex->coding_region_start($self) > $ex->start()) {
842  my $forward_length = $ex->coding_region_start($self) - $ex->start();
843  my $reverse_length = $ex->end() - $ex->coding_region_start($self);
844  if ($ex->strand == 1) {
845  $exon_seq = lc (substr($exon_seq, 0, $forward_length)) . substr($exon_seq, $forward_length);
846  } else {
847  $exon_seq = substr($exon_seq, 0, $reverse_length+1) . lc(substr($exon_seq, $reverse_length+1));
848  }
849  }
850 
851  if ($ex->coding_region_end($self) < $ex->end()) {
852  my $forward_length = $ex->coding_region_end($self) - $ex->start();
853  my $reverse_length = $ex->end() - $ex->coding_region_end($self);
854  if ($ex->strand == 1) {
855  $exon_seq = substr($exon_seq, 0, $forward_length+1) . lc(substr($exon_seq, $forward_length+1));
856  } else {
857  $exon_seq = lc(substr($exon_seq, 0, $reverse_length)) . substr($exon_seq, $reverse_length);
858  }
859  }
860  }
861  $seq_string .= $exon_seq;
862  }
863  }
864 
865  # apply post transcriptional edits
866  if($self->edits_enabled()) {
867  my @seqeds = @{$self->get_all_SeqEdits()};
868 
869  # sort edits in reverse order to remove complication of
870  # adjusting downstream edits
871  @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
872 
873  foreach my $se (@seqeds) {
874  $se->apply_edit(\$seq_string);
875  }
876  }
877 
878  return $seq_string;
879 }
880 
881 
882 =head2 translateable_seq
883 
884  Args : none
885  Example : print $transcript->translateable_seq(), "\n";
886  Description: Returns a sequence string which is the the translateable part
887  of the transcripts sequence. This is formed by splicing all
888  Exon sequences together and apply all defined RNA edits.
889  Then the coding part of the sequence is extracted and returned.
890  The code will not support monkey exons any more. If you want to
891  have non phase matching exons, defined appropriate _rna_edit
892  attributes!
893 
894  An empty string is returned if this transcript is a pseudogene
895  (i.e. is non-translateable).
896  Returntype : Text
897  Exceptions : none
898  Caller : general
899  Status : Stable
900 
901 =cut
902 
903 sub translateable_seq {
904  my ( $self ) = @_;
905 
906  if ( !$self->translation() ) {
907  return '';
908  }
909 
910  my $mrna = $self->spliced_seq();
911 
912  my $start = $self->cdna_coding_start();
913  my $end = $self->cdna_coding_end();
914 
915  $mrna = substr( $mrna, $start - 1, $end - $start + 1 );
916 
917  my $start_phase = $self->translation->start_Exon->phase();
918  if( $start_phase > 0 ) {
919  $mrna = "N"x$start_phase . $mrna;
920  }
921  if( ! $start || ! $end ) {
922  return "";
923  }
924 
925  return $mrna;
926 }
927 
928 
929 =head2 cdna_coding_start
930 
931  Arg [1] : (optional) $value
932  Example : $relative_coding_start = $transcript->cdna_coding_start;
933  Description: Retrieves the position of the coding start of this transcript
934  in cdna coordinates (relative to the start of the 5prime end of
935  the transcript, excluding introns, including utrs).
936 
937  This will return undef if this is a pseudogene (i.e. a
938  transcript with no translation).
939  Returntype : int
940  Exceptions : none
941  Caller : five_prime_utr, get_all_snps, general
942  Status : Stable
943 
944 =cut
945 
946 sub cdna_coding_start {
947  my $self = shift;
948 
949  if( @_ ) {
950  $self->{'cdna_coding_start'} = shift;
951  }
952 
953  if(!defined $self->{'cdna_coding_start'} && defined $self->translation){
954  # calc coding start relative from the start of translation (in cdna coords)
955  my $start = 0;
956 
957  my @exons = @{$self->get_all_Exons};
958  my $exon;
959 
960  while($exon = shift @exons) {
961  if($exon == $self->translation->start_Exon) {
962  #add the utr portion of the start exon
963  $start += $self->translation->start;
964  last;
965  } else {
966  #add the entire length of this non-coding exon
967  $start += $exon->length;
968  }
969  }
970 
971  # adjust cdna coords if sequence edits are enabled
972  if($self->edits_enabled()) {
973  my @seqeds = @{$self->get_all_SeqEdits()};
974  if (scalar @seqeds) {
975  my $transl_start = $self->get_all_Attributes('_transl_start');
976  if (@{$transl_start}) {
977  $start = $transl_start->[0]->value;
978  } else {
979  # sort in reverse order to avoid adjustment of downstream edits
980  @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
981 
982  foreach my $se (@seqeds) {
983  # use less than start so that start of CDS can be extended
984  if($se->start() < $start) {
985  $start += $se->length_diff();
986  }
987  }
988  }
989  }
990  }
991 
992  $self->{'cdna_coding_start'} = $start;
993  }
994 
995  return $self->{'cdna_coding_start'};
996 }
997 
998 
999 =head2 cdna_coding_end
1000 
1001  Arg [1] : (optional) $value
1002  Example : $cdna_coding_end = $transcript->cdna_coding_end;
1003  Description: Retrieves the end of the coding region of this transcript in
1004  cdna coordinates (relative to the five prime end of the
1005  transcript, excluding introns, including utrs).
1006 
1007  This will return undef if this transcript is a pseudogene
1008  (i.e. a transcript with no translation and therefor no CDS).
1009  Returntype : int
1010  Exceptions : none
1011  Caller : general
1012  Status : Stable
1013 
1014 =cut
1015 
1016 sub cdna_coding_end {
1017  my $self = shift;
1018 
1019  if( @_ ) {
1020  $self->{'cdna_coding_end'} = shift;
1021  }
1022 
1023  if(!defined $self->{'cdna_coding_end'} && defined $self->translation) {
1024  my @exons = @{$self->get_all_Exons};
1025 
1026  my $end = 0;
1027  while(my $exon = shift @exons) {
1028  if($exon == $self->translation->end_Exon) {
1029  # add coding portion of the final coding exon
1030  $end += $self->translation->end;
1031  last;
1032  } else {
1033  # add entire exon
1034  $end += $exon->length;
1035  }
1036  }
1037 
1038  # adjust cdna coords if sequence edits are enabled
1039  if($self->edits_enabled()) {
1040  my @seqeds = @{$self->get_all_SeqEdits()};
1041  if (scalar @seqeds) {
1042  my $transl_end = $self->get_all_Attributes('_transl_end');
1043  if (@{$transl_end}) {
1044  $end = $transl_end->[0]->value;
1045  } else {
1046  # sort in reverse order to avoid adjustment of downstream edits
1047  @seqeds = sort {$b->start() <=> $a->start()} @seqeds;
1048 
1049  foreach my $se (@seqeds) {
1050  # use less than or equal to end+1 so end of the CDS can be extended
1051  if($se->start() <= $end + 1) {
1052  $end += $se->length_diff();
1053  }
1054  }
1055  }
1056  }
1057  }
1058 
1059  $self->{'cdna_coding_end'} = $end;
1060  }
1061 
1062  return $self->{'cdna_coding_end'};
1063 }
1064 
1065 
1066 =head2 coding_region_start
1067 
1068  Arg [1] : (optional) $value
1069  Example : $coding_region_start = $transcript->coding_region_start
1070  Description: Retrieves the start of the coding region of this transcript
1071  in genomic coordinates (i.e. in either slice or contig coords).
1072  By convention, the coding_region_start is always lower than
1073  the value returned by the coding_end method.
1074  The value returned by this function is NOT the biological
1075  coding start since on the reverse strand the biological coding
1076  start would be the higher genomic value.
1077 
1078  This function will return undef if this is a pseudogene
1079  (a non-translated transcript).
1080  Returntype : int
1081  Exceptions : none
1082  Caller : general
1083  Status : Stable
1084 
1085 =cut
1086 
1087 sub coding_region_start {
1088  my ($self, $value) = @_;
1089 
1090  if( defined $value ) {
1091  $self->{'coding_region_start'} = $value;
1092  } elsif(!defined $self->{'coding_region_start'} &&
1093  defined $self->translation) {
1094  #calculate the coding start from the translation
1095  my $start;
1096  my $strand = $self->translation()->start_Exon->strand();
1097  if( $strand == 1 ) {
1098  $start = $self->translation()->start_Exon->start();
1099  $start += ( $self->translation()->start() - 1 );
1100  } else {
1101  $start = $self->translation()->end_Exon->end();
1102  $start -= ( $self->translation()->end() - 1 );
1103  }
1104  $self->{'coding_region_start'} = $start;
1105  }
1106 
1107  return $self->{'coding_region_start'};
1108 }
1109 
1110 
1111 =head2 coding_region_end
1112 
1113  Arg [1] : (optional) $value
1114  Example : $coding_region_end = $transcript->coding_region_end
1115  Description: Retrieves the end of the coding region of this transcript
1116  in genomic coordinates (i.e. in either slice or contig coords).
1117  By convention, the coding_region_end is always higher than the
1118  value returned by the coding_region_start method.
1119  The value returned by this function is NOT the biological
1120  coding end since on the reverse strand the biological coding
1121  end would be the lower genomic value.
1122 
1123  This function will return undef if this is a pseudogene
1124  (a non-translated transcript).
1125  Returntype : int
1126  Exceptions : none
1127  Caller : general
1128  Status : Stable
1129 
1130 =cut
1131 
1132 sub coding_region_end {
1133  my ($self, $value ) = @_;
1134 
1135  my $strand;
1136  my $end;
1137 
1138  if( defined $value ) {
1139  $self->{'coding_region_end'} = $value;
1140  } elsif( ! defined $self->{'coding_region_end'}
1141  && defined $self->translation() ) {
1142  $strand = $self->translation()->start_Exon->strand();
1143  if( $strand == 1 ) {
1144  $end = $self->translation()->end_Exon->start();
1145  $end += ( $self->translation()->end() - 1 );
1146  } else {
1147  $end = $self->translation()->start_Exon->end();
1148  $end -= ( $self->translation()->start() - 1 );
1149  }
1150  $self->{'coding_region_end'} = $end;
1151  }
1152 
1153  return $self->{'coding_region_end'};
1154 }
1155 
1156 
1157 =head2 edits_enabled
1158 
1159  Arg [1] : (optional) boolean $newval
1160  Example : $transcript->edits_enabled(1);
1161  Description: Enables/Disables the application of SeqEdits to this transcript.
1162  Edits are enabled by default, and affect the cdna/mrna
1163  sequences coordinates and the resultant translation.
1164  Returntype : boolean - the current value of the edits
1165  Exceptions : none
1166  Caller : general, cdna_coding_start, cdna_coding_end, length
1167  Status : Stable
1168 
1169 =cut
1170 
1171 sub edits_enabled {
1172  my ( $self, $boolean ) = @_;
1173 
1174  if ( defined($boolean) ) {
1175  $self->{'edits_enabled'} = $boolean;
1176 
1177  # flush cached values that will be different with/without edits
1178  $self->{'cdna_coding_start'} = undef;
1179  $self->{'cdna_coding_end'} = undef;
1180  $self->{'transcript_mapper'} = undef;
1181  }
1182 
1183  return $self->{'edits_enabled'};
1184 }
1185 
1186 
1187 =head2 get_all_SeqEdits
1188 
1189  Arg [1] : none
1190  Example : my @seqeds = @{$transcript->get_all_SeqEdits()};
1191  Description: Retrieves all post transcriptional sequence modifications for
1192  this transcript.
1193  Returntype : Bio::EnsEMBL::SeqEdit
1194  Exceptions : none
1195  Caller : spliced_seq()
1196  Status : Stable
1197 
1198 =cut
1199 
1200 sub get_all_SeqEdits {
1201  my $self = shift;
1202 
1203  my @seqeds;
1204 
1205  my $attribs = $self->get_all_Attributes('_rna_edit');
1206 
1207  # convert attributes to SeqEdit objects
1208  foreach my $a (@$attribs) {
1209  push @seqeds, Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $a);
1210  }
1211 
1212  return \@seqeds;
1213 }
1214 
1215 
1216 =head2 get_all_Attributes
1217 
1218  Arg [1] : optional string $attrib_code
1219  The code of the attribute type to retrieve values for.
1220  Example : ($rna_edits) = @{$transcript->get_all_Attributes('_rna_edit')};
1221  @transc_attributes = @{$transcript->get_all_Attributes()};
1222  Description: Gets a list of Attributes of this transcript.
1223  Optionally just get Attrubutes for given code.
1224  Returntype : listref Bio::EnsEMBL::Attribute
1225  Exceptions : warning if transcript does not have attached adaptor and
1226  attempts lazy load.
1227  Caller : general
1228  Status : Stable
1229 
1230 =cut
1231 
1232 sub get_all_Attributes {
1233  my $self = shift;
1234  my $attrib_code = shift;
1235 
1236  if( ! exists $self->{'attributes' } ) {
1237  if(!$self->adaptor() ) {
1238  return [];
1239  }
1240 
1241  my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
1242  $self->{'attributes'} = $attribute_adaptor->fetch_all_by_Transcript($self);
1243  }
1244 
1245  if( defined $attrib_code) {
1246  my @results = grep { uc($_->code()) eq uc($attrib_code) }
1247  @{$self->{'attributes'}};
1248  return \@results;
1249  } else {
1250  return $self->{'attributes'};
1251  }
1252 }
1253 
1254 
1255 =head2 add_Attributes
1256 
1257  Arg [1...] : Bio::EnsEMBL::Attribute $attribute
1258  You can have more Attributes as arguments, all will be added.
1259  Example : $transcript->add_Attributes($rna_edit_attribute);
1260  Description: Adds an Attribute to the Transcript. Usefull to do _rna_edits.
1261  If you add an attribute before you retrieve any from database,
1262  lazy load will be disabled.
1263  Returntype : none
1264  Exceptions : throw on incorrect arguments
1265  Caller : general
1266  Status : Stable
1267 
1268 =cut
1269 
1270 sub add_Attributes {
1271  my ( $self, @attribs ) = @_;
1272 
1273  if ( !exists( $self->{'attributes'} ) ) {
1274  $self->{'attributes'} = [];
1275  }
1276 
1277  my $seq_change = 0;
1278  foreach my $attrib (@attribs) {
1279  assert_ref( $attrib, 'Bio::EnsEMBL::Attribute' );
1280 
1281  push( @{ $self->{'attributes'} }, $attrib );
1282 
1283  if ( $attrib->code() eq "_rna_edit" ) {
1284  $seq_change = 1;
1285  }
1286  }
1287 
1288  if ($seq_change) {
1289  my $translation = $self->translation();
1290  if ( defined($translation) ) {
1291  delete( $translation->{'seq'} );
1292  }
1293  }
1294 
1295  # flush cdna coord cache b/c we may have added a SeqEdit
1296  delete( $self->{'cdna_coding_start'} );
1297  delete( $self->{'cdna_coding_end'} );
1298  delete( $self->{'transcript_mapper'} );
1299 } ## end sub add_Attributes
1300 
1301 
1302 =head2 add_Exon
1303 
1304  Title : add_Exon
1305  Example : $trans->add_Exon($exon)
1306  Returns : None
1307  Args [1]: Bio::EnsEMBL::Exon object to add
1308  Args [2]: rank
1309  Exceptions: throws if not a valid Bio::EnsEMBL::Exon
1310  : or exon clashes with another one
1311  Status : Stable
1312 
1313 =cut
1314 
1315 sub add_Exon {
1316  my ( $self, $exon, $rank ) = @_;
1317 
1318  assert_ref( $exon, 'Bio::EnsEMBL::Exon' );
1319 
1320  $self->{'_trans_exon_array'} ||= [];
1321 
1322  if ( defined($rank) ) {
1323  $self->{'_trans_exon_array'}->[ $rank - 1 ] = $exon;
1324  return;
1325  }
1326 
1327  my $was_added = 0;
1328 
1329  my $ea = $self->{'_trans_exon_array'};
1330 
1331  if ( @{$ea} ) {
1332  if ( $exon->strand() == 1 ) {
1333 
1334  my $exon_start = $exon->start();
1335 
1336  if ( $exon_start > $ea->[-1]->end() ) {
1337  push( @{$ea}, $exon );
1338  $was_added = 1;
1339  } else {
1340  # Insert it at correct place
1341 
1342  my $i = 0;
1343  foreach my $e ( @{$ea} ) {
1344  if ( $exon_start < $e->start() ) {
1345  if ( $exon->end() >= $e->start() ) {
1346  # Overlap
1347  last;
1348  }
1349  if ( $i and $exon_start <= $ea->[$i-1]->end() ) {
1350  # Overlap
1351  last;
1352  }
1353  splice( @{$ea}, $i, 0, $exon );
1354  $was_added = 1;
1355  last;
1356  }
1357  ++$i;
1358  }
1359 
1360  }
1361 
1362  } else {
1363 
1364  my $exon_end = $exon->end();
1365 
1366  if ( $exon_end < $ea->[-1]->start() ) {
1367  push( @{$ea}, $exon );
1368  $was_added = 1;
1369  } else {
1370  # Insert it at correct place
1371 
1372  my $i = 0;
1373  foreach my $e ( @{$ea} ) {
1374  if ( $exon_end > $e->end() ) {
1375  if ( $exon->start() <= $e->end() ) {
1376  # Overlap
1377  last;
1378  }
1379  if ( $i and $exon_end >= $ea->[$i-1]->start() ) {
1380  # Overlap
1381  last;
1382  }
1383  splice( @{$ea}, $i, 0, $exon );
1384  $was_added = 1;
1385  last;
1386  }
1387  ++$i;
1388  }
1389 
1390  }
1391 
1392  } ## end else [ if ( $exon->strand() ==...)]
1393  } else {
1394  push( @{$ea}, $exon );
1395  $was_added = 1;
1396  }
1397 
1398  # sanity check:
1399  if ( !$was_added ) {
1400  # The exon was not added because it was overloapping with an
1401  # existing exon.
1402  my $all_str = '';
1403 
1404  foreach my $e ( @{$ea} ) {
1405  $all_str .= ' '
1406  . $e->start() . '-'
1407  . $e->end() . ' ('
1408  . $e->strand() . ') '
1409  . ( $e->stable_id() || '' ) . "\n";
1410  }
1411 
1412  my $cur_str = ' '
1413  . $exon->start() . '-'
1414  . $exon->end() . ' ('
1415  . $exon->strand() . ') '
1416  . ( $exon->stable_id() || '' ) . "\n";
1417 
1418  throw( "Exon overlaps with other exon in same transcript.\n"
1419  . "Transcript Exons:\n$all_str\n"
1420  . "This Exon:\n$cur_str" );
1421  }
1422 
1423  # recalculate start, end, slice, strand
1424  $self->recalculate_coordinates();
1425 } ## end sub add_Exon
1426 
1427 
1428 =head2 get_all_Exons
1429 
1430  Arg [1] : Boolean
1431  Only return constitutive exons if true (non-zero)
1432 
1433  Example : my @exons = @{ $transcript->get_all_Exons() };
1434  my @exons = @{ $transcript->get_all_Exons( -constitutive => 1 ) };
1435 
1436  Description: Returns an listref of the exons in this transcript
1437  in order, i.e. the first exon in the listref is the
1438  5prime most exon in the transcript. Only returns
1439  constitutive exons if the CONSTITUTIVE argument is
1440  true.
1441 
1442  Returntype : listref to Bio::EnsEMBL::Exon objects
1443  Exceptions : none
1444  Caller : general
1445  Status : Stable
1446 
1447 =cut
1448 
1449 sub get_all_Exons {
1450  my ( $self, @args ) = @_;
1451 
1452  my $constitutive;
1453  if (@args) {
1454  $constitutive = rearrange( ['CONSTITUTIVE'], @args );
1455  }
1456 
1457  if (!defined( $self->{'_trans_exon_array'} )
1458  && defined( $self->adaptor() ) )
1459  {
1460  $self->{'_trans_exon_array'} =
1461  $self->adaptor()->db()->get_ExonAdaptor()
1462  ->fetch_all_by_Transcript($self);
1463  }
1464 
1465  my @result;
1466  if ( defined($constitutive) && $constitutive != 0 ) {
1467  foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
1468  if ( $exon->is_constitutive() ) {
1469  push( @result, $exon );
1470  }
1471  }
1472  } else {
1473  @result = @{ $self->{'_trans_exon_array'} };
1474  }
1475 
1476  return \@result;
1477 } ## end sub get_all_Exons
1478 
1479 
1480 =head2 get_all_ExonTranscripts
1481 
1482  Example : my @exon_transcripts = @{ $transcript->get_all_ExonTranscripts() };
1483 
1484  Description: Returns an listref of the exons in this transcript
1485  in order, i.e. the first exon in the listref is the
1486  5prime most exon in the transcript.
1487 
1488  Returntype : listref to Bio::EnsEMBL::ExonTranscript objects
1489  Exceptions : none
1490  Caller : general
1491  Status : Stable
1492 
1493 =cut
1494 
1495 sub get_all_ExonTranscripts {
1496  my ( $self, @args ) = @_;
1497 
1498  if (!defined( $self->{'_trans_exon_array'} )
1499  && defined( $self->adaptor() ) )
1500  {
1501  $self->{'_trans_exon_array'} =
1502  $self->adaptor()->db()->get_ExonAdaptor()
1503  ->fetch_all_by_Transcript($self);
1504  }
1505 
1506  my @result;
1507  my $i = 0;
1508  foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
1509  $i++;
1510  my $exon_transcript = Bio::EnsEMBL::ExonTranscript->new(
1511  -EXON => $exon,
1512  -RANK => $i,
1513  -TRANSCRIPT => $self
1514  );
1515  push (@result, $exon_transcript) ;
1516  }
1517 
1518  return \@result;
1519 } ## end sub get_all_ExonTranscripts
1520 
1521 =head2 get_all_constitutive_Exons
1522 
1523  Arg : None
1524 
1525  Example : my @exons = @{ $transcript->get_all_constitutive_Exons() };
1526 
1527  Description: Returns an listref of the constitutive exons in this
1528  transcript in order, i.e. the first exon in the
1529  listref is the 5prime most exon in the transcript.
1530 
1531  Returntype : listref to Bio::EnsEMBL::Exon objects
1532  Exceptions : none
1533  Caller : general
1534  Status : Stable
1535 
1536 =cut
1537 
1538 sub get_all_constitutive_Exons {
1539  my ($self) = @_;
1540  return $self->get_all_Exons( '-constitutive' => 1 );
1541 }
1542 
1543 =head2 get_all_IntronSupportingEvidence
1544 
1545  Example : $ise->get_all_IntronSupportingEvidence();
1546  Description : Fetches all ISE instances linked to this Transript
1547  Returntype : ArrayRef[Bio::EnsEMBL::IntronSupportEvidence] retrieved from
1548  the DB or from those added via C<add_IntronSupportingEvidence>
1549  Exceptions : None
1550 
1551 =cut
1552 
1553 sub get_all_IntronSupportingEvidence {
1554  my ($self) = @_;
1555  if(! defined $self->{_ise_array} && defined $self->adaptor()) {
1556  my $isea = $self->adaptor()->db()->get_IntronSupportingEvidenceAdaptor();
1557  $self->{_ise_array} = $isea->fetch_all_by_Transcript($self);
1558  }
1559  return $self->{_ise_array} || [];
1560 }
1561 
1562 
1563 =head2 add_IntronSupportingEvidence
1564 
1565  Arg [1] : Bio::EnsEMBL::IntronSupportEvidence Object to add
1566  Example : $ise->add_IntronSupportingEvidence($ise);
1567  Description : Adds the IntronSupportEvidence instance to this Transcript. The
1568  code checks to see if it is a unique ISE instance
1569  Returntype : Boolean; true means it was added. False means it was not
1570  as this ISE was already attached
1571  Exceptions : None
1572 
1573 =cut
1574 
1575 sub add_IntronSupportingEvidence {
1576  my ($self, $ise) = @_;
1577  assert_ref($ise, 'Bio::EnsEMBL::IntronSupportingEvidence', 'IntronSupportingEvidence');
1578  my $unique = 1;
1579  foreach my $other_ise (@{$self->{_ise_array}}) {
1580  if($ise->equals($other_ise)) {
1581  $unique = 0;
1582  last;
1583  }
1584  }
1585  if($unique) {
1586  push(@{$self->{_ise_array}}, $ise);
1587  return 1;
1588  }
1589  return 0;
1590 }
1591 
1592 =head2 get_all_Introns
1593 
1594  Arg [1] : none
1595  Example : my @introns = @{$transcript->get_all_Introns()};
1596  Description: Returns an listref of the introns in this transcript in order.
1597  i.e. the first intron in the listref is the 5prime most exon in
1598  the transcript.
1599  Returntype : listref to Bio::EnsEMBL::Intron objects
1600  Exceptions : none
1601  Caller : general
1602  Status : Stable
1603 
1604 =cut
1605 
1606 sub get_all_Introns {
1607  my ($self) = @_;
1608  if( ! defined $self->{'_trans_exon_array'} && defined $self->adaptor() ) {
1609  $self->{'_trans_exon_array'} = $self->adaptor()->db()->
1610  get_ExonAdaptor()->fetch_all_by_Transcript( $self );
1611  }
1612 
1613  my @introns=();
1614  my @exons = @{$self->{'_trans_exon_array'}};
1615  for(my $i=0; $i < scalar(@exons)-1; $i++){
1616  my $intron = new Bio::EnsEMBL::Intron($exons[$i],$exons[$i+1]);
1617  push(@introns, $intron)
1618  }
1619  return \@introns;
1620 }
1621 
1622 =head2 get_all_CDS_Introns
1623 
1624  Arg [1] : none
1625  Example : my @introns = @{$transcript->get_all_CDS_Introns()};
1626  Description: Returns an listref of the introns between coding exons in this transcript in order.
1627  Returntype : listref to Bio::EnsEMBL::Intron objects
1628  Exceptions : none
1629  Caller : general
1630  Status : Stable
1631 
1632 =cut
1633 
1634 sub get_all_CDS_Introns {
1635  my ($self) = @_;
1636  # return an empty list if there is no translation
1637  my $translation = $self->translation or return [];
1638  if( ! defined $self->{'_trans_exon_array'} && defined $self->adaptor() ) {
1639  $self->{'_trans_exon_array'} = $self->adaptor()->db()->
1640  get_ExonAdaptor()->fetch_all_by_Transcript( $self );
1641  }
1642 
1643  my @introns=();
1644  my @exons = @{$self->{'_trans_exon_array'}};
1645  for(my $i=0; $i < scalar(@exons)-1; $i++){
1646  if (!$exons[$i]->is_coding($self)) { next; }
1647  my $intron = new Bio::EnsEMBL::Intron($exons[$i],$exons[$i+1]);
1648  push(@introns, $intron)
1649  }
1650  return \@introns;
1651 }
1652 
1653 
1654 =head2 length
1655 
1656  Args : none
1657  Example : my $t_length = $transcript->length
1658  Description: Returns the sum of the length of all the exons in the transcript.
1659  Returntype : int
1660  Exceptions : none
1661  Caller : general
1662  Status : Stable
1663 
1664 =cut
1665 
1666 sub length {
1667  my( $self ) = @_;
1668 
1669  my $length = 0;
1670  foreach my $ex (@{$self->get_all_Exons}) {
1671  $length += $ex->length;
1672  }
1673 
1674  # adjust the length if post transcriptional edits are enabled
1675  if($self->edits_enabled()) {
1676  foreach my $se (@{$self->get_all_SeqEdits()}) {
1677  $length += $se->length_diff();
1678  }
1679  }
1680 
1681  return $length;
1682 }
1683 
1684 
1685 =head2 flush_Exons
1686 
1687  Arg [1] : none
1688  Example : $transcript->flush_Exons();
1689  Description: Removes all Exons from this transcript and flushes related
1690  internal caches.
1691  Returntype : none
1692  Exceptions : none
1693  Caller : general
1694  Status : Stable
1695 
1696 =cut
1697 
1698 sub flush_Exons {
1699  my ($self) = @_;
1700 
1701  $self->{'transcript_mapper'} = undef;
1702  $self->{'coding_region_start'} = undef;
1703  $self->{'coding_region_end'} = undef;
1704  $self->{'cdna_coding_start'} = undef;
1705  $self->{'cdna_coding_end'} = undef;
1706  $self->{'start'} = undef;
1707  $self->{'end'} = undef;
1708  $self->{'strand'} = undef;
1709 
1710  $self->{'_trans_exon_array'} = [];
1711 }
1712 
1713 =head2 flush_IntronSupportingEvidence
1714 
1715  Example : $transcript->flush_IntronSupportingEvidence();
1716  Description: Removes all IntronSupportingEvidence from this transcript
1717  Returntype : none
1718  Exceptions : none
1719  Caller : general
1720  Status : Stable
1721 
1722 =cut
1723 
1724 sub flush_IntronSupportingEvidence {
1725  my ($self) = @_;
1726  $self->{_ise_array} = [];
1727  return;
1728 }
1729 
1730 =head2 five_prime_utr
1731 
1732  Arg [1] : none
1733  Example : my $five_prime = $transcrpt->five_prime_utr
1734  or warn "No five prime UTR";
1735  Description: Obtains a Bio::Seq object of the five prime UTR of this
1736  transcript. If this transcript is a pseudogene
1737  (i.e. non-translating) or has no five prime UTR undef is
1738  returned instead.
1739  Returntype : Bio::Seq or undef
1740  Exceptions : none
1741  Caller : general
1742  Status : Stable
1743 
1744 =cut
1745 
1746 sub five_prime_utr {
1747  my $self = shift;
1748 
1749  my $cdna_coding_start = $self->cdna_coding_start();
1750 
1751  return undef if(!$cdna_coding_start);
1752 
1753  my $seq = substr($self->spliced_seq, 0, $cdna_coding_start - 1);
1754 
1755  return undef if(!$seq);
1756 
1757  return
1758  Bio::Seq->new( -id => $self->display_id,
1759  -moltype => 'dna',
1760  -alphabet => 'dna',
1761  -seq => $seq );
1762 }
1763 
1764 
1765 =head2 three_prime_utr
1766 
1767  Arg [1] : none
1768  Example : my $three_prime = $transcrpt->three_prime_utr
1769  or warn "No three prime UTR";
1770  Description: Obtains a Bio::Seq object of the three prime UTR of this
1771  transcript. If this transcript is a pseudogene
1772  (i.e. non-translating) or has no three prime UTR,
1773  undef is returned instead.
1774  Returntype : Bio::Seq or undef
1775  Exceptions : none
1776  Caller : general
1777  Status : Stable
1778 
1779 =cut
1780 
1781 sub three_prime_utr {
1782  my $self = shift;
1783 
1784  my $cdna_coding_end = $self->cdna_coding_end();
1785 
1786  return undef if(!$cdna_coding_end);
1787 
1788  my $seq = substr($self->spliced_seq, $cdna_coding_end);
1789 
1790  return undef if(!$seq);
1791 
1792  return
1793  Bio::Seq->new( -id => $self->display_id,
1794  -moltype => 'dna',
1795  -alphabet => 'dna',
1796  -seq => $seq );
1797 }
1798 
1799 =head2 five_prime_utr_Feature
1800 
1801  Example : my $five_prime = $transcrpt->five_prime_utr_Feature
1802  or warn "No five prime UTR";
1803  Description: Returns the genomic coordinates of the start and end of the
1804  5' UTR of this transcript. Note that if you want the sequence
1805  of the 5' UTR use C<five_prime_utr> as this will return the
1806  sequence from the spliced transcript.
1807  Returntype : Bio::EnsEMBL::Feature or undef if there is no UTR
1808  Exceptions : none
1809 
1810 =cut
1811 
1812 sub five_prime_utr_Feature {
1813  my ($self) = @_;
1814  my $start = $self->seq_region_end();
1815  my $end = 0;
1816  my $features = $self->get_all_five_prime_UTRs();
1817  if (scalar(@$features) == 0) { return; }
1818  foreach my $feature (@$features) {
1819  if ($feature->start() < $start) {
1820  $start = $feature->start();
1821  }
1822  if ($feature->end() > $end) {
1823  $end = $feature->end();
1824  }
1825  }
1826  my $feature = Bio::EnsEMBL::Feature->new(
1827  -START => $start,
1828  -END => $end,
1829  -STRAND => $self->strand(),
1830  -SLICE => $self->slice(),
1831  -TYPE => 'five_prime_UTR',
1832  -TRANSCRIPT => $self
1833  );
1834  return $feature;
1835 }
1836 
1837 
1838 
1839 =head2 three_prime_utr_Feature
1840 
1841  Example : my $five_prime = $transcrpt->three_prime_utr_Feature
1842  or warn "No three prime UTR";
1843  Description: Returns the genomic coordinates of the start and end of the
1844  3' UTR of this transcript. Note that if you want the sequence
1845  of the 3' UTR use C<three_prime_utr> as this will return the
1846  sequence from the spliced transcript.
1847  Returntype : Bio::EnsEMBL::Feature or undef if there is no UTR
1848  Exceptions : none
1849 
1850 =cut
1851 
1852 sub three_prime_utr_Feature {
1853  my ($self) = @_;
1854  my $start = $self->seq_region_end();
1855  my $end = 0;
1856  my $features = $self->get_all_three_prime_UTRs();
1857  if (scalar(@$features) == 0) { return; }
1858  foreach my $feature (@$features) {
1859  if ($feature->start() < $start) {
1860  $start = $feature->start();
1861  }
1862  if ($feature->end() > $end) {
1863  $end = $feature->end();
1864  }
1865  }
1866  my $feature = Bio::EnsEMBL::Feature->new(
1867  -START => $start,
1868  -END => $end,
1869  -STRAND => $self->strand(),
1870  -SLICE => $self->slice(),
1871  -TYPE => 'three_prime_UTR',
1872  -TRANSCRIPT => $self
1873  );
1874  return $feature;
1875 }
1876 
1877 
1878 =head2 get_all_five_prime_UTRs
1879 
1880  Example : my $five_primes = $transcript->get_all_five_prime_UTRs
1881  Description: Returns a list of features forming the 5' UTR of this transcript.
1882  Returntype : listref of Bio::EnsEMBL::UTR
1883  Exceptions : none
1884 
1885 =cut
1886 
1887 sub get_all_five_prime_UTRs {
1888  my ($self) = @_;
1889  my $translation = $self->translation();
1890  return [] if ! $translation;
1891 
1892  my @utrs;
1893 
1894  my $cdna_coding_start = $self->cdna_coding_start();
1895 
1896  # if it is greater than 1 then it must have UTR
1897  if($cdna_coding_start > 1) {
1898  my @projections = $self->cdna2genomic(1, ($cdna_coding_start-1));
1899  foreach my $projection (@projections) {
1900  next if $projection->isa('Bio::EnsEMBL::Mapper::Gap');
1901  my $utr = Bio::EnsEMBL::UTR->new(
1902  -START => $projection->start - $self->slice->start + 1,
1903  -END => $projection->end - $self->slice->start + 1,
1904  -SEQ_REGION_START => $projection->start,
1905  -SEQ_REGION_END => $projection->end,
1906  -STRAND => $projection->strand,
1907  -SLICE => $self->slice,
1908  -TRANSCRIPT => $self,
1909  -TYPE => 'five_prime_utr'
1910  );
1911  push(@utrs, $utr);
1912  }
1913  }
1914 
1915  return \@utrs;
1916 }
1917 
1918 
1919 =head2 get_all_three_prime_UTRs
1920 
1921  Example : my $three_primes = $transcript->get_all_three_prime_UTRs
1922  Description: Returns a list of features forming the 3' UTR of this transcript.
1923  Returntype : listref of Bio::EnsEMBL::UTR
1924  Exceptions : none
1925 
1926 =cut
1927 
1928 sub get_all_three_prime_UTRs {
1929  my ($self) = @_;
1930  my $translation = $self->translation();
1931  return [] if ! $translation;
1932 
1933  my @utrs;
1934 
1935  my $cdna_coding_end = $self->cdna_coding_end();
1936  if($cdna_coding_end < $self->length()) {
1937  my @projections = $self->cdna2genomic(($cdna_coding_end+1), $self->length());
1938  foreach my $projection (@projections) {
1939  next if $projection->isa('Bio::EnsEMBL::Mapper::Gap');
1940  my $utr = Bio::EnsEMBL::UTR->new(
1941  -START => $projection->start - $self->slice->start + 1,
1942  -END => $projection->end - $self->slice->start + 1,
1943  -SEQ_REGION_START => $projection->start,
1944  -SEQ_REGION_END => $projection->end,
1945  -STRAND => $projection->strand,
1946  -SLICE => $self->slice,
1947  -TRANSCRIPT => $self,
1948  -TYPE => 'three_prime_utr'
1949  );
1950  push(@utrs, $utr);
1951  }
1952  }
1953 
1954  return \@utrs;
1955 }
1956 
1957 =head2 get_all_CDS
1958 
1959  Example : my $cds = $transcript->get_all_CDS
1960  Description: Returns a list of features forming the coding regions of the transcript
1961  Returntype : listref of Bio::EnsEMBL::CDS
1962  Exceptions : none
1963 
1964 =cut
1965 
1966 sub get_all_CDS {
1967  my ($self) = @_;
1968  my $translation = $self->translation();
1969  return [] if ! $translation;
1970 
1971  my @cds;
1972  my $translation_id = $translation->stable_id();
1973 
1974  foreach my $exon (@{ $self->get_all_translateable_Exons}) {
1975  my $phase = $exon->phase();
1976  $phase = 0 if $phase < 0;
1977  $phase =~ tr/12/21/;
1978 
1979  my $cds = Bio::EnsEMBL::CDS->new(
1980  -START => $exon->start,
1981  -END => $exon->end,
1982  -STRAND => $exon->strand,
1983  -SEQ_REGION_START => $exon->seq_region_start,
1984  -SEQ_REGION_END => $exon->seq_region_end,
1985  -TRANSLATION_ID => $translation_id,
1986  -SLICE => $self->slice,
1987  -TRANSCRIPT => $self,
1988  -PHASE => $phase
1989  );
1990  push(@cds, $cds);
1991  }
1992 
1993  return \@cds;
1994 }
1995 
1996 
1997 
1998 =head2 get_all_translateable_Exons
1999 
2000  Args : none
2001  Description: Returns a list of exons that translate with the
2002  start and end exons truncated to the CDS regions.
2003  This function does not take into account any SeqEdits
2004  (post transcriptional RNA modifictions) when constructing the
2005  the 'translateable' exons, and it does not update the phase
2006  information of the created 'translateable' exons.
2007 
2008  If this transcript is a pseudogene (i.e. non-translateable)
2009  a reference to an empty list is returned.
2010 
2011  Returntype : listref Bio::EnsEMBL::Exon
2012  Exceptions : throw if translation has invalid information
2013  Caller : Genebuild
2014  Status : Stable
2015 
2016 =cut
2017 
2018 
2019 sub get_all_translateable_Exons {
2020  my ( $self ) = @_;
2021 
2022  #return an empty list if there is no translation (i.e. pseudogene)
2023  my $translation = $self->translation or return [];
2024  my $start_exon = $translation->start_Exon;
2025  my $end_exon = $translation->end_Exon;
2026  my $t_start = $translation->start;
2027  my $t_end = $translation->end;
2028 
2029  my( @translateable );
2030 
2031  foreach my $ex (@{$self->get_all_Exons}) {
2032 
2033  if ($ex ne $start_exon and ! @translateable) {
2034  next; # Not yet in translated region
2035  }
2036 
2037  my $length = $ex->length;
2038 
2039  my $adjust_start = 0;
2040  my $adjust_end = 0;
2041  # Adjust to translation start if this is the start exon
2042  if ($ex == $start_exon ) {
2043  if ($t_start < 1 or $t_start > $length) {
2044  warning("WARN: Translation start '$t_start' is outside exon " . $ex->display_id . " length=$length");
2045  return [];
2046  }
2047  $adjust_start = $t_start - 1;
2048  }
2049 
2050  # Adjust to translation end if this is the end exon
2051  if ($ex == $end_exon) {
2052 # if ($t_end < 1 or $t_end > $length) {
2053 # throw("Translation end '$t_end' is outside exon $ex length=$length");
2054 # }
2055  $adjust_end = $t_end - $length;
2056  }
2057 
2058  # Make a truncated exon if the translation start or
2059  # end causes the coordinates to be altered.
2060  if ($adjust_end || $adjust_start) {
2061  my $newex = $ex->adjust_start_end( $adjust_start, $adjust_end );
2062 
2063  push( @translateable, $newex );
2064  } else {
2065  push(@translateable, $ex);
2066  }
2067 
2068  # Exit the loop when we've found the last exon
2069  last if $ex eq $end_exon;
2070  }
2071  return \@translateable;
2072 }
2073 
2074 
2075 =head2 translate
2076 
2077  Arg [1] : Boolean, emulate the behavior of old bioperl versions where
2078  an incomplete final codon of 2 characters is padded and guessed
2079  Example : none
2080  Description: Return the peptide (plus eventual stop codon) for
2081  this transcript. Does N-padding of non-phase
2082  matching exons. It uses translateable_seq
2083  internally. Returns undef if this Transcript does
2084  not have a translation (i.e. pseudogene).
2085  Returntype : Bio::Seq or undef
2086  Exceptions : none
2087  Caller : general
2088  Status : Stable
2089 
2090 =cut
2091 
2092 sub translate {
2093  my ($self, $complete_codon) = @_;
2094  my $codon_table_id;
2095 
2096  if ( !defined( $self->translation() ) ) { return undef }
2097 
2098  # Alternative codon tables (such as the mitochondrial codon table)
2099  # can be specified for a sequence region via the seq_region_attrib
2100  # table. A list of codon tables and their codes is at:
2101  # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
2102 
2103  if ( defined( $self->slice() ) ) {
2104  my $attrib;
2105 
2106  ($attrib) = @{ $self->slice()->get_all_Attributes('codon_table') };
2107  if ( defined($attrib) ) {
2108  $codon_table_id = $attrib->value();
2109  }
2110  }
2111 
2112  $codon_table_id ||= 1; # default vertebrate codon table
2113  my $codon_table =
2114  Bio::Tools::CodonTable->new( -id => $codon_table_id );
2115 
2116  my $mrna = $self->translateable_seq();
2117 
2118  my $delta = CORE::length($mrna) % 3;
2119  if ( $delta > 0 ) {
2120  # If we have a partial codon of we need to decide if we
2121  # trim it or not to fix some bad behaviour in older bioperl
2122  # versions
2123  if ( $complete_codon ) {
2124  # If we want to do the bad behavior of bioperl 1.6.1 and older
2125  # where we guess the last codon if incomplete, pad an N
2126  # to the mrna sequence
2127  $mrna .= 'N' x (3 - $delta);
2128  } else {
2129  # Otherwise trim those last bp's off so the behavior is
2130  # consistent across bioperl versions
2131  chop $mrna for 1 .. $delta;
2132  }
2133  }
2134 
2135  if ( CORE::length($mrna) < 1 ) { return undef }
2136 
2137  my $first_mrna_codon = substr( $mrna, 0, 3 );
2138  my $last_mrna_codon = substr( $mrna, -3, 3 );
2139 
2140  my $display_id = $self->translation->display_id()
2141  || "" . $self->translation();
2142 
2143  # From BioPerl perspective, we'll treat our CDS as incomplete:
2144  # thus, BioPerl will not
2145  # - remove the terminator character from the peptide
2146  # - check if there are terminator character inside the peptide
2147  # - turn the first amino acid into 'M', if the first codon is a
2148  # legitimate start
2149  # NOTE: We will not be checking for in-sequence '*', as we have to
2150  # apply the seq_edits after translation occurs
2151 
2152  my $p_seq = $codon_table->translate($mrna, 0);
2153 
2154  # Remove final stop codon from the mrna if it is present. Produced
2155  # peptides will not have '*' at end. If terminal stop codon is
2156  # desired call translatable_seq directly and produce a translation
2157  # from it.
2158  if ( $codon_table->is_ter_codon( $last_mrna_codon ) ) {
2159  chop $p_seq;
2160  }
2161 
2162  # Update the first amino acid to M, as appropriate
2163  if ( substr($p_seq,0,1) ne 'M' && $codon_table->is_start_codon( $first_mrna_codon ) ) {
2164  $p_seq = 'M' . substr($p_seq,1);
2165  }
2166 
2167  my $translation = Bio::Seq->new( -seq => $p_seq,
2168  -alphabet => 'protein',
2169  -id => $display_id );
2170 
2171  if ( $self->edits_enabled() ) {
2172  $self->translation()->modify_translation($translation);
2173  }
2174 
2175  return $translation;
2176 } ## end sub translate
2177 
2178 
2179 =head2 seq
2180 
2181  Description: Returns a Bio::Seq object which consists of just
2182  : the sequence of the exons concatenated together,
2183  : without messing about with padding with N\'s from
2184  : Exon phases like B<dna_seq> does.
2185  Args : none
2186  Example : none
2187  Returntype : Bio::Seq
2188  Exceptions : none
2189  Caller : general
2190  Status : Stable
2191 
2192 =cut
2193 
2194 sub seq {
2195  my ($self) = @_;
2196 
2197  return
2198  Bio::Seq->new( -id => $self->display_id,
2199  -moltype => 'dna',
2200  -alphabet => 'dna',
2201  -seq => $self->spliced_seq );
2202 }
2203 
2204 
2205 =head2 pep2genomic
2206 
2207  Description: See Bio::EnsEMBL::TranscriptMapper::pep2genomic
2208 
2209 =cut
2210 
2211 sub pep2genomic {
2212  my $self = shift;
2213  return $self->get_TranscriptMapper()->pep2genomic(@_);
2214 }
2215 
2216 
2217 =head2 genomic2pep
2218 
2219  Description: See Bio::EnsEMBL::TranscriptMapper::genomic2pep
2220 
2221 =cut
2222 
2223 sub genomic2pep {
2224  my $self = shift;
2225  return $self->get_TranscriptMapper()->genomic2pep(@_);
2226 }
2227 
2228 
2229 =head2 cdna2genomic
2230 
2231  Description: See Bio::EnsEMBL::TranscriptMapper::cdna2genomic
2232 
2233 =cut
2234 
2235 sub cdna2genomic {
2236  my $self = shift;
2237  return $self->get_TranscriptMapper()->cdna2genomic(@_);
2238 }
2239 
2240 
2241 =head2 genomic2cdna
2242 
2243  Description: See Bio::EnsEMBL::TranscriptMapper::genomic2cdna
2244 
2245 =cut
2246 
2247 sub genomic2cdna {
2248  my $self = shift;
2249  return $self->get_TranscriptMapper->genomic2cdna(@_);
2250 }
2251 
2252 
2253 =head2 get_TranscriptMapper
2254 
2255  Args : none
2256  Example : my $trans_mapper = $transcript->get_TranscriptMapper();
2257  Description: Gets a TranscriptMapper object which can be used to perform
2258  a variety of coordinate conversions relating this transcript,
2259  genomic sequence and peptide resulting from this transcripts
2260  translation.
2261  Returntype : Bio::EnsEMBL::TranscriptMapper
2262  Exceptions : none
2263  Caller : cdna2genomic, pep2genomic, genomic2cdna, cdna2genomic
2264  Status : Stable
2265 
2266 =cut
2267 
2268 sub get_TranscriptMapper {
2269  my ( $self ) = @_;
2270  return $self->{'transcript_mapper'} ||=
2271  Bio::EnsEMBL::TranscriptMapper->new($self);
2272 }
2273 
2274 
2275 =head2 start_Exon
2276 
2277  Title : start_Exon
2278  Usage : $start_exon = $transcript->start_Exon;
2279  Returntype : Bio::EnsEMBL::Exon
2280  Description : The first exon in the transcript.
2281  Args : NONE
2282  Status : Stable
2283 
2284 =cut
2285 
2286 sub start_Exon {
2287  my $self = shift;
2288  return $self->get_all_Exons()->[0];
2289 }
2290 
2291 
2292 =head2 end_Exon
2293 
2294  Title : end_exon
2295  Usage : $end_exon = $transcript->end_Exon;
2296  Description : The last exon in the transcript.
2297  Returntype : Bio::EnsEMBL::Exon
2298  Args : NONE
2299  Status : Stable
2300 
2301 =cut
2302 
2303 sub end_Exon {
2304  my $self = shift;
2305  return $self->get_all_Exons()->[-1];
2306 }
2307 
2308 
2309 =head2 description
2310 
2311  Title : description
2312  Usage : $obj->description($newval)
2313  Function:
2314  Returns : String
2315  Args : newvalue (optional)
2316  Status : Stable
2317 
2318 =cut
2319 
2320 sub description {
2321  my $self = shift;
2322  $self->{'description'} = shift if( @_ );
2323  return $self->{'description'};
2324 }
2325 
2326 
2327 =head2 version
2328 
2329  Title : version
2330  Usage : $obj->version()
2331  Function:
2332  Returns : String
2333  Args :
2334  Status : Stable
2335 
2336 =cut
2337 
2338 sub version {
2339  my $self = shift;
2340  $self->{'version'} = shift if( @_ );
2341  return $self->{'version'};
2342 }
2343 
2344 =head2 stable_id
2345 
2346  Title : stable_id
2347  Usage : $obj->stable_id
2348  Function:
2349  Returns : String
2350  Args :
2351  Status : Stable
2352 
2353 =cut
2354 
2355 sub stable_id {
2356  my $self = shift;
2357  $self->{'stable_id'} = shift if( @_ );
2358  return $self->{'stable_id'};
2359 }
2360 
2361 =head2 stable_id_version
2362 
2363  Arg [1] : (optional) String - the stable ID with version to set
2364  Example : $transcript->stable_id("ENST0000000001.3");
2365  Description: Getter/setter for stable id with version for this transcript.
2366  Returntype : String
2367  Exceptions : none
2368  Caller : general
2369  Status : Stable
2370 
2371 =cut
2372 
2373 sub stable_id_version {
2374  my $self = shift;
2375  if(my $stable_id = shift) {
2376  # See if there's an embedded period, assume that's a
2377  # version, might not work for some species but you
2378  # should use ->stable_id() and version() if you're worried
2379  # about ambiguity
2380  my $vindex = rindex($stable_id, '.');
2381  # Set the stable_id and version pair depending on if
2382  # we found a version delimiter in the stable_id
2383  ($self->{stable_id}, $self->{version}) = ($vindex > 0 ?
2384  (substr($stable_id,0,$vindex), substr($stable_id,$vindex+1)) :
2385  $stable_id, undef);
2386  }
2387  return $self->{stable_id} . ($self->{version} ? ".$self->{version}" : '');
2388 }
2389 
2390 =head2 is_current
2391 
2392  Arg [1] : Boolean $is_current
2393  Example : $transcript->is_current(1)
2394  Description: Getter/setter for is_current state of this transcript.
2395  Returntype : Int
2396  Exceptions : none
2397  Caller : general
2398  Status : Stable
2399 
2400 =cut
2401 
2402 sub is_current {
2403  my $self = shift;
2404  $self->{'is_current'} = shift if (@_);
2405  return $self->{'is_current'};
2406 }
2407 
2408 
2409 =head2 created_date
2410 
2411  Arg [1] : (optional) string to be used for the created date
2412  Example : none
2413  Description: get/set for attribute created date
2414  Returntype : string
2415  Exceptions : none
2416  Caller : general
2417  Status : Stable
2418 
2419 =cut
2420 
2421 sub created_date {
2422  my $self = shift;
2423  $self->{'created_date'} = shift if ( @_ );
2424  return $self->{'created_date'};
2425 }
2426 
2427 
2428 =head2 modified_date
2429 
2430  Arg [1] : (optional) string to be used for the modified date
2431  Example : none
2432  Description: get/set for attribute modified date
2433  Returntype : string
2434  Exceptions : none
2435  Caller : general
2436  Status : Stable
2437 
2438 =cut
2439 
2440 sub modified_date {
2441  my $self = shift;
2442  $self->{'modified_date'} = shift if ( @_ );
2443  return $self->{'modified_date'};
2444 }
2445 
2446 
2447 =head2 swap_exons
2448 
2449  Arg [1] : Bio::EnsEMBL::Exon $old_Exon
2450  An exon that should be replaced
2451  Arg [2] : Bio::EnsEMBL::Exon $new_Exon
2452  The replacement Exon
2453  Example : none
2454  Description: exchange an exon in the current Exon list with a given one.
2455  Usually done before storing of Gene, so the Exons can
2456  be shared between Transcripts.
2457  Returntype : none
2458  Exceptions : none
2459  Caller : GeneAdaptor->store()
2460  Status : Stable
2461 
2462 =cut
2463 
2464 sub swap_exons {
2465  my ( $self, $old_exon, $new_exon, $skip_exon_sf) = @_;
2466 
2467  my $arref = $self->{'_trans_exon_array'};
2468  for(my $i = 0; $i < @$arref; $i++) {
2469  if($arref->[$i] == $old_exon ) {
2470  $new_exon->add_supporting_features(@{$old_exon->get_all_supporting_features}) unless $skip_exon_sf;
2471  $arref->[$i] = $new_exon;
2472  last;
2473  }
2474  }
2475 
2476  if( defined $self->{'translation'} ) {
2477  if( $self->translation()->start_Exon() == $old_exon ) {
2478  $self->translation()->start_Exon( $new_exon );
2479  }
2480  if( $self->translation()->end_Exon() == $old_exon ) {
2481  $self->translation()->end_Exon( $new_exon );
2482  }
2483  }
2484 }
2485 
2486 =head2 exon_rank
2487 
2488  Arg [1] : Bio::EnsEMBL::Exon $Exon
2489  Query exon
2490  Example : $rank = $transcript->exon_rank($exon);
2491  Description: Returns the rank of an exon relative to the transcript
2492  Returntype : none
2493  Exceptions : Throws if the exon does not belong to the transcript
2494  Caller : General
2495  Status : Stable
2496 
2497 =cut
2498 
2499 sub exon_rank {
2500  my ( $self, $exon ) = @_;
2501 
2502  if (!defined( $self->{'_trans_exon_array'} )
2503  && defined( $self->adaptor() ) )
2504  {
2505  $self->{'_trans_exon_array'} =
2506  $self->adaptor()->db()->get_ExonAdaptor()
2507  ->fetch_all_by_Transcript($self);
2508  }
2509 
2510  my $arref = $self->{'_trans_exon_array'};
2511  my $rank;
2512 
2513  if (!defined $arref) {
2514  throw "Transcript does not have any exons";
2515  }
2516  for(my $i = 0; $i < @$arref; $i++) {
2517  if($arref->[$i]->stable_id() eq $exon->stable_id()) {
2518  $rank = $i+1;
2519  last;
2520  }
2521  }
2522 
2523  if (!defined $rank) {
2524  throw "Exon does not belong to transcript";
2525  }
2526 
2527  return $rank;
2528 }
2529 
2530 
2531 =head2 equals
2532 
2534  Example : if ($transcriptA->equals($transcriptB)) { ... }
2535  Description : Compares two transcripts for equality.
2536  The test for eqality goes through the following list
2537  and terminates at the first true match:
2538 
2539  1. If Bio::EnsEMBL::Feature::equals() returns false,
2540  then the transcripts are *not* equal.
2541  2. If the biotypes differ, then the transcripts are
2542  *not* equal.
2543  3. If both transcripts have stable IDs: if these are
2544  the same, the transcripts are equal, otherwise not.
2545  4. If both transcripts have the same number of exons
2546  and if these are (when compared pair-wise sorted by
2547  start-position and length) the same, then they are
2548  equal, otherwise not.
2549 
2550  Return type : Boolean (0, 1)
2551 
2552  Exceptions : Thrown if a non-transcript is passed as the argument.
2553 
2554 =cut
2555 
2556 sub equals {
2557  my ( $self, $transcript ) = @_;
2558 
2559  if ( !defined($transcript) ) { return 0 }
2560  if ( $self eq $transcript ) { return 1 }
2561 
2562  assert_ref( $transcript, 'Bio::EnsEMBL::Transcript' );
2563 
2564  my $feature_equals = $self->SUPER::equals($transcript);
2565  if ( defined($feature_equals) && $feature_equals == 0 ) {
2566  return 0;
2567  }
2568 
2569  if ( $self->get_Biotype->name ne $transcript->get_Biotype->name ) {
2570  return 0;
2571  }
2572 
2573  if ( defined( $self->stable_id() ) &&
2574  defined( $transcript->stable_id() ) )
2575  {
2576  if ( $self->stable_id() eq $transcript->stable_id() ) {
2577  return 1;
2578  }
2579  else {
2580  return 0;
2581  }
2582  }
2583 
2584  my @self_exons = sort {
2585  $a->start() <=> $b->start() ||
2586  $a->length() <=> $b->length()
2587  } @{ $self->get_all_Exons() };
2588  my @transcript_exons = sort {
2589  $a->start() <=> $b->start() ||
2590  $a->length() <=> $b->length()
2591  } @{ $transcript->get_all_Exons() };
2592 
2593  if ( scalar(@self_exons) != scalar(@transcript_exons) ) {
2594  return 0;
2595  }
2596 
2597  while (@self_exons) {
2598  my $self_exon = shift(@self_exons);
2599  my $transcript_exon = shift(@transcript_exons);
2600 
2601  if ( !$self_exon->equals($transcript_exon) ) {
2602  return 0;
2603  }
2604  }
2605 
2606  return 1;
2607 } ## end sub equals
2608 
2609 =head2 transform
2610 
2611  Arg 1 : String $coordinate_system_name
2612  Arg [2] : String $coordinate_system_version
2613  Example : $transcript = $transcript->transform('contig');
2614  $transcript = $transcript->transform('chromosome', 'NCBI33');
2615  Description: Moves this Transcript to the given coordinate system.
2616  If this Transcript has Exons attached, they move as well.
2617  A new Transcript is returned. If the transcript cannot be
2618  transformed to the destination coordinate system undef is
2619  returned instead.
2620  Returntype : Bio::EnsEMBL::Transcript
2621  Exceptions : wrong parameters
2622  Caller : general
2623  Status : Medium Risk
2624  : deprecation needs to be removed at some time
2625 
2626 =cut
2627 
2628 
2629 sub transform {
2630  my $self = shift;
2631 
2632  my $new_transcript = $self->SUPER::transform(@_);
2633  if ( !defined($new_transcript) ) {
2634  my @segments = @{ $self->project(@_) };
2635  # if it projects, maybe the exons transform well?
2636  # lazy load them here
2637  if ( !@segments ) {
2638  return undef;
2639  }
2640  $self->get_all_Exons();
2641  }
2642 
2643 
2644  if( exists $self->{'_trans_exon_array'} ) {
2645  my @new_exons;
2646  my ( $low_start, $hi_end, $slice );
2647  # we want to check whether the transform preserved 5prime 3prime
2648  # ordering. This assumes 5->3 order. No complaints on transsplicing.
2649 
2650  my ( $last_new_start, $last_old_strand,
2651  $last_new_strand, $start_exon, $end_exon,
2652  $last_seq_region_name );
2653  my $first = 1;
2654  my $ignore_order = 0;
2655  my $order_broken = 0;
2656 
2657  for my $old_exon ( @{$self->{'_trans_exon_array'}} ) {
2658  my $new_exon = $old_exon->transform( @_ );
2659  return undef if( !defined $new_exon );
2660  if( ! defined $new_transcript ) {
2661  if( !$first ) {
2662  if( $old_exon->strand() != $last_old_strand ) {
2663  # transsplicing, ignore ordering
2664  $ignore_order = 1;
2665  }
2666 
2667  if( $new_exon->slice()->seq_region_name() ne
2668  $last_seq_region_name ) {
2669  return undef;
2670  }
2671 
2672  if( $last_new_strand == 1 and
2673  $new_exon->start() < $last_new_start ) {
2674  $order_broken = 1;
2675  }
2676 
2677  if( $last_new_strand == -1 and
2678  $new_exon->start() > $last_new_start ) {
2679  $order_broken = 1;
2680  }
2681 
2682  #additional check that if exons were on same strand previously, they should be again
2683  if(($last_old_strand == $old_exon->strand()) and !($last_new_strand == $new_exon->strand())){
2684  return undef;
2685  }
2686 
2687  if( $new_exon->start() < $low_start ) {
2688  $low_start = $new_exon->start();
2689  }
2690  if( $new_exon->end() > $hi_end ) {
2691  $hi_end = $new_exon->end();
2692  }
2693  } else {
2694  $first = 0;
2695  $low_start = $new_exon->start();
2696  $hi_end = $new_exon->end();
2697  }
2698 
2699  $last_seq_region_name = $new_exon->slice()->seq_region_name();
2700  $last_old_strand = $old_exon->strand();
2701  $last_new_start = $new_exon->start();
2702  $last_new_strand = $new_exon->strand();
2703  }
2704 
2705  if( defined $self->{'translation'} ) {
2706  if( $self->translation()->start_Exon() == $old_exon ) {
2707  $start_exon = $new_exon;
2708  }
2709  if( $self->translation()->end_Exon() == $old_exon ) {
2710  $end_exon = $new_exon;
2711  }
2712  }
2713  push( @new_exons, $new_exon );
2714  }
2715 
2716  if( $order_broken && !$ignore_order ) {
2717  warning( "Order of exons broken in transform of ".$self->dbID() );
2718  return undef;
2719  }
2720 
2721  if( !defined $new_transcript ) {
2722  %$new_transcript = %$self;
2723  bless $new_transcript, ref( $self );
2724  $new_transcript->start( $low_start );
2725  $new_transcript->end( $hi_end );
2726  $new_transcript->slice( $new_exons[0]->slice() );
2727  $new_transcript->strand( $new_exons[0]->strand() );
2728  }
2729 
2730  $new_transcript->{'_trans_exon_array'} = \@new_exons;
2731 
2732  # should be ok to do inside exon array loop
2733  # translations only exist together with the exons ...
2734 
2735  if( defined $self->{'translation'} ) {
2736  my $new_translation;
2737  %$new_translation = %{$self->{'translation'}};;
2738  bless $new_translation, ref( $self->{'translation'} );
2739  $new_transcript->{'translation'} = $new_translation;
2740  $new_translation->start_Exon( $start_exon );
2741  $new_translation->end_Exon( $end_exon );
2742  }
2743  }
2744 
2745  if( exists $self->{'_supporting_evidence'} ) {
2746  my @new_features;
2747  for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
2748  my $new_feature = $old_feature->transform( @_ );
2749  if (defined $new_feature) {
2750  push @new_features, $new_feature;
2751  }
2752  }
2753  $new_transcript->{'_supporting_evidence'} = \@new_features;
2754  }
2755 
2756  if(exists $self->{_ise_array}) {
2757  my @new_features;
2758  foreach my $old_feature ( @{$self->{_ise_array}} ) {
2759  my $new_feature = $old_feature->transform(@_);
2760  push( @new_features, $new_feature );
2761  }
2762  $new_transcript->{_ise_array} = \@new_features;
2763  }
2764 
2765  if(exists $self->{attributes}) {
2766  $new_transcript->{attributes} = [@{$self->{attributes}}];
2767  }
2768 
2769  # flush cached internal values that depend on the exon coords
2770  $new_transcript->{'transcript_mapper'} = undef;
2771  $new_transcript->{'coding_region_start'} = undef;
2772  $new_transcript->{'coding_region_end'} = undef;
2773  $new_transcript->{'cdna_coding_start'} = undef;
2774  $new_transcript->{'cdna_coding_end'} = undef;
2775 
2776  return $new_transcript;
2777 }
2778 
2779 
2780 =head2 transfer
2781 
2782  Arg 1 : Bio::EnsEMBL::Slice $destination_slice
2783  Example : $transcript = $transcript->transfer($slice);
2784  Description: Moves this transcript to the given slice.
2785  If this Transcripts has Exons attached, they move as well.
2786  Returntype : Bio::EnsEMBL::Transcript
2787  Exceptions : none
2788  Caller : general
2789  Status : Stable
2790 
2791 =cut
2792 
2793 
2794 sub transfer {
2795  my $self = shift;
2796 
2797  my $new_transcript = $self->SUPER::transfer( @_ );
2798  return undef unless $new_transcript;
2799 
2800  if( defined $self->{'translation'} ) {
2801  my $new_translation;
2802  %$new_translation = %{$self->{'translation'}};;
2803  bless $new_translation, ref( $self->{'translation'} );
2804  $new_transcript->{'translation'} = $new_translation;
2805  }
2806 
2807  if( exists $self->{'_trans_exon_array'} ) {
2808  my @new_exons;
2809  for my $old_exon ( @{$self->{'_trans_exon_array'}} ) {
2810  my $new_exon = $old_exon->transfer( @_ );
2811  if( defined $new_transcript->{'translation'} ) {
2812  if( $new_transcript->translation()->start_Exon() == $old_exon ) {
2813  $new_transcript->translation()->start_Exon( $new_exon );
2814  }
2815  if( $new_transcript->translation()->end_Exon() == $old_exon ) {
2816  $new_transcript->translation()->end_Exon( $new_exon );
2817  }
2818  }
2819  push( @new_exons, $new_exon );
2820  }
2821 
2822  $new_transcript->{'_trans_exon_array'} = \@new_exons;
2823  }
2824 
2825  if( exists $self->{'_supporting_evidence'} ) {
2826  my @new_features;
2827  for my $old_feature ( @{$self->{'_supporting_evidence'}} ) {
2828  my $new_feature = $old_feature->transfer( @_ );
2829  push( @new_features, $new_feature );
2830  }
2831  $new_transcript->{'_supporting_evidence'} = \@new_features;
2832  }
2833 
2834  if(exists $self->{_ise_array}) {
2835  my @new_features;
2836  foreach my $old_feature ( @{$self->{_ise_array}} ) {
2837  my $new_feature = $old_feature->transfer(@_);
2838  push( @new_features, $new_feature );
2839  }
2840  $new_transcript->{_ise_array} = \@new_features;
2841  }
2842 
2843  if(exists $self->{attributes}) {
2844  $new_transcript->{attributes} = [@{$self->{attributes}}];
2845  }
2846 
2847  # flush cached internal values that depend on the exon coords
2848  $new_transcript->{'transcript_mapper'} = undef;
2849  $new_transcript->{'coding_region_start'} = undef;
2850  $new_transcript->{'coding_region_end'} = undef;
2851  $new_transcript->{'cdna_coding_start'} = undef;
2852  $new_transcript->{'cdna_coding_end'} = undef;
2853 
2854  return $new_transcript;
2855 }
2856 
2857 
2858 =head2 recalculate_coordinates
2859 
2860  Args : none
2861  Example : none
2862  Description: called when exon coordinate change happened to recalculate the
2863  coords of the transcript. This method should be called if one
2864  of the exons has been changed.
2865  Returntype : none
2866  Exceptions : none
2867  Caller : internal
2868  Status : Stable
2869 
2870 =cut
2871 
2872 sub recalculate_coordinates {
2873  my ($self) = @_;
2874 
2875  my $exons = $self->get_all_Exons();
2876 
2877  if ( !$exons || !@{$exons} ) { return }
2878 
2879  my ( $slice, $start, $end, $strand );
2880 
2881  my $e_index;
2882  for ( $e_index = 0; $e_index < @{$exons}; $e_index++ ) {
2883  my $e = $exons->[$e_index];
2884 
2885  # Skip missing or unmapped exons!
2886  if ( defined($e) && defined( $e->start() ) ) {
2887  $slice = $e->slice();
2888  $strand = $e->strand();
2889  $start = $e->start();
2890  $end = $e->end();
2891 
2892  last;
2893  }
2894  }
2895 
2896  my $transsplicing = 0;
2897 
2898  # Start loop after first exon with coordinates
2899  for ( ; $e_index < @{$exons}; $e_index++ ) {
2900  my $e = $exons->[$e_index];
2901 
2902  # Skip missing or unmapped exons!
2903  if ( !defined($e) || !defined( $e->start() ) ) { next }
2904 
2905  if ( $e->start() < $start ) {
2906  $start = $e->start();
2907  }
2908 
2909  if ( $e->end() > $end ) {
2910  $end = $e->end();
2911  }
2912 
2913  if ( defined($slice)
2914  && $e->slice()
2915  && $e->slice()->name() ne $slice->name() )
2916  {
2917  throw( "Exons with different slices "
2918  . "are not allowed on one Transcript" );
2919  }
2920 
2921  if ( $e->strand() != $strand ) {
2922  $transsplicing = 1;
2923  }
2924  } ## end for ( ; $e_index < @{$exons...})
2925  if ($transsplicing) {
2926  warning("Transcript contained trans splicing event");
2927  }
2928 
2929  $self->start($start);
2930  $self->end($end);
2931  $self->strand($strand);
2932  $self->slice($slice);
2933 
2934  # flush cached internal values that depend on the exon coords
2935  $self->{'transcript_mapper'} = undef;
2936  $self->{'coding_region_start'} = undef;
2937  $self->{'coding_region_end'} = undef;
2938  $self->{'cdna_coding_start'} = undef;
2939  $self->{'cdna_coding_end'} = undef;
2940 } ## end sub recalculate_coordinates
2941 
2942 
2943 =head2 display_id
2944 
2945  Arg [1] : none
2946  Example : print $transcript->display_id();
2947  Description: This method returns a string that is considered to be
2948  the 'display' identifier. For transcripts this is (depending on
2949  availability and in this order) the stable Id, the dbID or an
2950  empty string.
2951  Returntype : string
2952  Exceptions : none
2953  Caller : web drawing code
2954  Status : Stable
2955 
2956 =cut
2957 
2958 sub display_id {
2959  my $self = shift;
2960  return $self->{'stable_id'} || $self->dbID || '';
2961 }
2962 
2963 
2964 =head2 get_all_DASFactories
2965 
2966  Arg [1] : none
2967  Function : Retrieves a listref of registered DAS objects
2968  Returntype: [ DAS_objects ]
2969  Exceptions:
2970  Caller :
2971  Example : $dasref = $prot->get_all_DASFactories
2972  Status : Stable
2973 
2974 =cut
2975 
2976 sub get_all_DASFactories {
2977  my $self = shift;
2978  return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
2979 }
2980 
2981 
2982 =head2 get_all_DAS_Features
2983 
2984  Arg [1] : none
2985  Example : $features = $prot->get_all_DAS_Features;
2986  Description: Retrieves a hash reference to a hash of DAS feature
2987  sets, keyed by the DNS, NOTE the values of this hash
2988  are an anonymous array containing:
2989  (1) a pointer to an array of features;
2990  (2) a pointer to the DAS stylesheet
2991  Returntype : hashref of Bio::SeqFeatures
2992  Exceptions : ?
2993  Caller : webcode
2994  Status : Stable
2995 
2996 
2997 =cut
2998 
2999 sub get_all_DAS_Features {
3000  my ($self,@args) = @_;
3001 
3002  my $db = $self->adaptor->db;
3003  my $GeneAdaptor = $db->get_GeneAdaptor;
3004  my $Gene = $GeneAdaptor->fetch_by_transcript_stable_id($self->stable_id);
3005  my $slice = $Gene->feature_Slice;
3006  return $self->SUPER::get_all_DAS_Features($slice);
3007 }
3008 
3009 
3010 =head2 get_all_RNAProducts
3011 
3012  Arg [1] : optional string $type_code type of rnaproducts to retrieve
3013  Example : @transc_mirnas = @{$transcript->get_all_RNAProducts('miRNA')};
3014  @transc_rnaproducts = @{$transcript->get_all_RNAProducts()};
3015  Description: Gets a list of RNAProducts of this transcript.
3016  Optionally just get RNAProducts for given type code.
3017  Returntype : listref Bio::EnsEMBL::RNAProduct
3018  Exceptions : none
3019  Caller : general
3020  Status : In Development
3021 
3022 =cut
3023 
3024 sub get_all_RNAProducts {
3025  my ($self, $type_code) = @_;
3026 
3027  if (!exists $self->{'rnaproducts'}) {
3028  if (!$self->adaptor()) {
3029  return [];
3030  }
3031 
3032  my $rnaproduct_adaptor = $self->adaptor->db->get_RNAProductAdaptor();
3033  $self->{'rnaproducts'} = $rnaproduct_adaptor->fetch_all_by_Transcript($self);
3034  }
3035 
3036  if (defined $type_code) {
3037  my @results = grep { $_->type_code() eq $type_code } @{$self->{'rnaproducts'}};
3038  return \@results;
3039  } else {
3040  return $self->{'rnaproducts'};
3041  }
3042 }
3043 
3044 
3045 
3046 =head2 _compare_xrefs
3047 
3048  Description: compare xrefs based on priority (descending), then
3049  name (ascending), then display_label (ascending)
3050 
3051 =cut
3052 
3053 sub _compare_xrefs {
3054  # compare on priority first (descending)
3055  if ( $a->priority() != $b->priority() ) {
3056  return $b->priority() <=> $a->priority();
3057  } else {
3058  # equal priorities, compare on external_db name
3059  if ( $a->dbname() ne $b->dbname() ) {
3060  return $a->dbname() cmp $b->dbname();
3061  } else {
3062  # equal priorities and names, compare on display_label
3063  return $a->display_id() cmp $b->display_id();
3064  }
3065  }
3066 }
3067 
3068 
3069 =head2 load
3070 
3071  Arg [1] : Boolean $load_xrefs
3072  Load (or don't load) xrefs. Default is to load xrefs.
3073  Example : $transcript->load();
3074  Description : The Ensembl API makes extensive use of
3075  lazy-loading. Under some circumstances (e.g.,
3076  when copying genes between databases), all data of
3077  an object needs to be fully loaded. This method
3078  loads the parts of the object that are usually
3079  lazy-loaded. It will also call the equivalent
3080  method on any translation and on all exons of the
3081  transcript.
3082  Returntype : None
3083 
3084 =cut
3085 
3086 sub load {
3087  my ( $self, $load_xrefs ) = @_;
3088 
3089  if ( !defined($load_xrefs) ) { $load_xrefs = 1 }
3090 
3091  my $translation = $self->translation();
3092  if ( defined($translation) ) {
3093  $translation->load($load_xrefs);
3094 
3095  my $alt_translations = $self->get_all_alternative_translations();
3096 
3097  if ( defined($alt_translations) ) {
3098  foreach my $alt_translation ( @{$alt_translations} ) {
3099  $alt_translation->load($load_xrefs);
3100  }
3101  }
3102  }
3103 
3104  foreach my $exon ( @{ $self->get_all_Exons() } ) {
3105  $exon->load();
3106  }
3107 
3108  $self->stable_id();
3109  $self->analysis();
3110  $self->get_all_Attributes();
3111  $self->get_all_supporting_features();
3112  $self->get_all_IntronSupportingEvidence();
3113 
3114  if ($load_xrefs) {
3115  $self->get_all_DBEntries();
3116  }
3117 
3118 } ## end sub load
3119 
3120 =head2 summary_as_hash
3121 
3122  Example : $transcript_summary = $transcript->summary_as_hash();
3123  Description : Extends Feature::summary_as_hash
3124  Retrieves a summary of this Transcript.
3125  Returns : hashref of descriptive strings
3126  Status : Intended for internal use
3127 =cut
3128 
3129 sub summary_as_hash {
3130  my $self = shift;
3131  my $summary_ref = $self->SUPER::summary_as_hash;
3132  $summary_ref->{'description'} = $self->description;
3133  $summary_ref->{'Name'} = $self->external_name if $self->external_name;
3134  $summary_ref->{'biotype'} = $self->get_Biotype->name;
3135  $summary_ref->{'logic_name'} = $self->analysis->logic_name() if defined $self->analysis();
3136  my $parent_gene = $self->get_Gene();
3137  $summary_ref->{'Parent'} = $parent_gene->stable_id;
3138  $summary_ref->{'source'} = $self->source();
3139  $summary_ref->{'transcript_id'} = $summary_ref->{'id'};
3140 
3141  ## Specific attributes for merged species
3142  ## No data will be stored if these fields are not available
3143  my $havana_transcript = $self->havana_transcript();
3144  $summary_ref->{'havana_transcript'} = $havana_transcript->display_id() if defined $havana_transcript;
3145  $summary_ref->{'havana_version'} = $havana_transcript->version() if defined $havana_transcript;
3146  $summary_ref->{'ccdsid'} = $self->ccds->display_id() if $self->ccds();
3147  $summary_ref->{'transcript_support_level'} = $self->tsl() if $self->tsl();
3148 
3149  my @tags;
3150  push @tags, $self->get_all_Attributes('gencode_basic')->[0]->code if $self->gencode_basic();
3151  push @tags, 'Ensembl_canonical' if $self->is_canonical();
3152  push @tags, $self->get_all_Attributes('gencode_primary')->[0]->code if $self->gencode_primary();
3153  push @tags, $self->get_all_Attributes('ens_canon_extended')->[0]->code if $self->ens_canon_extended();
3154 
3155  my $mane = $self->mane_transcript();
3156  if ($mane) {
3157  push @tags, $mane->type() if ($mane->type());
3158  }
3159 
3160  $summary_ref->{'tag'} = \@tags if @tags;
3161 
3162  ## Stable identifier of the parent transcript this transcript was projected from
3163  my $proj_parent_attributes = $self->get_all_Attributes("proj_parent_t");
3164  if (@{$proj_parent_attributes}) {
3165  $summary_ref->{'projection_parent_transcript'} = $proj_parent_attributes->[0]->value;
3166  }
3167  return $summary_ref;
3168 }
3169 
3170 =head2 gencode_basic
3171 
3172  Example : $gencode_basic = $transcript->gencode_basic();
3173  Description : Returns true if gencode_basic is set
3174  Returns : boolean
3175 =cut
3176 
3177 sub gencode_basic {
3178  my $self = shift;
3179  my @attributes = @{ $self->get_all_Attributes('gencode_basic') };
3180  my $basic = 0;
3181  $basic = 1 if scalar(@attributes) > 0;
3182  return $basic;
3183 }
3184 
3185 =head2 gencode_primary
3186 
3187  Example : $gencode_primary = $transcript->gencode_primary();
3188  Description : Returns true if gencode_primary is set
3189  Returns : boolean
3190 =cut
3191 
3192 sub gencode_primary {
3193  my $self = shift;
3194  my @attributes = @{ $self->get_all_Attributes('gencode_primary') };
3195  my $primary = 0;
3196  $primary = 1 if scalar(@attributes) > 0;
3197  return $primary;
3198 }
3199 
3200 =head2 ens_canon_extended
3201 
3202  Example : $ens_canon_extended = $transcript->ens_canon_extended();
3203  Description : Returns true if ens_canon_extended is set
3204  Returns : boolean
3205 =cut
3206 
3207 sub ens_canon_extended {
3208  my $self = shift;
3209  my @attributes = @{ $self->get_all_Attributes('ens_canon_extended') };
3210  my $canon_extended = 0;
3211  $canon_extended = 1 if scalar(@attributes) > 0;
3212  return $canon_extended;
3213 }
3214 
3215 =head2 tsl
3216 
3217  Example : $tsl = $transcript->tsl();
3218  Description : Returns the corresponding transcript support level
3219  Returns : string
3220 =cut
3221 
3222 sub tsl {
3223  my $self = shift;
3224  my @attributes = @{ $self->get_all_Attributes('TSL') };
3225  my $tsl;
3226  $tsl = $attributes[0]->value if scalar(@attributes) > 0;
3227  if (defined $tsl) { $tsl =~ s/tsl//; }
3228  return $tsl;
3229 }
3230 
3231 =head2 appris
3232 
3233  Example : $appris = $transcript->appris();
3234  Description : Returns the corresponding APPRIS tag
3235  Returns : string
3236 
3237 =cut
3238 
3239 sub appris {
3240  my $self = shift;
3241  my @attributes = @{ $self->get_all_Attributes('appris') };
3242  my $appris;
3243  $appris = $attributes[0]->value if scalar(@attributes) > 0;
3244 
3245  return $appris;
3246 }
3247 
3248 =head2 havana_transcript
3249 
3250  Example : $havana_transcript = $transcript->havana_transcript();
3251  Description : Locates the corresponding havana transcript
3252  Returns : Bio::EnsEMBL::DBEntry
3253 =cut
3254 
3255 sub havana_transcript {
3256  my $self = shift;
3257  my @otts = @{ $self->get_all_DBEntries('Vega_transcript') };
3258  my $ott;
3259  foreach my $xref (@otts) {
3260  if ($xref->display_id() =~ /OTT/) {
3261  $ott = $xref;
3262  last;
3263  }
3264  }
3265  return $ott;
3266 }
3267 
3268 =head2 ccds
3269 
3270  Example : $ccds = $transcript->ccds();
3271  Description : Locates the corresponding ccds xref
3272  Returns : Bio::EnsEMBL::DBEntry
3273 =cut
3274 
3275 sub ccds {
3276  my $self = shift;
3277  my @ccds = @{ $self->get_all_DBEntries('CCDS') };
3278  my $ccds;
3279  $ccds = $ccds[0] if scalar(@ccds) > 0;
3280  return $ccds;
3281 }
3282 
3283 
3284 
3285 =head2 get_Gene
3286 
3287  Example : $gene = $transcript->get_Gene;
3288  Description : Locates the parent Gene using a transcript dbID
3289  Returns : Bio::EnsEMBL::Gene
3290 
3291 =cut
3292 
3293 sub get_Gene {
3294  my $self = shift;
3295  my $gene_adaptor = $self->adaptor->db->get_GeneAdaptor();
3296  my $parent_gene = $gene_adaptor->fetch_by_transcript_id($self->dbID);
3297  return $parent_gene;
3298 }
3299 
3300 =head2 get_Biotype
3301 
3302  Example : my $biotype = $transcript->get_Biotype;
3303  Description: Returns the Biotype object of this transcript.
3304  When no biotype exists, defaults to 'protein_coding'.
3305  When used to set to a biotype that does not exist in
3306  the biotype table, a biotype object is created with
3307  the provided argument as name and object_type transcript.
3308  Returntype : Bio::EnsEMBL::Biotype
3309  Exceptions : none
3310 
3311 =cut
3312 
3313 sub get_Biotype {
3314  my ( $self ) = @_;
3315 
3316  # have a biotype object, return it
3317  if ( ref $self->{'biotype'} eq 'Bio::EnsEMBL::Biotype' ) {
3318  return $self->{'biotype'};
3319  }
3320 
3321  # biotype is first set as a string retrieved from the transcript table
3322  # there is no biotype object in the transcript object, retrieve it using the biotype string
3323  # if no string, default to protein_coding. this is legacy behaviour and should probably be revisited
3324  my $biotype_name = $self->{'biotype'} // 'protein_coding';
3325 
3326  return $self->set_Biotype( $biotype_name );
3327 }
3328 
3329 =head2 set_Biotype
3330 
3331  Arg [1] : Arg [1] : String - the biotype name to set
3332  Example : my $biotype = $transcript->set_Biotype('protin_coding');
3333  Description: Sets the Biotype of this transcript to the provided biotype name.
3334  Returns the Biotype object of this transcript.
3335  When no biotype exists, defaults to 'protein_coding' name.
3336  When setting a biotype that does not exist in
3337  the biotype table, a biotype object is created with
3338  the provided argument as name and object_type transcript.
3339  Returntype : Bio::EnsEMBL::Biotype
3340  Exceptions : If no argument provided
3341 
3342 =cut
3343 
3344 sub set_Biotype {
3345  my ( $self, $name ) = @_;
3346 
3347  throw('No argument provided') unless defined $name;
3348 
3349  # retrieve biotype object from the biotype adaptor
3350  if( defined $self->adaptor() ) {
3351  my $ba = $self->adaptor()->db()->get_BiotypeAdaptor();
3352  $self->{'biotype'} = $ba->fetch_by_name_object_type( $name, 'transcript' );
3353  }
3354  # if $self->adaptor is unavailable, create a new biotype object containing name and object_type only
3355  else {
3356  $self->{'biotype'} = Bio::EnsEMBL::Biotype->new(
3357  -NAME => $name,
3358  -OBJECT_TYPE => 'transcript',
3359  )
3360  }
3361 
3362  return $self->{'biotype'} ;
3363 }
3364 
3365 =head2 biotype
3366  Arg [1] : (optional) String - the biotype to set
3367  Example : $transcript->biotype("protein_coding");
3368  Description: Getter/setter for the attribute biotype name.
3369  Recommended to use instead for a getter:
3370  $biotype = $transcript->get_Biotype;
3371  and for a setter:
3372  $biotype = $transcript->set_Biotype("protein_coding");
3373  The String biotype name can then be retrieved by
3374  calling name on the Biotype object:
3375  $biotype_name = $biotype->name;
3376  Returntype : String
3377  Exceptions : none
3378  Caller : general
3379  Status : Stable
3380 =cut
3381 
3382 sub biotype {
3383  my ( $self, $biotype_name) = @_;
3384 
3385  # Setter? set_Biotype()
3386  if (defined $biotype_name) {
3387  return $self->set_Biotype($biotype_name)->name;
3388  }
3389 
3390  # Getter? get_Biotype()
3391  return $self->get_Biotype->name;
3392 }
3393 
3394 =head2 mane_transcript
3395  Example : $mane = $transcript->mane_transcript();
3396  Description: Retrieve the corresponding MANE transcript
3397  Returntype : Bio::EnsEMBL::MANE
3398  Exceptions : none
3399  Caller : general
3400  Status : Stable
3401 =cut
3402 
3403 sub mane_transcript {
3404  my ($self) = @_;
3405  if ($self->is_mane) {
3406 
3407  my $mane = Bio::EnsEMBL::MANE->new(
3408  -SEQ_REGION_START => $self->seq_region_start,
3409  -SEQ_REGION_END => $self->seq_region_end,
3410  -START => $self->start,
3411  -END => $self->end,
3412  -STABLE_ID => $self->stable_id,
3413  -SLICE => $self->slice,
3414  -TRANSCRIPT => $self
3415  );
3416  return $mane;
3417  }
3418  return undef;
3419 }
3420 
3421 =head2 is_mane
3422  Example : $boolean = $transcript->is_mane();
3423  Description: Check if a transcript is part of MANE
3424  Returntype : boolean
3425  Exceptions : none
3426  Caller : general
3427  Status : Stable
3428 =cut
3429 
3430 sub is_mane {
3431  my ($self) = @_;
3432  my $is_mane = 0;
3433  foreach my $attribute (@{ $self->get_all_Attributes() } ) {
3434  if ($attribute->code=~ /MANE/) {
3435  $is_mane = 1;
3436  }
3437  }
3438  return $is_mane;
3439 }
3440 
3441 1;
Bio::EnsEMBL::RNAProduct
Definition: RNAProduct.pm:33
transcript
public transcript()
Bio::EnsEMBL::Translation
Definition: Translation.pm:32
Bio::EnsEMBL::Biotype
Definition: Biotype.pm:35
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::UTR::new
public Bio::EnsEMBL::UTR new()
Bio::EnsEMBL::SeqEdit::new
public Bio::EnsEMBL::SeqEdit new()
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
Bio::EnsEMBL::Translation::start_Exon
public Bio::EnsEMBL::Exon start_Exon()
Bio::EnsEMBL::Translation::stable_id
public String stable_id()
Bio::EnsEMBL::CDS::new
public Bio::EnsEMBL::CDS new()
Bio::EnsEMBL::UTR::translation
public Bio::EnsEMBL::Translation translation()
Bio::EnsEMBL::Feature::seq_region_end
public Int seq_region_end()
Bio::EnsEMBL::SeqEdit
Definition: SeqEdit.pm:55
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
exon
public exon()
Bio::EnsEMBL::Exon
Definition: Exon.pm:42
Bio::EnsEMBL::DBEntry::new
public Bio::EnsEMBL::DBEntry new()
Bio::EnsEMBL::FeaturePair
Definition: FeaturePair.pm:56
Bio::EnsEMBL::TranscriptMapper
Definition: TranscriptMapper.pm:34
Bio::EnsEMBL::Intron
Definition: Intron.pm:10
Bio::EnsEMBL::CDS
Definition: CDS.pm:28
Bio::EnsEMBL::Transcript
Definition: Transcript.pm:44
about
public about()
Bio::EnsEMBL::ExonTranscript::new
public Bio::EnsEMBL::ExonTranscript new()
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
Bio::EnsEMBL::DBSQL::BaseAdaptor::db
public Bio::EnsEMBL::DBSQL::DBAdaptor db()
Bio::EnsEMBL::Exon::start
public Int start()
Bio::EnsEMBL::Feature::project
public Listref project()
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::Transcript::transfer
public Bio::EnsEMBL::Transcript transfer()
Bio::EnsEMBL::ExonTranscript
Definition: ExonTranscript.pm:20
Bio::EnsEMBL::MANE
Definition: MANE.pm:23
Bio::EnsEMBL::Feature::new
public Bio::EnsEMBL::Feature new()
Bio::EnsEMBL::DBEntry
Definition: DBEntry.pm:12
Bio::EnsEMBL::CDS::translation
public Bio::EnsEMBL::Translation translation()
Bio::EnsEMBL::Mapper::Gap
Definition: Gap.pm:14
Bio::EnsEMBL::UTR
Definition: UTR.pm:28
compare
public compare()
Bio::EnsEMBL::Transcript::translation
public Bio::EnsEMBL::Translation translation()
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::Feature::equals
public Boolean Or Undef equals()
Bio::EnsEMBL::Storable::adaptor
public Bio::EnsEMBL::DBSQL::BaseAdaptor adaptor()