ensembl-hive  2.8.1
PredictionTranscript.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 PredictionTranscript
34 
35 =head1 SYNOPSIS
36 
37 =head1 DESCRIPTION
38 
39 Container for single transcript ab initio gene prediction such as
40 GenScan or SNAP. Is directly storable/retrievable in Ensembl using
41 PredictionTranscriptAdaptor.
42 
43 Creation:
44 
45  my $tran = new Bio::EnsEMBL::PredictionTranscript();
46  $tran->add_Exon($pred_exon);
47 
48  my $tran =
49  new Bio::EnsEMBL::PredictionTranscript( -EXONS => @pred_exons );
50 
51 Manipulation:
52 
53  # Returns an array of PredictionExon objects
54  my @pred_exons = @{ $tran->get_all_Exons };
55 
56  # Returns the peptide translation as string
57  my $pep = $tran->translate()->seq();
58 
59  # Get the exon cdna sequence.
60  my $cdna = $tran->spliced_seq();
61 
62 =head1 METHODS
63 
64 =cut
65 
66 package Bio::EnsEMBL::PredictionTranscript;
67 
68 use vars qw(@ISA);
69 use strict;
70 
71 use Bio::Seq;
75 
76 use Bio::EnsEMBL::Utils::Exception qw( throw warning );
77 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
78 
79 @ISA = qw(Bio::EnsEMBL::Transcript);
80 
81 
82 =head2 new
83 
84  Arg [-DISPLAY_LABEL]
85  string - a displayable identifier for this prediction
86  Arg [...] : See Bio::EnsEMBL::Transcript superclass constructor
88  ( '-start' => $seq_region_start,
89  '-end' => $seq_region_end,
90  '-strand' => $seq_region_strand,
91  '-adaptor' => $self,
92  '-slice' => $slice,
93  '-analysis' => $analysis,
94  '-dbID' => $prediction_transcript_id,
95  '-display_label' => $display_label);
96  Description: Constructor. Creates a new Bio::EnsEMBL::PredictionTranscript
97  object
99  Exceptions : none
100  Caller : general
101  Status : Stable
102 
103 =cut
104 
# Constructor. Delegates construction to the superclass and then records the
# optional -DISPLAY_LABEL argument on the new object.
#
#   Arg [-DISPLAY_LABEL] : string - displayable identifier for this prediction
#   Arg [...]            : passed through unchanged to the superclass
#   Returns              : the object built by SUPER::new
sub new {
  my ( $class, @args ) = @_;

  my $self = $class->SUPER::new(@args);

  # rearrange() picks the named argument out of the argument list; the
  # attribute is stored even when no label was supplied (value undef).
  ( $self->{'display_label'} ) = rearrange( ['DISPLAY_LABEL'], @args );

  return $self;
}
116 
117 
118 =head2 coding_region_start
119 
120  Arg [1] : none
121  Example : $coding_region_start = $pt->coding_region_start
122  Description: Retrieves the start of the coding region of this transcript in
123  slice coordinates. For prediction transcripts this
124  is always the start of the transcript (i.e. there is no UTR).
125  By convention, the coding_region_start is always lower than
126  the value returned by the coding_region_end method.
127  The value returned by this function is NOT the biological
128  coding start since on the reverse strand the biological coding
129  start would be the higher genomic value.
130  Returntype : int
131  Exceptions : none
132  Caller : general
133  Status : Stable
134 
135 =cut
136 
# Coding region start in slice coordinates. This simply forwards to start(),
# i.e. the whole feature is treated as coding.
sub coding_region_start {
  my ($self) = @_;

  return $self->start();
}
141 
142 
143 =head2 coding_region_end
144 
145  Arg [1] : none
146  Example : $coding_region_end = $transcript->coding_region_end
147  Description: Retrieves the end of the coding region of this prediction
148  transcript. For prediction transcripts this is always the same
149  as the end since no UTRs are stored.
150  By convention, the coding_region_end is always higher than the
151  value returned by the coding_region_start method.
152  The value returned by this function is NOT the biological
153  coding end since on the reverse strand the biological coding
154  end would be the lower genomic value.
155  Returntype : int
156  Exceptions : none
157  Caller : general
158  Status : Stable
159 
160 =cut
161 
# Coding region end in slice coordinates. This simply forwards to end(),
# i.e. the whole feature is treated as coding.
sub coding_region_end {
  my ($self) = @_;

  return $self->end();
}
166 
167 
168 
169 =head2 get_all_translateable_Exons
170 
171  Arg [1] : none
172  Example : $exons = $self->get_all_translateable_Exons
173  Description: Retrieves the translateable portion of all exons in this
174  transcript. For prediction transcripts this means all exons
175  since no UTRs are stored for them.
176  Returntype : listref of Bio::EnsEMBL::PredictionExons
177  Exceptions : none
178  Caller : general
179  Status : Stable
180 
181 =cut
182 
# Returns whatever get_all_Exons() returns: every exon of this transcript is
# considered translateable.
sub get_all_translateable_Exons {
  my ($self) = @_;

  return $self->get_all_Exons();
}
187 
188 
189 =head2 display_label
190 
191  Arg [1] : string $newval (optional)
192  The new value to set the display_label attribute to
193  Example : $display_label = $pt->display_label()
194  Description: Getter/Setter for a displayable identifier for this
195  prediction transcript.
196  Returntype : string
197  Exceptions : none
198  Caller : general
199  Status : Stable
200 
201 =cut
202 
# Getter/setter for the displayable identifier of this prediction transcript.
#
#   Arg [1]  : (optional) new value; any supplied argument, including undef,
#              overwrites the stored label
#   Returns  : the label now stored on the object
sub display_label {
  my ( $self, @new_value ) = @_;

  if (@new_value) {
    $self->{'display_label'} = $new_value[0];
  }

  return $self->{'display_label'};
}
208 
209 
210 =head2 summary_as_hash
211 
212  Example : my $hash = $pt->summary_as_hash();
213  Description: Retrieves a textual summary of this prediction.
214  Not inherited from Features.
215  Returntype : Hashref of arrays of descriptive strings
216  Exceptions : none
217  Caller : general
218  Status : Stable
219 
220 =cut
221 
# Builds a flat hashref describing this prediction.
#
# Keys: id, Name, start, end, strand, seq_region_name, source, plus version
# when version() is true. 'source' falls back to the literal 'ensembl' when
# the analysis reports no gff_source.
sub summary_as_hash {
  my ($self) = @_;

  my $summary = {};

  $summary->{'id'}   = $self->dbID;
  $summary->{'Name'} = $self->display_id;

  # The version key is only present when there is a true version value.
  if ( $self->version() ) {
    $summary->{'version'} = $self->version();
  }

  $summary->{'start'}           = $self->seq_region_start;
  $summary->{'end'}             = $self->seq_region_end;
  $summary->{'strand'}          = $self->strand;
  $summary->{'seq_region_name'} = $self->seq_region_name;

  my $gff_source = $self->analysis->gff_source();
  $summary->{'source'} = $gff_source || 'ensembl';

  return $summary;
}
235 
236 
237 
238 =head2 stable_id
239 
240  Arg [1] : none
241  Example : print $pt->stable_id();
242  Description: Gets a 'stable' identifier for this prediction transcript. Note
243  that prediction transcripts do not have true *stable*
244  identifiers (i.e. identifiers maintained between releases).
245  This method chains to the display_label method and is intended
246  to provide polymorphism with the Transcript class.
247  Returntype : string
248  Exceptions : none
249  Caller : general
250  Status : Stable
251 
252 =cut
253 
# Alias for display_label, kept for interface compatibility with Transcript.
# All arguments are forwarded, so passing a value also sets the label.
sub stable_id {
  my ( $self, @rest ) = @_;

  # Deliberately a plain function call (not a method call), as in the
  # original: it always reaches this package's display_label.
  return display_label( $self, @rest );
}
255 
# The following methods are deliberate no-ops: each returns a fixed empty
# or false value regardless of its arguments.

# Always an empty listref.
sub get_all_DBEntries {
  return [];
}

# Always an empty listref.
sub get_all_DBLinks {
  return [];
}

# Accepts and ignores anything; returns nothing.
sub add_DBEntry {
  return;
}

# The explicit undef is kept on purpose: in list context callers still
# receive exactly one (undefined) element, as before.
sub external_db {
  return undef;
}

sub external_status {
  return undef;
}

sub external_name {
  return undef;
}

# Always false (0).
sub is_known {
  return 0;
}
269 
270 
271 =head2 translation
272 
273  Arg [1] : none
274  Example : $translation = $pt->translation();
275  Description: Retrieves a Bio::EnsEMBL::Translation object for this prediction
276  transcript. Note that this translation is generated on the fly
277  and is not stored in the database. The translation always
278  spans the entire transcript (no UTRs; all CDS) and does not
279  have an associated dbID, stable_id or adaptor.
280  Returntype : Bio::EnsEMBL::Translation
281  Exceptions : none
282  Caller : general
283  Status : Stable
284 
285 =cut
286 
# Builds a Bio::EnsEMBL::Translation for this prediction transcript on the
# fly. The translation spans the transcript from its first to its last exon.
#
#   Arg [1]    : none
#   Returntype : Bio::EnsEMBL::Translation, or undef when the transcript has
#                no exons (including the case where get_all_Exons() itself
#                returns undef because nothing is loaded and no adaptor is
#                attached)
sub translation {
  my $self = shift;

  # get_all_Exons() can return undef; fall back to an empty list rather
  # than dereferencing undef, which is fatal under 'use strict'.
  my @exons = @{ $self->get_all_Exons() || [] };

  return undef if !@exons;

  my $start_exon = $exons[0];
  my $end_exon   = $exons[-1];

  # A translation adaptor can only be obtained when this transcript has an
  # adaptor of its own; otherwise the translation is built without one.
  my $pta;
  if ( $self->adaptor() ) {
    $pta = $self->adaptor()->db()->get_TranslationAdaptor();
  }

  # Left-pad the spliced sequence with one 'N' per phase position when the
  # first exon has a positive phase.
  my $Xseq        = $self->spliced_seq();
  my $start_phase = $start_exon->phase;
  if ( $start_phase > 0 ) {
    $Xseq = ( 'N' x $start_phase ) . $Xseq;
  }

  my $tmpSeq = Bio::Seq->new(
    -id       => $self->display_id,
    -seq      => $Xseq,
    -moltype  => 'dna',
    -alphabet => 'dna'
  );

  # Loaded here so this method does not rely on another module having
  # pulled the class in already.
  require Bio::EnsEMBL::Translation;

  return Bio::EnsEMBL::Translation->new(
    -ADAPTOR    => $pta,
    -START_EXON => $start_exon,
    -END_EXON   => $end_exon,
    -SEQ_START  => 1,
    -SEQ_END    => $end_exon->length(),
    -SEQ        => $tmpSeq->translate()->seq()
  );
}
331 
332 
333 
334 =head2 translate
335 
336  Arg [1] : Boolean, emulate the behavior of old bioperl versions where
337  an incomplete final codon of 2 characters is padded and guessed
338  Function : Give a peptide translation of all exons currently in
339  the PT. Gives empty string when none is in.
340  Returntype: a Bio::Seq as in transcript->translate()
341  Exceptions: none
342  Caller : general
343  Status : Stable
344 
345 =cut
346 
347 
# Translates the translateable sequence of this transcript into a peptide.
#
#   Arg [1]    : Boolean $complete_codon - when true, a trailing partial
#                codon of 2 bp is padded with 'N' (old BioPerl behaviour)
#                instead of being trimmed
#   Returntype : whatever Bio::Seq->translate returns for the prepared
#                sequence
sub translate {
  my ( $self, $complete_codon ) = @_;

  my $mrna = $self->translateable_seq();

  # Codon table: taken from the slice's 'codon_table' attribute when there
  # is one, otherwise table 1 (the default will be vertebrates).
  my $codon_table_id;
  my $slice = $self->slice();
  if ( defined $slice ) {
    my ($table_attrib) = @{ $slice->get_all_Attributes('codon_table') };
    $codon_table_id = $table_attrib->value() if defined $table_attrib;
  }
  $codon_table_id ||= 1;

  my $overhang = CORE::length($mrna) % 3;

  if ( $overhang == 0 ) {
    # Whole number of codons: drop a terminal stop codon, if present, so
    # the peptide does not end in a stop. Callers who want the stop can
    # translate the translateable sequence themselves.
    my $codon_table = Bio::Tools::CodonTable->new( -id => $codon_table_id );

    if ( $codon_table->is_ter_codon( substr( $mrna, -3, 3 ) ) ) {
      substr( $mrna, -3, 3, '' );
    }
  }
  elsif ( $overhang == 2 ) {
    # Partial final codon of 2 bp: behaviour differs between BioPerl
    # versions, so it is normalised here.
    if ($complete_codon) {
      # Emulate BioPerl 1.6.1 and older, which guessed the incomplete last
      # codon: pad the sequence with an N.
      $mrna .= 'N';
    }
    else {
      # Trim the two dangling bases so the result is the same across
      # BioPerl versions.
      substr( $mrna, -2, 2, '' );
    }
  }

  my $bioseq = Bio::Seq->new(
    -id       => $self->display_id,
    -seq      => $mrna,
    -moltype  => 'dna',
    -alphabet => 'dna'
  );

  # The first three positional arguments are left at their defaults; the
  # fourth is the codon table id.
  return $bioseq->translate( undef, undef, undef, $codon_table_id );
}
401 
402 
403 =head2 cdna_coding_start
404 
405  Arg [1] : none
406  Example : $relative_coding_start = $transcript->cdna_coding_start();
407  Description: Retrieves the position of the coding start of this transcript
408  in cdna coordinates (relative to the start of the 5prime end of
409  the transcript, excluding introns, including utrs). This is
410  always 1 for prediction transcripts because they have no UTRs.
411  Returntype : int
412  Exceptions : none
413  Caller : five_prime_utr, get_all_snps, general
414  Status : Stable
415 
416 =cut
417 
# Coding start in cDNA coordinates: always the constant 1.
sub cdna_coding_start {
  return 1;
}
419 
420 
421 
422 =head2 cdna_coding_end
423 
424  Arg [1] : none
425  Example : $relative_coding_end = $transcript->cdna_coding_end();
426  Description: Retrieves the position of the coding end of this transcript
427  in cdna coordinates (relative to the start of the 5prime end of
428  the transcript, excluding introns, including utrs). This is
429  always the length of the cdna for prediction transcripts because
430  they have no UTRs.
431  Returntype : int
432  Exceptions : none
433  Caller : five_prime_utr, get_all_snps, general
434  Status : Stable
435 
436 =cut
437 
# Coding end in cDNA coordinates: the length of the spliced sequence.
sub cdna_coding_end {
  my $self = shift;

  my $cdna = $self->spliced_seq();

  # Built-in string length of the sequence.
  return CORE::length($cdna);
}
442 
443 
444 =head2 transform
445 
446  Arg 1 : String $coordinate_system_name
447  Arg [2] : String $coordinate_system_version
448  Example : $ptrans = $ptrans->transform('chromosome', 'NCBI33');
449  $ptrans = $ptrans->transform('clone');
450  Description: Moves this PredictionTranscript to the given coordinate system.
451  If this Transcript has Exons attached, they move as well.
452  A new Transcript is returned or undefined if this PT is not
453  defined in the new coordinate system.
455  Exceptions : wrong parameters
456  Caller : general
457  Status : Stable
458 
459 =cut
460 
# Moves this prediction transcript, and any exons already attached to it,
# to another coordinate system.
#
#   Args       : coordinate system name and optional version; passed on
#                unchanged to Bio::EnsEMBL::Feature::transform and to each
#                exon's transform
#   Returntype : the transformed transcript, or undef when the feature-level
#                transform yields nothing
#   Exceptions : throws when called old-style with a slice object
sub transform {
  my ( $self, @cs_args ) = @_;

  # Catch old-style calls that pass a slice instead of coordinate system
  # details.
  my $first_arg = $cs_args[0];
  if ( ref $first_arg
    && ( $first_arg->isa("Bio::EnsEMBL::Slice")
      || $first_arg->isa("Bio::EnsEMBL::LRGSlice") ) )
  {
    throw("transform needs coordinate systems details now," .
          "please use transfer");
  }

  my $moved = Bio::EnsEMBL::Feature::transform( $self, @cs_args );
  return undef unless $moved;

  # Work on the stored exon array directly so that exons are not
  # lazy-loaded just for the sake of transforming them.
  # NOTE(review): an exon whose transform returns undef is still stored in
  # the new array - confirm whether callers expect that.
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    for my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      push @moved_exons, scalar $exon->transform(@cs_args);
    }
    $moved->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved;
}
485 
486 
487 
488 =head2 transfer
489 
490  Arg 1 : Bio::EnsEMBL::Slice $destination_slice
491  Example : $ptrans = $ptrans->transfer($slice);
492  Description: Moves this PredictionTranscript to the given slice.
493  If this Transcript has Exons attached, they move as well.
494  If this transcript cannot be moved then undef is returned
495  instead.
497  Exceptions : none
498  Caller : general
499  Status : Stable
500 
501 =cut
502 
# Moves this prediction transcript, and any exons already attached to it,
# to the given slice.
#
#   Args       : passed on unchanged to SUPER::transfer and to each exon's
#                transfer
#   Returntype : the transferred transcript, or undef when the superclass
#                transfer yields nothing
sub transfer {
  my ( $self, @slice_args ) = @_;

  my $moved = $self->SUPER::transfer(@slice_args);
  return undef unless $moved;

  # Only exons that are already attached are transferred; the stored array
  # is read directly rather than through get_all_Exons().
  if ( exists $self->{'_trans_exon_array'} ) {
    my @moved_exons;
    foreach my $exon ( @{ $self->{'_trans_exon_array'} } ) {
      push @moved_exons, scalar $exon->transfer(@slice_args);
    }
    $moved->{'_trans_exon_array'} = \@moved_exons;
  }

  return $moved;
}
521 
522 =head2 get_all_Exons
523 
524  Arg [1] : none
525  Example : my @exons = @{$transcript->get_all_Exons()};
526  Description: Returns a listref of the exons in this transcript in order.
527  i.e. the first exon in the listref is the 5prime most exon in
528  the transcript.
529  Returntype : a list reference to Bio::EnsEMBL::Exon objects
530  Exceptions : none
531  Caller : general
532  Status : Stable
533 
534 =cut
535 
# Returns the listref of exons attached to this transcript.
#
# When no exon array is stored yet and the transcript has an adaptor, the
# exons are fetched through the PredictionExonAdaptor and cached on the
# object. Without an adaptor and without stored exons the result is undef.
sub get_all_Exons {
  my $self = shift;

  my $needs_loading = !defined $self->{'_trans_exon_array'};

  if ( $needs_loading && defined $self->adaptor() ) {
    my $exon_adaptor = $self->adaptor()->db()->get_PredictionExonAdaptor();
    $self->{'_trans_exon_array'} =
      $exon_adaptor->fetch_all_by_PredictionTranscript($self);
  }

  return $self->{'_trans_exon_array'};
}
544 
545 =head2 display_id
546 
547  Arg [1] : none
548  Example : print $rf->display_id();
549  Description: This method returns a string that is considered to be
550  the 'display' identifier. For prediction transcripts this is
551  (depending on availability and in this order) the stable Id, the
552  dbID or an empty string.
553  Returntype : string
554  Exceptions : none
555  Caller : web drawing code
556  Status : Stable
557 
558 =cut
559 
# Returns the display identifier: the stable id when it is true, otherwise
# the dbID when that is true, otherwise the empty string.
sub display_id {
  my ($self) = @_;

  my $stable_id = $self->stable_id;
  return $stable_id if $stable_id;

  my $db_id = $self->dbID;
  return $db_id if $db_id;

  return '';
}
564 
565 =head2 get_all_Attributes
566 
567  Arg [1] : none
568  Example :
569  Description: DOES NOTHING, Returns empty listref. Provided here to prevent
570  Transcript attributes being returned for PredictionTranscripts.
571  Returntype : EMPTY listref Bio::EnsEMBL::Attribute
572  Exceptions : none
573  Caller : general
574  Status : At risk
575 
576 =cut
577 
# Deliberately returns an empty listref and ignores all arguments.
sub get_all_Attributes {
  return [];
}
583 
584 
585 1;
transcript
public transcript()
Bio::EnsEMBL::Translation
Definition: Translation.pm:32
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
Bio::EnsEMBL::Feature::transform
public Bio::EnsEMBL::Feature transform()
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
exon
public exon()
Bio::EnsEMBL::Exon
Definition: Exon.pm:42
Bio::EnsEMBL::Transcript
Definition: Transcript.pm:44
Bio::EnsEMBL::DBSQL::BaseAdaptor::db
public Bio::EnsEMBL::DBSQL::DBAdaptor db()
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::Translation::new
public Bio::EnsEMBL::Translation new()
Bio::EnsEMBL::PredictionTranscript::new
public Bio::EnsEMBL::PredictionTranscript new()
Bio::EnsEMBL::PredictionTranscript::transfer
public Bio::EnsEMBL::PredictionTranscript transfer()
Bio::EnsEMBL::PredictionTranscript
Definition: PredictionTranscript.pm:39
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::Storable::adaptor
public Bio::EnsEMBL::DBSQL::BaseAdaptor adaptor()