3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
39 Container
for single
transcript ab initio gene prediction such as
40 GenScan or SNAP. Is directly storable/retrievable in Ensembl
using
41 PredictionTranscriptAdaptor.
46 $tran->add_Exon($pred_exon);
53 # Returns an array of PredictionExon objects
54 my @pred_exons = @{ $tran->get_all_Exons };
56 # Returns the peptide translation as string
57 my $pep = $tran->translate()->seq();
59 # Get the exon cdna sequence.
60 my $cdna = $tran->spliced_seq();
66 package Bio::EnsEMBL::PredictionTranscript;
85 string - a displayable identifier
for this prediction
88 (
'-start' => $seq_region_start,
89 '-end' => $seq_region_end,
90 '-strand' => $seq_region_strand,
93 '-analysis' => $analysis,
94 '-dbID' => $prediction_transcript_id,
95 '-display_label' => $display_label);
108 my $self = $class->SUPER::new(@_);
110 my ($display_label) = rearrange([
'DISPLAY_LABEL'], @_);
112 $self->{
'display_label'} = $display_label;
118 =head2 coding_region_start
121 Example : $coding_region_start = $pt->coding_region_start
122 Description: Retrieves the start of the coding region of
this transcript in
123 slice coordinates. For prediction transcripts
this
124 is always the start of the
transcript (i.e. there is no UTR).
125 By convention, the coding_region_start is always lower than
126 the value returned by the coding_region_end method.
127 The value returned by
this function is NOT the biological
128 coding start since on the reverse strand the biological coding
129 start would be the higher genomic value.
137 sub coding_region_start {
139 return $self->start();
143 =head2 coding_region_end
146 Example : $coding_region_end = $transcript->coding_region_end
147 Description: Retrieves the end of the coding region of
this prediction
148 transcript. For prediction transcripts
this is always the same
149 as the end since no UTRs are stored.
150 By convention, the coding_region_end is always higher than the
151 value returned by the coding_region_start method.
152 The value returned by
this function is NOT the biological
153 coding end since on the reverse strand the biological coding
154 end would be the lower genomic value.
162 sub coding_region_end {
169 =head2 get_all_translateable_Exons
172 Example : $exons = $self->get_all_translateable_Exons
173 Description: Retrieves the translateable portion of all exons in
this
174 transcript. For prediction transcripts
this means all exons
175 since no UTRs are stored
for them.
176 Returntype : listref of Bio::EnsEMBL::PredictionExons
183 sub get_all_translateable_Exons {
185 return $self->get_all_Exons();
191 Arg [1] :
string $newval (optional)
192 The
new value to set the display_label attribute to
193 Example : $display_label = $pt->display_label()
194 Description: Getter/Setter
for a displayable identifier
for this
205 return $self->{
'display_label'} = shift
if(@_);
206 return $self->{
'display_label'};
210 =head2 summary_as_hash
212 Example : my $hash = $pt->summary_as_hash();
213 Description: Retrieves a textual summary of
this prediction.
214 Not inherited from Features.
215 Returntype : Hashref of arrays of descriptive strings
222 sub summary_as_hash {
225 $summary{
'id'} = $self->dbID;
226 $summary{
'Name'} = $self->display_id;
227 $summary{
'version'} = $self->version()
if $self->version();
228 $summary{
'start'} = $self->seq_region_start;
229 $summary{
'end'} = $self->seq_region_end;
230 $summary{
'strand'} = $self->strand;
231 $summary{
'seq_region_name'} = $self->seq_region_name;
232 $summary{
'source'} = $self->analysis->gff_source() ||
'ensembl';
241 Example : print $pt->stable_id();
242 Description: Gets a
'stable' identifier
for this prediction
transcript. Note
243 that prediction transcripts
do not have
true *stable*
244 identifiers (i.e. identifiers maintained between releases).
245 This method chains to the display_label method and is intended
246 to provide polymorphism with the Transcript
class.
# Polymorphic stand-in for a transcript stable_id: simply forwards to the
# display_label accessor.
#
# Arg [1]    : (optional) new value, passed through unchanged to display_label
# Returntype : whatever display_label returns
#
# The call is dispatched through the invocant rather than made as a plain
# function call, so a subclass that overrides display_label is honoured.
sub stable_id {
  my $self = shift;
  return $self->display_label(@_);
}
# Stub: takes no notice of its arguments and always hands back a reference
# to a new, empty array.
sub get_all_DBEntries {
  my @no_entries;
  return \@no_entries;
}
# Stub: takes no notice of its arguments and always hands back a reference
# to a new, empty array.
sub get_all_DBLinks {
  my @no_links;
  return \@no_links;
}
# Stub: always returns undef.
# The explicit "undef" is deliberate: in list context the caller receives a
# one-element list (undef), not an empty list.
sub external_db {
  return undef;
}
# Stub: always returns undef.
# The explicit "undef" is deliberate: in list context the caller receives a
# one-element list (undef), not an empty list.
sub external_status {
  return undef;
}
# Stub: always returns undef.
# The explicit "undef" is deliberate: in list context the caller receives a
# one-element list (undef), not an empty list.
sub external_name {
  return undef;
}
# Stub: unconditionally returns 0 (a defined, false value).
sub is_known {
  return 0;
}
274 Example : $translation = $pt->translation();
276 transcript. Note that
this translation is generated on the fly
277 and is not stored in the database. The translation always
278 spans the entire
transcript (no UTRs; all CDS) and does not
279 have an associated dbID, stable_id or adaptor.
290 #calculate translation on the fly
291 my $strand = $self->strand();
296 my @exons = @{$self->get_all_Exons()};
298 return undef
if(!@exons);
300 $start_exon = $exons[0];
301 $end_exon = $exons[-1];
305 if($self->adaptor()) {
306 $pta = $self->adaptor()->db()->get_TranslationAdaptor();
308 #warning("PredictionTranscript has no adaptor, may not be able to obtain " .
312 my $Xseq = $self->spliced_seq();
313 my $start_phase = $start_exon->phase;
314 if( $start_phase > 0 ) {
315 $Xseq =
"N"x$start_phase . $Xseq;
318 my $tmpSeq =
new Bio::Seq( -
id => $self->display_id,
321 -alphabet =>
'dna' );
325 -START_EXON => $start_exon,
326 -END_EXON => $end_exon,
328 -SEQ_END => $end_exon->length(),
329 -SEQ => $tmpSeq->translate()->seq());
336 Arg [1] : Boolean, emulate the behavior of old bioperl versions where
337 an incomplete
final codon of 2 characters is padded and guessed
338 Function : Give a peptide translation of all exons currently in
339 the PT. Gives empty
string when none is in.
340 Returntype: a Bio::Seq as in
transcript->translate()
349 my ($self, $complete_codon) = @_;
351 my $dna = $self->translateable_seq();
354 if ( defined( $self->slice() ) ) {
357 ($attrib) = @{ $self->slice()->get_all_Attributes(
'codon_table') };
358 if ( defined($attrib) ) {
359 $codon_table_id = $attrib->value();
362 $codon_table_id ||= 1; #
default will be vertebrates
364 # Remove the final stop codon from the mrna
365 # sequence produced if it is present, this is so any peptide produced
366 # won't have a terminal stop codon
367 # if you want to have a terminal stop codon either comment this line out
368 # or call translatable seq directly and produce a translation from it
369 if( CORE::length( $dna ) % 3 == 0 ) {
370 # $dna =~ s/TAG$|TGA$|TAA$//i;
371 my $codon_table = Bio::Tools::CodonTable->new( -
id => $codon_table_id );
373 if ( $codon_table->is_ter_codon( substr( $dna, -3, 3 ) ) ) {
374 substr( $dna, -3, 3,
'' );
376 } elsif ( CORE::length($dna) % 3 == 2 ) {
377 # If we have a partial codon of 2 bp we need to decide if we
378 # trim it or not to fix some bad behaviour in older bioperl
380 if ( $complete_codon ) {
381 # If we want to do the bad behavior of bioperl 1.6.1 and older
382 # where we guess the last codon if inomplete, pad an N
383 # to the mrna sequence
386 # Otherwise trim those last two bp off so the behavior is
387 # consistent across bioperl versions
388 substr( $dna, -2, 2,
'' );
392 my $bioseq =
new Bio::Seq( -
id => $self->display_id,
395 -alphabet =>
'dna' );
397 my $translation = $bioseq->translate(undef,undef,undef,$codon_table_id);
403 =head2 cdna_coding_start
406 Example : $relative_coding_start = $transcript->cdna_coding_start();
407 Description: Retrieves the position of the coding start of
this transcript
408 in cdna coordinates (relative to the start of the 5prime end of
409 the
transcript, excluding introns, including utrs). This is
410 always 1
for prediction transcripts because they have no UTRs.
413 Caller : five_prime_utr, get_all_snps, general
# Constant accessor: ignores its arguments and always returns 1.
sub cdna_coding_start {
  return 1;
}
422 =head2 cdna_coding_end
425 Example : $relative_coding_end = $transcript->cdna_coding_end();
426 Description: Retrieves the position of the coding end of
this transcript
427 in cdna coordinates (relative to the start of the 5prime end of
428 the
transcript, excluding introns, including utrs). This is
429 always the length of the cdna
for prediction transcripts because
433 Caller : five_prime_utr, get_all_snps, general
438 sub cdna_coding_end {
440 return length( $self->spliced_seq() );
446 Arg 1 : String $coordinate_system_name
447 Arg [2] : String $coordinate_system_version
448 Example : $ptrans = $ptrans->transform(
'chromosome',
'NCBI33');
449 $ptrans = $ptrans->transform(
'clone');
450 Description: Moves
this PredictionTranscript to the given coordinate system.
451 If
this Transcript has Exons attached, they move as well.
452 A
new Transcript is returned or undefined
if this PT is not
453 defined in the
new coordinate system.
455 Exceptions : wrong parameters
464 # catch for old style transform calls
465 if( ref $_[0] && ($_[0]->isa(
"Bio::EnsEMBL::Slice" ) or $_[0]->isa(
"Bio::EnsEMBL::LRGSlice" ))) {
466 throw(
"transform needs coordinate systems details now," .
467 "please use transfer");
471 return undef unless $new_transcript;
473 #go through the _trans_exon_array so as not to prompt lazy-loading
474 if(exists($self->{
'_trans_exon_array'})) {
476 foreach my $old_exon ( @{$self->{
'_trans_exon_array'}} ) {
477 my $new_exon = $old_exon->transform(@_);
478 push(@new_exons, $new_exon);
480 $new_transcript->{
'_trans_exon_array'} = \@new_exons;
483 return $new_transcript;
491 Example : $ptrans = $ptrans->transfer($slice);
492 Description: Moves
this PredictionTranscript to the given slice.
493 If
this Transcript has Exons attached, they move as well.
494 If
this transcript cannot be moved then undef is returned
506 my $new_transcript = $self->SUPER::transfer( @_ );
507 return undef unless $new_transcript;
509 if( exists $self->{
'_trans_exon_array'} ) {
511 for my $old_exon ( @{$self->{
'_trans_exon_array'}} ) {
512 my $new_exon = $old_exon->
transfer( @_ );
513 push( @new_exons, $new_exon );
516 $new_transcript->{
'_trans_exon_array'} = \@new_exons;
519 return $new_transcript;
525 Example : my @exons = @{$transcript->get_all_Exons()};
526 Description: Returns a listref of the exons in
this transcript in order.
527 i.e. the first
exon in the listref is the 5prime most
exon in
538 if( ! defined $self->{
'_trans_exon_array'} && defined $self->adaptor() ) {
539 $self->{
'_trans_exon_array'} = $self->
adaptor()->
db()->
540 get_PredictionExonAdaptor()->fetch_all_by_PredictionTranscript( $self );
542 return $self->{
'_trans_exon_array'};
548 Example : print $rf->display_id();
549 Description: This method returns a
string that is considered to be
550 the
'display' identifier. For prediction transcripts
this is
551 (depending on availability and in
this order) the stable Id, the
552 dbID or an empty
string.
555 Caller : web drawing code
562 return $self->stable_id || $self->dbID ||
'';
565 =head2 get_all_Attributes
569 Description: DOES NOTHING, Returns empty listref. Provided here to prevent
570 Transcript attributes being returned
for PredictionTranscripts.
578 sub get_all_Attributes {