ensembl-hive  2.8.1
TranscriptFactory.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::TranscriptFactory - Module having the fset2transcript*
34 subroutines
35 
36 =head1 SYNOPSIS
37 
39 
41 
42 =head1 DESCRIPTION
43 
44 Module containing the subroutines fset2transcript*,
45 which create transcripts from features (formerly housed in
46 Bio::EnsEMBL::DBSQL::Utils).
47 
48 =head1 METHODS
49 
50 =cut
51 
52 package Bio::EnsEMBL::TranscriptFactory;
53 
54 use strict;
55 
59 
# fset2transcript
#
#   Arg [1]     : $genscan - feature set; each entry returned by its
#                 sub_SeqFeature method becomes one exon. Expected to be a
#                 Bio::EnsEMBL::SeqFeatureI.
#   Arg [2]     : $contig - object providing id() and primary_seq(); it is
#                 attached to every exon via $exon->contig.
#   Description : Builds a Bio::EnsEMBL::Transcript whose temporary id is
#                 "<contig id>.<genscan seqname>". Each exon copies start,
#                 end, strand and phase from its feature, and gets
#                 end_phase = (phase + length) % 3. A Translation spanning
#                 from position 1 of the first exon to the last base of the
#                 last exon (in strand order) is attached.
#   Returntype  : Bio::EnsEMBL::Transcript
#   Exceptions  : calls $genscan->throw() when there are no sub-features.
sub fset2transcript {
    my ($genscan, $contig) = @_;

    # Advisory only: a type mismatch is reported on STDOUT and processing
    # carries on regardless.
    unless ($genscan->isa("Bio::EnsEMBL::SeqFeatureI")) {
        print "$genscan must be Bio::EnsEMBL::SeqFeatureI\n";
    }

    my $transcript = Bio::EnsEMBL::Transcript->new;
    $transcript->temporary_id($contig->id . "." . $genscan->seqname);

    my @exons;

    foreach my $f ($genscan->sub_SeqFeature) {
        my $exon = Bio::EnsEMBL::Exon->new;
        $exon->contig($contig);
        $exon->start($f->start);
        $exon->end($f->end);
        $exon->strand($f->strand);
        $exon->phase($f->phase);
        $exon->end_phase(($exon->phase + $exon->length) % 3);
        $exon->slice($contig->primary_seq);

        # Hand the exon to the transcript only once its coordinates and
        # strand are set, as the other fset2transcript* subroutines do;
        # previously it was added while still empty.
        $transcript->add_Exon($exon);

        push(@exons, $exon);
    }

    unless (@exons) {
        $genscan->throw("Got a 0 exon genscan");
    }

    my $translation = Bio::EnsEMBL::Translation->new;

    # Order the exons 5' -> 3': ascending start when the first exon reports
    # strand 1, descending start otherwise.
    if ($exons[0]->strand == 1) {
        @exons = sort { $a->start <=> $b->start } @exons;
    }
    else {
        @exons = sort { $b->start <=> $a->start } @exons;
    }

    # The translation starts at position 1 of the first exon and ends at the
    # last position (= exon length) of the last exon.
    $translation->start(1);
    $translation->end($exons[-1]->length);

    $translation->start_Exon($exons[0]);
    $translation->end_Exon($exons[-1]);

    # Chain phases along the sorted exons: every exon after the first takes
    # as its phase the end_phase value read from the exon before it.
    # NOTE(review): end_phase is not recomputed after phase() is updated
    # here, so it keeps the value derived from the feature's own phase
    # above - confirm this is intended.
    my $endphase = $exons[0]->end_phase;

    foreach my $exon (@exons[1 .. $#exons]) {
        $exon->phase($endphase);
        $endphase = $exon->end_phase;
    }

    $transcript->translation($translation);

    return $transcript;
}
128 
# fset2transcript_guess_phases
#
#   Arg [1]     : $fset - feature set providing id() and sub_SeqFeature();
#                 each sub-feature becomes one exon.
#   Arg [2]     : $contig - object providing id(); it is attached to every
#                 exon as both contig and slice.
#   Description : Builds a transcript (temporary id "<contig id>.<fset id>")
#                 and tries start phases 0, 1 and 2 in turn. For each trial
#                 the phases are chained along the strand-ordered exons
#                 (end_phase = (phase + length) % 3) and the transcript is
#                 translated; the first trial whose translation contains no
#                 '*' (presumably a stop codon) wins.
#   Returntype  : Bio::EnsEMBL::Transcript, or nothing (undef / empty list)
#                 when all three trials contain a '*'.
#   Exceptions  : dies when the feature set has no sub-features.
sub fset2transcript_guess_phases {
    my ($fset, $contig) = @_;

    my $transcript = Bio::EnsEMBL::Transcript->new;

    $transcript->temporary_id($contig->id . "." . $fset->id);

    my @exons;

    foreach my $f ($fset->sub_SeqFeature) {
        my $exon = Bio::EnsEMBL::Exon->new;
        $exon->contig($contig);
        $exon->start($f->start);
        $exon->end($f->end);
        $exon->strand($f->strand);
        $exon->slice($contig);
        # Provisional value only; it is overwritten for every trial below.
        $exon->phase($f->phase);
        push(@exons, $exon);
    }

    # Fail with a clear message instead of the opaque "Can't call method
    # strand on an undefined value" that an empty set used to trigger.
    die "fset2transcript_guess_phases: feature set has no sub-features"
        unless @exons;

    my $translation = Bio::EnsEMBL::Translation->new;

    # Order the exons 5' -> 3': ascending start when the first exon reports
    # strand 1, descending start otherwise.
    if ($exons[0]->strand == 1) {
        @exons = sort { $a->start <=> $b->start } @exons;
    }
    else {
        @exons = sort { $b->start <=> $a->start } @exons;
    }

    $translation->start(1);
    $translation->end($exons[-1]->end - $exons[-1]->start + 1);
    $translation->start_Exon($exons[0]);
    $translation->end_Exon($exons[-1]);
    $transcript->translation($translation);

    foreach my $startphase (0 .. 2) {
        my $endphase = $startphase;

        foreach my $exon (@exons) {
            $exon->phase($endphase);

            # Exons join the transcript once, during the first trial; later
            # trials only rewrite the phases of the same exon objects.
            $transcript->add_Exon($exon) if $startphase == 0;

            # Relies on the end_phase setter returning the value just set,
            # exactly as the original code did.
            $endphase = $exon->end_phase(($exon->phase + $exon->length) % 3);
        }

        if ($transcript->translate->seq !~ /\*/) {
            return $transcript;
        }
    }

    # No frame produced a translation free of '*'.
    return;
}
208 
# fset2transcript_3frame
#
#   Arg [1]     : $fset - feature set providing id() and sub_SeqFeature();
#                 each sub-feature becomes one exon in every transcript.
#   Arg [2]     : $contig - object providing id(); it is attached to every
#                 exon as both contig and slice.
#   Description : Builds three transcripts from the same features, one per
#                 start phase 0, 1 and 2. Within a transcript the phases are
#                 chained along the strand-ordered exons
#                 (end_phase = (phase + length) % 3). Each transcript gets
#                 its own Translation running from position 1 of the first
#                 exon to the last base of the last exon.
#   Returntype  : list of three Bio::EnsEMBL::Transcript objects, in start
#                 phase order 0, 1, 2.
#   Exceptions  : dies when the feature set has no sub-features.
sub fset2transcript_3frame {
    my ($fset, $contig) = @_;

    my @f = $fset->sub_SeqFeature;

    # Fail with a clear message instead of the opaque "Can't call method
    # strand on an undefined value" that an empty set used to trigger.
    die "fset2transcript_3frame: feature set has no sub-features"
        unless @f;

    # Order the features 5' -> 3': ascending start when the first feature
    # reports strand 1, descending start otherwise.
    if ($f[0]->strand == 1) {
        @f = sort { $a->start <=> $b->start } @f;
    }
    else {
        @f = sort { $b->start <=> $a->start } @f;
    }

    # The ids do not change between frames, so resolve them once. An
    # undefined id is treated as the empty string everywhere it is used.
    my $contig_id = $contig->id;
    $contig_id = "" unless defined $contig_id;
    my $fset_id = $fset->id;
    $fset_id = "" unless defined $fset_id;

    my @transcripts;

    foreach my $startphase (0 .. 2) {
        my $endphase = $startphase;

        my $transcript = Bio::EnsEMBL::Transcript->new;
        push(@transcripts, $transcript);

        $transcript->temporary_id($contig_id . "." . $startphase);

        my $count = 1;    # 1-based exon number used in the exon temporary id
        my @exons;

        foreach my $f (@f) {
            my $exon = Bio::EnsEMBL::Exon->new;
            push(@exons, $exon);
            $exon->seqname($f->seqname);
            $exon->temporary_id($contig_id . ".$count");
            $exon->contig($contig);
            $exon->start($f->start);
            $exon->end($f->end);
            $exon->strand($f->strand);
            $exon->slice($contig);
            $exon->phase($endphase);
            $exon->end_phase(($exon->phase + $exon->length) % 3);

            # The next exon starts in the phase this one ends in.
            $endphase = $exon->end_phase;

            $transcript->add_Exon($exon);
            $count++;
        }

        my $translation = Bio::EnsEMBL::Translation->new;

        $translation->temporary_id($contig_id . "." . $fset_id);
        $translation->start(1);
        $translation->end($exons[-1]->end - $exons[-1]->start + 1);
        $translation->start_Exon($exons[0]);
        $translation->end_Exon($exons[-1]);
        $transcript->translation($translation);
    }

    return @transcripts;
}
287 
288 
# fset2transcript_with_seq
#
#   Arg [1]     : $genscan - feature set providing seqname() and
#                 sub_SeqFeature(); each sub-feature becomes one exon.
#                 Expected to be a Bio::EnsEMBL::SeqFeatureI.
#   Arg [2]     : $seq - sequence object providing id(); expected to be a
#                 Bio::PrimarySeqI or Bio::SeqI. It is attached to every
#                 exon as both contig and slice.
#   Description : Builds a transcript (temporary id "<seq id>.<seqname>")
#                 with one exon per sub-feature. Each exon copies start,
#                 end, strand and phase from its feature and gets
#                 end_phase = (phase + length) % 3. No translation is set.
#   Returntype  : Bio::EnsEMBL::Transcript
sub fset2transcript_with_seq {
    my ($genscan, $seq) = @_;

    # Both type checks are advisory: a mismatch is reported on STDOUT and
    # processing carries on regardless.
    print "$genscan must be Bio::EnsEMBL::SeqFeatureI\n"
        if !$genscan->isa("Bio::EnsEMBL::SeqFeatureI");
    print "$seq must be Bio::SeqI or a Bio::PrimarySeqI\n"
        if !$seq->isa("Bio::PrimarySeqI") && !$seq->isa("Bio::SeqI");

    my $transcript = Bio::EnsEMBL::Transcript->new;
    my $seq_id     = $seq->id;
    my $seq_name   = $genscan->seqname;
    $transcript->temporary_id($seq_id . "." . $seq_name);

    # First pass: build one fully populated exon per sub-feature.
    my @built_exons;
    for my $feature ($genscan->sub_SeqFeature) {
        my $new_exon = Bio::EnsEMBL::Exon->new;

        $new_exon->contig($seq);
        $new_exon->start($feature->start);
        $new_exon->end($feature->end);
        $new_exon->strand($feature->strand);
        $new_exon->phase($feature->phase);

        my $phase_after = ($new_exon->phase + $new_exon->length) % 3;
        $new_exon->end_phase($phase_after);

        $new_exon->slice($seq);

        push @built_exons, $new_exon;
    }

    # Second pass: attach the exons to the transcript in feature order.
    for my $built_exon (@built_exons) {
        $transcript->add_Exon($built_exon);
    }

    return $transcript;
}
332 
333 
334 
335 1;
Bio::EnsEMBL::Translation
Definition: Translation.pm:32
Bio::EnsEMBL::TranscriptFactory::fset2transcript
public fset2transcript()
Bio::EnsEMBL::Exon
Definition: Exon.pm:42
Bio::EnsEMBL::Transcript
Definition: Transcript.pm:44
Bio::EnsEMBL::Exon::start
public Int start()
Bio::EnsEMBL::TranscriptFactory
Definition: TranscriptFactory.pm:21
Bio::EnsEMBL::Translation::start
public Int start()