ensembl-hive  2.6
PairAlign.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 PairAlign - Dna pairwise alignment module
34 
35 =head1 SYNOPSIS
36 
37 #To convert between coordinates:
38 
39  my $cdna_coord = $pair->genomic2cDNA($gen_coord);
40  my $gen_coord = $pair->cDNA2genomic($cdna_coord);
41 
42 =head1 DESCRIPTION
43 
44 Contains list of sub alignments making up a dna-dna alignment
45 
46 Creation:
47 
48  my $pair = new Bio::EnsEMBL::FeaturePair(
49  -start => $qstart,
50  -end => $qend,
51  -strand => $qstrand,
52  -hstart => $hstart,
53  -hend => $hend,
54  -hend => $hstrand,
55  );
56 
57  my $pairaln = new Bio::EnsEMBL::Analysis::PairAlign;
58  $pairaln->addFeaturePair($pair);
59 
60 Any number of pair alignments can be added to the PairAlign object
61 
62 =cut
63 
64 package Bio::EnsEMBL::Analysis::PairAlign;
65 
66 use vars qw(@ISA);
67 use strict;
68 
69 
70 sub new {
71  my($class,@args) = @_;
72  my $self = {};
73  bless $self, $class;
74 
75  $self->{'_homol'} = [];
76 
77  return $self; # success - we hope!
78 }
79 
80 sub addFeaturePair {
81  my ($self,$pair) = @_;
82 
83  $self->throw("Not a Bio::EnsEMBL::FeaturePair object") unless ($pair->isa("Bio::EnsEMBL::FeaturePair"));
84 
85  push(@{$self->{'_pairs'}},$pair);
86 
87 }
88 
89 
90 =head2 eachFeaturePair
91 
92  Title : eachFeaturePair
93  Example : my @pairs = $pair->eachFeaturePair
94  Returns : Array of Bio::SeqFeature::FeaturePair
95  Args : none
96 =cut
97 
98 sub eachFeaturePair {
99  my ($self) = @_;
100 
101  if (defined($self->{'_pairs'})) {
102  return @{$self->{'_pairs'}};
103  }
104 }
105 
106 sub get_hstrand {
107  my ($self) = @_;
108 
109  my @features = $self->eachFeaturePair;
110 
111  return $features[0]->hstrand;
112 }
113 
114 =head2 genomic2cDNA
115 
116  Title : genomic2cDNA
117  Usage : my $cdna_coord = $pair->genomic2cDNA($gen_coord)
118  Function: Converts a genomic coordinate to a cdna coordinate
119  Returns : int
120  Args : int
121 =cut
122 
123 sub genomic2cDNA {
124  my ($self,$coord) = @_;
125  my @pairs = $self->eachFeaturePair;
126 
127  @pairs = sort {$a->start <=> $b->start} @pairs;
128 
129  my $newcoord;
130 
131  HOMOL: while (my $sf1 = shift(@pairs)) {
132  next HOMOL unless ($coord >= $sf1->start && $coord <= $sf1->end);
133 
134  if ($sf1->strand == 1 && $sf1->hstrand == 1) {
135  $newcoord = $sf1->hstart + ($coord - $sf1->start);
136  last HOMOL;
137  } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
138  $newcoord = $sf1->hend - ($coord - $sf1->start);
139  last HOMOL;
140  } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
141  $newcoord = $sf1->hstart + ($sf1->end - $coord);
142  last HOMOL;
143  } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
144  $newcoord = $sf1->hend - ($sf1->end - $coord);
145  last HOMOL;
146  } else {
147  $self->throw("ERROR: Wrong strand value in FeaturePair (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
148  }
149  }
150 
151  if (defined($newcoord)) {
152 
153  return $newcoord;
154  } else {
155  $self->throw("Couldn't convert $coord");
156  }
157 }
158 
159 =head2 cDNA2genomic
160 
161  Title : cDNA2genomic
162  Usage : my $gen_coord = $pair->genomic2cDNA($cdna_coord)
163  Function: Converts a cdna coordinate to a genomic coordinate
164  Example :
165  Returns : int
166  Args : int
167 
168 
169 =cut
170 
171 sub cDNA2genomic {
172  my ($self,$coord) = @_;
173 
174  my @pairs = $self->eachFeaturePair;
175 
176  my $newcoord;
177 
178  HOMOL: while (my $sf1 = shift(@pairs)) {
179  next HOMOL unless ($coord >= $sf1->hstart && $coord <= $sf1->hend);
180 
181  if ($sf1->strand == 1 && $sf1->hstrand == 1) {
182  $newcoord = $sf1->start + ($coord - $sf1->hstart);
183  last HOMOL;
184  } elsif ($sf1->strand == 1 && $sf1->hstrand == -1) {
185  $newcoord = $sf1->start +($sf1->hend - $coord);
186  last HOMOL;
187  } elsif ($sf1->strand == -1 && $sf1->hstrand == 1) {
188  $newcoord = $sf1->end - ($coord - $sf1->hstart);
189  last HOMOL;
190  } elsif ($sf1->strand == -1 && $sf1->hstrand == -1) {
191  $newcoord = $sf1->end - ($sf1->hend - $coord);
192  last HOMOL;
193  } else {
194  $self->throw("ERROR: Wrong strand value in homol (" . $sf1->strand . "/" . $sf1->hstrand . "\n");
195  }
196  }
197 
198  if (defined ($newcoord)) {
199  return $newcoord;
200  } else {
201  $self->throw("Couldn't convert $coord\n");
202  }
203 }
204 
205 sub find_Pair {
206  my ($self,$coord) = @_;
207 
208  foreach my $p ($self->eachFeaturePair) {
209  if ($coord >= $p->hstart && $coord <= $p->hend) {
210  return $p;
211  }
212  }
213 }
214 
215 =head2 convert_cDNA_feature
216 
217  Title : convert_cDNA_feature
218  Usage : my @newfeatures = $self->convert_cDNA_feature($f);
219  Function: Converts a feature on the cDNA into an array of
220  features on the genomic (for features that span across introns);
221  Returns : Array of Bio::EnsEMBL::FeaturePair
223 
224 =cut
225 
226 sub convert_cDNA_feature {
227  my ($self,$feature) = @_;
228 
229  my $foundstart = 0;
230  my $foundend = 0;
231 
232  my @pairs = $self->eachFeaturePair;
233  my @newfeatures;
234 
235  HOMOL: while (my $sf1 = shift(@pairs)) {
236  my $skip = 0;
237  #print STDERR "Looking at cDNA exon " . $sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
238 
239  $skip = 1 unless ($feature->start >= $sf1->hstart
240  && $feature->start <= $sf1->hend);
241 
242  if($skip){
243  #print STDERR "Skipping ".$sf1->hstart . "\t" . $sf1->hend . "\t" . $sf1->strand ."\n";
244  next HOMOL;
245  }
246  if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
247  $foundend = 1;
248  }
249 
250  my $startcoord = $self->cDNA2genomic($feature->start);
251  my $endcoord;
252 
253  if ($sf1->hstrand == 1) {
254  $endcoord = $sf1->end;
255  } else {
256  $endcoord = $sf1->start;
257  }
258 
259  if ($foundend) {
260  $endcoord = $self->cDNA2genomic($feature->end);
261  }
262 
263  #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
264 
265  my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
266  -start => $startcoord,
267  -end => $endcoord,
268  -strand => $feature->strand);
269  push(@newfeatures,$tmpf);
270  last;
271  }
272 
273  # Now the rest of the pairs until we find the endcoord
274 
275  while ((my $sf1 = shift(@pairs)) && ($foundend == 0)) {
276 
277  if ($feature->end >= $sf1->hstart && $feature->end <= $sf1->hend) {
278  $foundend = 1;
279  }
280 
281  my $startcoord;
282  my $endcoord;
283 
284  if ($sf1->hstrand == 1) {
285  $startcoord = $sf1->start;
286  $endcoord = $sf1->end;
287  } else {
288  $startcoord = $sf1->end;
289  $endcoord = $sf1->start;
290  }
291 
292  if ($foundend) {
293  $endcoord = $self->cDNA2genomic($feature->end);
294  }
295 
296  # #print STDERR "Making new genomic feature $startcoord\t$endcoord\n";
297 
298  my $tmpf = new Bio::EnsEMBL::Feature(-seqname => $feature->seqname,
299  -start => $startcoord,
300  -end => $endcoord,
301  -strand => $feature->strand);
302  push(@newfeatures,$tmpf);
303  }
304  #print STDERR "Have ".@newfeatures." features from ".$feature."\n";
305  return @newfeatures;
306 }
307 
308 
309 sub convert_FeaturePair {
310  my ($self,$pair) = @_;
311 
312  my $hstrand = $self->get_hstrand;
313 
314  my $feat = $self->create_Feature($pair->start, $pair->end, $pair->strand,
315  $pair->slice);
316  my @newfeatures = $self->convert_cDNA_feature($feat);
317  my @newpairs;
318 
319  my $hitpairaln = new Bio::EnsEMBL::Analysis::PairAlign;
320  $hitpairaln->addFeaturePair($pair);
321 
322  foreach my $new (@newfeatures) {
323 
324  # Now we want to convert these cDNA coords into hit coords
325 
326  my $hstart1 = $self->genomic2cDNA($new->start);
327  my $hend1 = $self->genomic2cDNA($new->end);
328 
329  my $hstart2 = $hitpairaln->genomic2cDNA($hstart1);
330  my $hend2 = $hitpairaln->genomic2cDNA($hend1);
331 
332  # We can now put the final feature together
333 
334  my $finalstrand = $hstrand * $pair->strand * $pair->hstrand;
335 
336  if ($hstart2 > $hend2) {
337  my $tmp = $hstart2;
338  $hstart2 = $hend2;
339  $hend2 = $tmp;
340  }
341 
342  my $finalpair = $self->create_FeaturePair($new->start, $new->end,
343  $new->strand,
344  $hstart2, $hend2,
345  $finalstrand, $pair->score);
346 
347  push(@newpairs,$finalpair);
348 
349  }
350 
351  return @newpairs;
352 }
353 
354 sub create_FeaturePair {
355  my ($self, $start, $end, $strand, $hstart, $hend,
356  $hstrand, $score) = @_;
357 
358  my $fp = Bio::EnsEMBL::FeaturePair->new(
359  -start => $start,
360  -end => $end,
361  -strand => $strand,
362  -hstart => $hstart,
363  -hend => $hend,
364  -hstrand => $hstrand,
365  -score => $score,
366  );
367 
368 
369  return $fp;
370 }
371 
372 sub create_Feature{
373  my ($self, $start, $end, $strand, $slice) = @_;
374 
375  my $feat = new Bio::EnsEMBL::Feature(-start => $start,
376  -end => $end,
377  -strand => $strand,
378  -slice => $slice,
379  );
380  return $feat;
381 }
382 
383 1;
384 
385 
386 
387 
388 
389 
390 
391 
Bio::EnsEMBL::Analysis::PairAlign
Definition: PairAlign.pm:34
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
Bio::EnsEMBL::Analysis::PairAlign::addFeaturePair
public addFeaturePair()
Bio::EnsEMBL::FeaturePair
Definition: FeaturePair.pm:56
Bio::EnsEMBL::FeaturePair::new
public Bio::EnsEMBL::FeaturePair new()
Bio::EnsEMBL::Analysis::PairAlign::genomic2cDNA
public Int genomic2cDNA()