ensembl-hive  2.6
DnaDnaAlignFeature.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::DnaDnaAlignFeature - Ensembl specific dna-dna pairwise
34 alignment feature
35 
36 =head1 SYNOPSIS
37 
39 
40 =cut
41 
42 
43 package Bio::EnsEMBL::DnaDnaAlignFeature;
44 
45 use strict;
46 
48 
49 use vars qw(@ISA);
50 use Bio::SimpleAlign;
51 use Bio::LocatableSeq;
52 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
53 use Bio::EnsEMBL::Utils::Exception qw(throw);
54 
56 
57 use constant SEQUENCE_ONTOLOGY => {
58  acc => 'SO:0000347',
59  term => 'nucleotide_match',
60 };
61 
62 =head2 new
63 
64  Arg [..] : List of named arguments. defined
65  in this constructor, others defined in BaseFeaturePair and
66  SeqFeature superclasses.
67  Example : $daf = new DnaDnaAlignFeature(-cigar_string => '3M3I12M');
68  Description: Creates a new DnaDnaAlignFeature using either a cigarstring or
69  a list of ungapped features.
71  Exceptions : none
72  Caller : general
73  Status : Stable
74 
75 =cut
76 
77 sub new {
78 
79  my $caller = shift;
80 
81  my $class = ref($caller) || $caller;
82 
83  my $self = $class->SUPER::new(@_);
84 
85  return $self;
86 }
87 
88 
89 =head2 _hit_unit
90 
91  Arg [1] : none
92  Description: PRIVATE implementation of abstract superclass method. Returns
93  1 as the 'unit' used for the hit sequence.
94  Returntype : int
95  Exceptions : none
97  Status : Stable
98 
99 =cut
100 
101 sub _hit_unit {
102  return 1;
103 }
104 
105 
106 
107 =head2 _query_unit
108 
109  Arg [1] : none
110  Description: PRIVATE implementation of abstract superclass method Returns
111  1 as the 'unit' used for the hit sequence.
112  Returntype : int
113  Exceptions : none
115  Status : Stable
116 
117 =cut
118 
119 sub _query_unit {
120  return 1;
121 }
122 
123 =head2 restrict_between_positions
124 
125  Arg [1] : int $start
126  Arg [2] : int $end
127  Arg [3] : string $flags
128  SEQ = $start and $end apply to the seq sequence
129  i.e. start and end methods
130  HSEQ = $start and $end apply to the hseq sequence
131  i.e. hstart and hend methods
132  Example : $daf->restrict_between_positions(150,543,"SEQ")
133  Description: Build a new DnaDnaAlignFeature object that fits within
134  the new specified coordinates and sequence reference, cutting
135  any pieces hanging upstream and downstream.
136  Returntype : Bio::EnsEMBL::DnaDnaAlignFeature object
137  Exceptions :
138  Caller :
139  Status : Stable
140 
141 =cut
142 
143 sub restrict_between_positions {
144  my ($self,$start,$end,$seqref) = @_;
145 
146  unless (defined $start && $start =~ /^\d+$/) {
147  $self->throw("The first argument is not defined or is not an integer");
148  }
149  unless (defined $end && $end =~ /^\d+$/) {
150  $self->throw("The second argument is not defined or is not an integer");
151  }
152  unless (defined $seqref &&
153  ($seqref eq "SEQ" || $seqref eq "HSEQ")) {
154  $self->throw("The third argument is not defined or is not equal to 'SEQ' or 'HSEQ'");
155  }
156 
157 # symbolic method references should be forbidden!
158 # need to be rewrite at some stage.
159 
160  my ($start_method1,$end_method1,$strand_method1,$start_method2,$end_method2,$strand_method2) =
161  qw(start end strand hstart hend hstrand);
162 
163  if ($seqref eq "HSEQ") {
164  ($start_method1,$end_method1,$strand_method1,$start_method2,$end_method2,$strand_method2) =
165  qw(hstart hend hstrand start end strand);
166  }
167 
168  my @restricted_features;
169 
170  foreach my $ungapped_feature ($self->ungapped_features) {
171 
172  if ($ungapped_feature->$start_method1() > $end ||
173  $ungapped_feature->$end_method1() < $start) {
174 
175  next;
176 
177  } elsif ($ungapped_feature->$end_method1() <= $end &&
178  $ungapped_feature->$start_method1() >= $start) {
179 
180  push @restricted_features, $ungapped_feature;
181 
182  } else {
183 
184  if ($ungapped_feature->$strand_method1() eq $ungapped_feature->$strand_method2()) {
185 
186  if ($ungapped_feature->$start_method1() < $start) {
187 
188  my $offset = $start - $ungapped_feature->$start_method1();
189  $ungapped_feature->$start_method1($start);
190  $ungapped_feature->$start_method2($ungapped_feature->$start_method2() + $offset);
191 
192  }
193  if ($ungapped_feature->$end_method1() > $end) {
194 
195  my $offset = $ungapped_feature->$end_method1() - $end;
196  $ungapped_feature->$end_method1($end);
197  $ungapped_feature->$end_method2($ungapped_feature->$end_method2() - $offset);
198 
199  }
200  } else {
201 
202  if ($ungapped_feature->$start_method1() < $start) {
203 
204  my $offset = $start - $ungapped_feature->$start_method1();
205  $ungapped_feature->$start_method1($start);
206  $ungapped_feature->$end_method2($ungapped_feature->$end_method2() - $offset);
207 
208  }
209  if ($ungapped_feature->$end_method1() > $end) {
210 
211  my $offset = $ungapped_feature->$end_method1() - $end;
212  $ungapped_feature->$end_method1($end);
213  $ungapped_feature->$start_method2($ungapped_feature->$start_method2() + $offset);
214 
215  }
216  }
217 
218  push @restricted_features, $ungapped_feature;
219  }
220  }
221 
222  if (scalar @restricted_features) {
223  my $DnaDnaAlignFeature = new Bio::EnsEMBL::DnaDnaAlignFeature('-features' =>\@restricted_features);
224  if (defined $self->slice) {
225  $DnaDnaAlignFeature->slice($self->slice);
226  }
227  if (defined $self->hslice) {
228  $DnaDnaAlignFeature->hslice($self->hslice);
229  }
230  return $DnaDnaAlignFeature;
231  } else {
232  return undef;
233  }
234 }
235 
236 =head2 alignment_strings
237 
238  Arg [1] : list of string $flags
239  FIX_SEQ = does not introduce gaps (dashes) in seq aligned sequence
240  and delete the corresponding insertions in hseq aligned sequence
241  FIX_HSEQ = does not introduce gaps (dashes) in hseq aligned sequence
242  and delete the corresponding insertions in seq aligned sequence
243  NO_SEQ = return the seq aligned sequence as an empty string
244  NO_HSEQ = return the hseq aligned sequence as an empty string
245  This 2 last flags would save a bit of time as doing so no querying to the core
246  database in done to get the sequence.
247  Example : $daf->alignment_strings or
248  $daf->alignment_strings("FIX_HSEQ") or
249  $daf->alignment_strings("NO_SEQ","FIX_SEQ")
250  Description: Allows to rebuild the alignment string of both the seq and hseq sequence
251  using the cigar_string information and the slice and hslice objects
252  Returntype : array reference containing 2 strings
253  the first corresponds to seq
254  the second corresponds to hseq
255  Exceptions :
256  Caller :
257  Status : Stable
258 
259 =cut
260 
261 
262 sub alignment_strings {
263  my ($self, @flags) = @_;
264  if ($self->align_type eq 'ensembl') {
265  return $self->_ensembl_alignment_strings(@flags);
266  } else {
267  throw("alignment_strings method not implemented for " . $self->align_type);
268  }
269 }
270 
271 =head2 _ensembl_alignment_strings
272 
273  Arg [1] : list of string $flags
274  Description: Allows to rebuild the alignment string of both the seq and hseq sequence
275  using the cigar_string information for ensembl cigar strings
276  Returntype : array reference containing 2 strings
277  the first corresponds to seq
278  the second corresponds to hseq
279  Exceptions :
280  Caller :
281  Status : Stable
282 
283 =cut
284 
285 
286 sub _ensembl_alignment_strings {
287  my ( $self, @flags ) = @_;
288 
289  # set the flags
290  my $seq_flag = 1;
291  my $hseq_flag = 1;
292  my $fix_seq_flag = 0;
293  my $fix_hseq_flag = 0;
294 
295  for my $flag ( @flags ) {
296  $seq_flag = 0 if ($flag eq "NO_SEQ");
297  $hseq_flag = 0 if ($flag eq "NO_HSEQ");
298  $fix_seq_flag = 1 if ($flag eq "FIX_SEQ");
299  $fix_hseq_flag = 1 if ($flag eq "FIX_HSEQ");
300  }
301 
302  my ($seq, $hseq);
303  $seq = $self->slice->subseq($self->start, $self->end, $self->strand) if ($seq_flag || $fix_seq_flag);
304  $hseq = $self->hslice->subseq($self->hstart, $self->hend, $self->hstrand) if ($hseq_flag || $fix_hseq_flag);
305 
306  my $rseq= "";
307  # rseq - result sequence
308  my $rhseq= "";
309  # rhseq - result hsequence
310 
311  my $seq_pos = 0;
312  my $hseq_pos = 0;
313 
314  my @cig = ( $self->cigar_string =~ /(\d*[DIM])/g );
315 
316  for my $cigElem ( @cig ) {
317  my $cigType = substr( $cigElem, -1, 1 );
318  my $cigCount = substr( $cigElem, 0 ,-1 );
319  $cigCount = 1 unless $cigCount;
320 
321  if( $cigType eq "M" ) {
322  $rseq .= substr( $seq, $seq_pos, $cigCount ) if ($seq_flag);
323  $rhseq .= substr( $hseq, $hseq_pos, $cigCount ) if ($hseq_flag);
324  $seq_pos += $cigCount;
325  $hseq_pos += $cigCount;
326  } elsif( $cigType eq "D" ) {
327  if( ! $fix_seq_flag ) {
328  $rseq .= "-" x $cigCount if ($seq_flag);
329  $rhseq .= substr( $hseq, $hseq_pos, $cigCount ) if ($hseq_flag);
330  }
331  $hseq_pos += $cigCount;
332  } elsif( $cigType eq "I" ) {
333  if( ! $fix_hseq_flag ) {
334  $rseq .= substr( $seq, $seq_pos, $cigCount ) if ($seq_flag);
335  $rhseq .= "-" x $cigCount if ($hseq_flag);
336  }
337  $seq_pos += $cigCount;
338  }
339  }
340  return [ $rseq,$rhseq ];
341 }
342 
343 =head2 get_SimpleAlign
344 
345  Arg [1] : list of string $flags
346  translated = by default, the sequence alignment will be on nucleotide. With translated flag
347  the aligned sequences are translated.
348  uc = by default aligned sequences are given in lower cases. With uc flag, the aligned
349  sequences are given in upper cases.
350  Example : $daf->get_SimpleAlign or
351  $daf->get_SimpleAlign("translated") or
352  $daf->get_SimpleAlign("translated","uc")
353  Description: Allows to rebuild the alignment string of both the seq and hseq sequence
354  using the cigar_string information and the slice and hslice objects
355  Returntype : a Bio::SimpleAlign object
356  Exceptions :
357  Caller :
358  Status : Stable
359 
360 =cut
361 
362 sub get_SimpleAlign {
363  my ( $self, @flags ) = @_;
364 
365  if ($self->align_type eq 'ensembl') {
366  return $self->_ensembl_SimpleAlign();
367  } else {
368  throw("No get_SimpleAlign method implemented for " . $self->align_type);
369  }
370 }
371 
372 =head2 _ensembl_SimpleAlign
373 
374  Arg [1] : list of string $flags
375  Description: Internal method to build alignment string
376  for ensembl type cigar strings
377  using the cigar_string information and the slice and hslice objects
378  Returntype : a Bio::SimpleAlign object
379  Exceptions :
380  Caller :
381  Status : Stable
382 
383 =cut
384 
385 sub _ensembl_SimpleAlign {
386  my ( $self, @flags ) = @_;
387 
388  # setting the flags
389  my $uc = 0;
390  my $translated = 0;
391 
392  for my $flag ( @flags ) {
393  $uc = 1 if ($flag =~ /^uc$/i);
394  $translated = 1 if ($flag =~ /^translated$/i);
395  }
396 
397  my $sa = Bio::SimpleAlign->new();
398 
399  #Hack to try to work with both bioperl 0.7 and 1.2:
400  #Check to see if the method is called 'addSeq' or 'add_seq'
401  my $bio07 = 0;
402  if(!$sa->can('add_seq')) {
403  $bio07 = 1;
404  }
405 
406  my ($sb_seq,$qy_seq) = @{$self->alignment_strings};
407 
408  my $loc_sb_seq = Bio::LocatableSeq->new(-SEQ => $uc ? uc $sb_seq : lc $sb_seq,
409  -START => $self->seq_region_start,
410  -END => $self->seq_region_end,
411  -ID => $self->seqname,
412  -STRAND => $self->strand);
413 
414  $loc_sb_seq->seq($uc ? uc $loc_sb_seq->translate->seq
415  : lc $loc_sb_seq->translate->seq) if ($translated);
416 
417  my $loc_qy_seq = Bio::LocatableSeq->new(-SEQ => $uc ? uc $qy_seq : lc $qy_seq,
418  -START => $self->hseq_region_start,
419  -END => $self->hseq_region_end,
420  -ID => $self->hseqname,
421  -STRAND => $self->hstrand);
422 
423  $loc_qy_seq->seq($uc ? uc $loc_qy_seq->translate->seq
424  : lc $loc_qy_seq->translate->seq) if ($translated);
425 
426  if($bio07) {
427  $sa->addSeq($loc_sb_seq);
428  $sa->addSeq($loc_qy_seq);
429  } else {
430  $sa->add_seq($loc_sb_seq);
431  $sa->add_seq($loc_qy_seq);
432  }
433 
434  return $sa;
435 }
436 
437 
438 =head2 get_all_Attributes
439 
440  Arg [1] : (optional) String $attrib_code
441  The code of the attribute type to retrieve values for
442  Example : my ($description) = @{ $feature->get_all_Attributes('description') };
443  my @attributes = @{ $feature->get_all_Attributes };
444  Description: Gets a list of Attributes of this gene.
445  Optionally just get Attributes for given code.
446  Returntype : Listref of Bio::EnsEMBL::Attribute
447  Exceptions :
448  Caller : general
449  Status : Stable
450 
451 =cut
452 
453 sub get_all_Attributes {
454  my $self = shift;
455  my $attrib_code = shift;
456 
457  if ( ! exists $self->{'attributes' } ) {
458  if (!$self->adaptor() ) {
459  return [];
460  }
461 
462  my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
463  $self->{'attributes'} = $attribute_adaptor->fetch_all_by_DnaDnaAlignFeature($self);
464  }
465 
466  if ( defined $attrib_code ) {
467  my @results = grep { uc($_->code()) eq uc($attrib_code) }
468  @{$self->{'attributes'}};
469  return \@results;
470  } else {
471  return $self->{'attributes'};
472  }
473 }
474 
475 
476 =head2 add_Attributes
477 
478  Arg [1-N] : list of Bio::EnsEMBL::Attribute's @attribs
479  Attribute(s) to add
480  Example : my $attrib = Bio::EnsEMBL::Attribute->new(...);
481  $gene->add_Attributes($attrib);
482  Description: Adds an Attribute to the feature.
483  Returntype : none
484  Exceptions : throw on incorrect arguments
485  Caller : general
486  Status : Stable
487 
488 =cut
489 
490 sub add_Attributes {
491  my $self = shift;
492  my @attribs = @_;
493 
494  if( ! exists $self->{'attributes'} ) {
495  $self->{'attributes'} = [];
496  }
497 
498  for my $attrib ( @attribs ) {
499  if( ! $attrib->isa( "Bio::EnsEMBL::Attribute" )) {
500  throw( "Argument to add_Attribute has to be an Bio::EnsEMBL::Attribute" );
501  }
502  push( @{$self->{'attributes'}}, $attrib );
503  }
504 
505  return;
506 }
507 =head2 transfer
508 
509  Arg [1] : Bio::EnsEMBL::Slice $destination_slice
510  Example : my $new_feature = $feature->transfer($slice);
511  Description: Moves this feature to given target slice coordinates.
512  Returns a new feature.
513  Returntype : Bio::EnsEMBL::DnaDnaAlignFeature
514  Exceptions : none
515  Caller : general
516  Status : Stable
517 
518 =cut
519 
520 sub transfer {
521  my $self = shift;
522 
523  my $new_feature = $self->SUPER::transfer( @_ );
524  return undef unless $new_feature;
525 
526  if(exists $self->{attributes}) {
527  $new_feature->{attributes} = [@{$self->{attributes}}];
528  }
529 
530  return $new_feature;
531 }
532 
533 1;
Bio::EnsEMBL::BaseAlignFeature
Definition: BaseAlignFeature.pm:90
Bio::EnsEMBL::SeqFeature
Definition: SeqFeature.pm:30
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::DnaDnaAlignFeature
Definition: DnaDnaAlignFeature.pm:15
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68