3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
43 package Bio::EnsEMBL::DnaDnaAlignFeature;
51 use Bio::LocatableSeq;
57 use constant SEQUENCE_ONTOLOGY => {
59 term =>
'nucleotide_match',
64 Arg [..] : List of named arguments. defined
65 in
this constructor, others defined in BaseFeaturePair and
69 a list of ungapped features.
81 my $class = ref($caller) || $caller;
83 my $self = $class->SUPER::new(@_);
92 Description: PRIVATE implementation of
abstract superclass method. Returns
93 1 as the
'unit' used
for the hit sequence.
110 Description: PRIVATE implementation of
abstract superclass method Returns
111 1 as the
'unit' used
for the hit sequence.
123 =head2 restrict_between_positions
127 Arg [3] :
string $flags
128 SEQ = $start and $end apply to the seq sequence
129 i.e. start and end methods
130 HSEQ = $start and $end apply to the hseq sequence
131 i.e. hstart and hend methods
132 Example : $daf->restrict_between_positions(150,543,
"SEQ")
134 the
new specified coordinates and sequence reference, cutting
135 any pieces hanging upstream and downstream.
143 sub restrict_between_positions {
144 my ($self,$start,$end,$seqref) = @_;
146 unless (defined $start && $start =~ /^\d+$/) {
147 $self->throw(
"The first argument is not defined or is not an integer");
149 unless (defined $end && $end =~ /^\d+$/) {
150 $self->throw(
"The second argument is not defined or is not an integer");
152 unless (defined $seqref &&
153 ($seqref eq
"SEQ" || $seqref eq
"HSEQ")) {
154 $self->throw(
"The third argument is not defined or is not equal to 'SEQ' or 'HSEQ'");
157 # symbolic method references should be forbidden!
158 # need to be rewrite at some stage.
160 my ($start_method1,$end_method1,$strand_method1,$start_method2,$end_method2,$strand_method2) =
161 qw(start end strand hstart hend hstrand);
163 if ($seqref eq
"HSEQ") {
164 ($start_method1,$end_method1,$strand_method1,$start_method2,$end_method2,$strand_method2) =
165 qw(hstart hend hstrand start end strand);
168 my @restricted_features;
170 foreach my $ungapped_feature ($self->ungapped_features) {
172 if ($ungapped_feature->$start_method1() > $end ||
173 $ungapped_feature->$end_method1() < $start) {
177 } elsif ($ungapped_feature->$end_method1() <= $end &&
178 $ungapped_feature->$start_method1() >= $start) {
180 push @restricted_features, $ungapped_feature;
184 if ($ungapped_feature->$strand_method1() eq $ungapped_feature->$strand_method2()) {
186 if ($ungapped_feature->$start_method1() < $start) {
188 my $offset = $start - $ungapped_feature->$start_method1();
189 $ungapped_feature->$start_method1($start);
190 $ungapped_feature->$start_method2($ungapped_feature->$start_method2() + $offset);
193 if ($ungapped_feature->$end_method1() > $end) {
195 my $offset = $ungapped_feature->$end_method1() - $end;
196 $ungapped_feature->$end_method1($end);
197 $ungapped_feature->$end_method2($ungapped_feature->$end_method2() - $offset);
202 if ($ungapped_feature->$start_method1() < $start) {
204 my $offset = $start - $ungapped_feature->$start_method1();
205 $ungapped_feature->$start_method1($start);
206 $ungapped_feature->$end_method2($ungapped_feature->$end_method2() - $offset);
209 if ($ungapped_feature->$end_method1() > $end) {
211 my $offset = $ungapped_feature->$end_method1() - $end;
212 $ungapped_feature->$end_method1($end);
213 $ungapped_feature->$start_method2($ungapped_feature->$start_method2() + $offset);
218 push @restricted_features, $ungapped_feature;
222 if (scalar @restricted_features) {
224 if (defined $self->slice) {
225 $DnaDnaAlignFeature->slice($self->slice);
227 if (defined $self->hslice) {
228 $DnaDnaAlignFeature->hslice($self->hslice);
230 return $DnaDnaAlignFeature;
236 =head2 alignment_strings
238 Arg [1] : list of
string $flags
239 FIX_SEQ = does not introduce gaps (dashes) in seq aligned sequence
240 and
delete the corresponding insertions in hseq aligned sequence
241 FIX_HSEQ = does not introduce gaps (dashes) in hseq aligned sequence
242 and
delete the corresponding insertions in seq aligned sequence
243 NO_SEQ =
return the seq aligned sequence as an empty
string
244 NO_HSEQ =
return the hseq aligned sequence as an empty
string
245 This 2 last flags would save a bit of time as doing so no querying to the core
246 database in done to get the sequence.
247 Example : $daf->alignment_strings or
248 $daf->alignment_strings(
"FIX_HSEQ") or
249 $daf->alignment_strings(
"NO_SEQ",
"FIX_SEQ")
250 Description: Allows to rebuild the alignment
string of both the seq and hseq sequence
251 using the cigar_string information and the slice and hslice objects
252 Returntype : array reference containing 2 strings
253 the first corresponds to seq
254 the second corresponds to hseq
262 sub alignment_strings {
263 my ($self, @flags) = @_;
264 if ($self->align_type eq
'ensembl') {
265 return $self->_ensembl_alignment_strings(@flags);
267 throw(
"alignment_strings method not implemented for " . $self->align_type);
271 =head2 _ensembl_alignment_strings
273 Arg [1] : list of
string $flags
274 Description: Allows to rebuild the alignment
string of both the seq and hseq sequence
275 using the cigar_string information
for ensembl cigar strings
276 Returntype : array reference containing 2 strings
277 the first corresponds to seq
278 the second corresponds to hseq
286 sub _ensembl_alignment_strings {
287 my ( $self, @flags ) = @_;
292 my $fix_seq_flag = 0;
293 my $fix_hseq_flag = 0;
295 for my $flag ( @flags ) {
296 $seq_flag = 0
if ($flag eq
"NO_SEQ");
297 $hseq_flag = 0
if ($flag eq
"NO_HSEQ");
298 $fix_seq_flag = 1
if ($flag eq
"FIX_SEQ");
299 $fix_hseq_flag = 1
if ($flag eq
"FIX_HSEQ");
303 $seq = $self->slice->subseq($self->start, $self->end, $self->strand)
if ($seq_flag || $fix_seq_flag);
304 $hseq = $self->hslice->subseq($self->hstart, $self->hend, $self->hstrand)
if ($hseq_flag || $fix_hseq_flag);
307 # rseq - result sequence
309 # rhseq - result hsequence
314 my @cig = ( $self->cigar_string =~ /(\d*[DIM])/g );
316 for my $cigElem ( @cig ) {
317 my $cigType = substr( $cigElem, -1, 1 );
318 my $cigCount = substr( $cigElem, 0 ,-1 );
319 $cigCount = 1 unless $cigCount;
321 if( $cigType eq
"M" ) {
322 $rseq .= substr( $seq, $seq_pos, $cigCount )
if ($seq_flag);
323 $rhseq .= substr( $hseq, $hseq_pos, $cigCount )
if ($hseq_flag);
324 $seq_pos += $cigCount;
325 $hseq_pos += $cigCount;
326 } elsif( $cigType eq
"D" ) {
327 if( ! $fix_seq_flag ) {
328 $rseq .=
"-" x $cigCount
if ($seq_flag);
329 $rhseq .= substr( $hseq, $hseq_pos, $cigCount )
if ($hseq_flag);
331 $hseq_pos += $cigCount;
332 } elsif( $cigType eq
"I" ) {
333 if( ! $fix_hseq_flag ) {
334 $rseq .= substr( $seq, $seq_pos, $cigCount )
if ($seq_flag);
335 $rhseq .=
"-" x $cigCount
if ($hseq_flag);
337 $seq_pos += $cigCount;
340 return [ $rseq,$rhseq ];
343 =head2 get_SimpleAlign
345 Arg [1] : list of
string $flags
346 translated = by
default, the sequence alignment will be on nucleotide. With translated flag
347 the aligned sequences are translated.
348 uc = by
default aligned sequences are given in lower cases. With uc flag, the aligned
349 sequences are given in upper cases.
350 Example : $daf->get_SimpleAlign or
351 $daf->get_SimpleAlign(
"translated") or
352 $daf->get_SimpleAlign(
"translated",
"uc")
353 Description: Allows to rebuild the alignment
string of both the seq and hseq sequence
354 using the cigar_string information and the slice and hslice objects
355 Returntype : a Bio::SimpleAlign
object
362 sub get_SimpleAlign {
363 my ( $self, @flags ) = @_;
365 if ($self->align_type eq
'ensembl') {
366 return $self->_ensembl_SimpleAlign();
368 throw(
"No get_SimpleAlign method implemented for " . $self->align_type);
372 =head2 _ensembl_SimpleAlign
374 Arg [1] : list of
string $flags
375 Description: Internal method to build alignment
string
376 for ensembl type cigar strings
377 using the cigar_string information and the slice and hslice objects
378 Returntype : a Bio::SimpleAlign
object
385 sub _ensembl_SimpleAlign {
386 my ( $self, @flags ) = @_;
392 for my $flag ( @flags ) {
393 $uc = 1
if ($flag =~ /^uc$/i);
394 $translated = 1
if ($flag =~ /^translated$/i);
397 my $sa = Bio::SimpleAlign->new();
399 #Hack to try to work with both bioperl 0.7 and 1.2:
400 #Check to see if the method is called 'addSeq' or 'add_seq'
402 if(!$sa->can(
'add_seq')) {
406 my ($sb_seq,$qy_seq) = @{$self->alignment_strings};
408 my $loc_sb_seq = Bio::LocatableSeq->new(-SEQ => $uc ? uc $sb_seq : lc $sb_seq,
409 -START => $self->seq_region_start,
410 -END => $self->seq_region_end,
411 -ID => $self->seqname,
412 -STRAND => $self->strand);
414 $loc_sb_seq->seq($uc ? uc $loc_sb_seq->translate->seq
415 : lc $loc_sb_seq->translate->seq)
if ($translated);
417 my $loc_qy_seq = Bio::LocatableSeq->new(-SEQ => $uc ? uc $qy_seq : lc $qy_seq,
418 -START => $self->hseq_region_start,
419 -END => $self->hseq_region_end,
420 -ID => $self->hseqname,
421 -STRAND => $self->hstrand);
423 $loc_qy_seq->seq($uc ? uc $loc_qy_seq->translate->seq
424 : lc $loc_qy_seq->translate->seq)
if ($translated);
427 $sa->addSeq($loc_sb_seq);
428 $sa->addSeq($loc_qy_seq);
430 $sa->add_seq($loc_sb_seq);
431 $sa->add_seq($loc_qy_seq);
438 =head2 get_all_Attributes
440 Arg [1] : (optional) String $attrib_code
441 The code of the attribute type to retrieve values
for
442 Example : my ($description) = @{ $feature->get_all_Attributes(
'description') };
443 my @attributes = @{ $feature->get_all_Attributes };
444 Description: Gets a list of Attributes of
this gene.
445 Optionally just get Attributes
for given code.
453 sub get_all_Attributes {
455 my $attrib_code = shift;
457 if ( ! exists $self->{
'attributes' } ) {
458 if (!$self->adaptor() ) {
462 my $attribute_adaptor = $self->adaptor->db->get_AttributeAdaptor();
463 $self->{
'attributes'} = $attribute_adaptor->fetch_all_by_DnaDnaAlignFeature($self);
466 if ( defined $attrib_code ) {
467 my @results = grep { uc($_->code()) eq uc($attrib_code) }
468 @{$self->{
'attributes'}};
471 return $self->{
'attributes'};
476 =head2 add_Attributes
480 Example : my $attrib = Bio::EnsEMBL::Attribute->new(...);
481 $gene->add_Attributes($attrib);
482 Description: Adds an Attribute to the feature.
484 Exceptions : throw on incorrect arguments
494 if( ! exists $self->{'attributes
'} ) {
495 $self->{'attributes
'} = [];
498 for my $attrib ( @attribs ) {
499 if( ! $attrib->isa( "Bio::EnsEMBL::Attribute" )) {
500 throw( "Argument to add_Attribute has to be an Bio::EnsEMBL::Attribute" );
502 push( @{$self->{'attributes
'}}, $attrib );
509 Arg [1] : Bio::EnsEMBL::Slice $destination_slice
510 Example : my $new_feature = $feature->transfer($slice);
511 Description: Moves this feature to given target slice coordinates.
512 Returns a new feature.
513 Returntype : Bio::EnsEMBL::DnaDnaAlignFeature
523 my $new_feature = $self->SUPER::transfer( @_ );
524 return undef unless $new_feature;
526 if(exists $self->{attributes}) {
527 $new_feature->{attributes} = [@{$self->{attributes}}];