3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
39 # add a coodinate mapping - supply two pairs or coordinates
40 $map->add_map_coordinates(
41 $contig_id, $contig_start, $contig_end, $contig_ori,
42 $chr_name, chr_start, $chr_end
45 # map from one coordinate system to another
47 $mapper->map_coordinates( 627012, 2, 5, -1,
"rawcontig" );
51 Generic mapper to provide coordinate transforms between two disjoint
52 coordinate systems. This mapper is intended to be
'context neutral' - in
53 that it does not contain any code relating to any particular coordinate
56 Mappings consist of pairs of
'to-' and
'from-' contigs with coordinates on each.
57 Orientation is abbreviated to
'ori',
59 The contig pair hash is divided into mappings per seq_region, the code
60 below makes assumptions
about how to filter these results, thus the comparisons
61 for some properties are absent in the code but implicit by data structure.
63 The assembly mapping hash
'_pair_last' orders itself by the target seq region and looks like
this:
65 1 => ARRAY(0x1024c79c0)
86 2 => ARRAY(0x1025ee460)
108 The other mapping hash available is the reverse sense, putting the
'from' seq_region as the
109 sorting key. Here is an excerpt.
112 4 => ARRAY(0x1025ee028)
139 package Bio::EnsEMBL::Mapper;
158 Arg [1] :
string $from
159 The name of the
'from' coordinate system
161 The name of the
'to' coordinate system
163 The
'from' coordinate system
174 my ( $proto, $from, $to, $from_cs, $to_cs ) = @_;
176 if ( !defined($to) || !defined($from) ) {
177 throw(
"Must supply 'to' and 'from' tags");
180 my $class = ref($proto) || $proto;
182 my $self = bless( {
"_pair_$from" => {},
190 'from_cs' => $from_cs
194 # do sql to get any componente with muliple assemblys.
203 Description: removes all cached information out of
this mapper
212 my $from = $self->from();
213 my $to = $self->to();
215 $self->{
"_pair_$from"} = {};
216 $self->{
"_pair_$to"} = {};
217 $self->{
"_tree_$from"} = {};
218 $self->{
"_tree_$to"} = {};
220 $self->{
'pair_count'} = 0;
225 =head2 map_coordinates
228 id of
'source' sequence
230 start coordinate of
'source' sequence
232 end coordinate of
'source' sequence
234 raw contig orientation (+/- 1)
236 nature of transform - gives the type of
237 coordinates to be transformed *from*
238 Arg 6
boolean (0 or 1) $include_original_region
239 option to include original input coordinate region mappings in the result
240 Arg 7
int $cdna_coding_start
242 Function generic
map method
243 Returntype if $include_original_region == 0
244 array of mappped
Bio::
EnsEMBL::Mapper::Coordinate
246 if $include_original_region == 1
247 hash of mapped and original
Bio::
EnsEMBL::Mapper::Coordinate
254 sub map_coordinates {
255 my ( $self, $id, $start, $end, $strand, $type, $include_original_region, $cdna_coding_start ) = @_;
256 unless ( defined($id)
262 throw(
"Expecting at least 5 arguments");
265 $cdna_coding_start = defined $cdna_coding_start ? $cdna_coding_start : 1;
267 # special case for handling inserts:
268 if ( $start == $end + 1 ) {
269 return $self->map_insert( $id, $start, $end, $strand, $type, undef, $include_original_region);
272 if ( !$self->{
'_is_sorted'} ) { $self->_sort() }
274 my $hash = $self->{
"_pair_$type"};
276 my ( $from, $to, $cs );
278 if ( $type eq $self->{
'to'} ) {
281 $cs = $self->{
'from_cs'};
285 $cs = $self->{
'to_cs'};
288 unless ( defined $hash ) {
289 throw(
"Type $type is neither to or from coordinate systems");
295 if ( !defined $hash->{ uc($id) } ) {
298 if ($include_original_region) {
299 push @paired_result, {
'original' => $gap,
'mapped' => $gap };
300 return @paired_result;
309 # if we don't already have an interval tree built for this id,
311 if ( !defined( $self->{
"_tree_$type"}->{ uc($id) } )) {
312 $self->{
"_tree_$type"}->{ uc($id) } = _build_immutable_tree($from,
315 # query the interval tree (either cached or created new) for overlapping intervals
316 my $overlap = $self->{
"_tree_$type"}->{ uc($id) }->query($start, $end);
319 my $orig_start = $start;
320 my $last_target_coord = undef;
321 foreach my $i (@{$overlap}) {
323 my $self_coord = $pair->{$from};
324 my $target_coord = $pair->{$to};
327 # But not the case for haplotypes!! need to test for this case???
328 # so removing this till a better solution is found
331 # if($self_coord->{'start'} < $start){
332 # $start = $orig_start;
336 if ( defined($last_target_coord) && $target_coord->{
'id'} ne $last_target_coord ) {
337 if ( $self_coord->{
'start'} < $start ) {
338 # i.e. the same bit is being mapped to another assembled bit
339 $start = $orig_start;
342 $last_target_coord = $target_coord->{
'id'};
345 if ( $start < $self_coord->{
'start'} ) {
348 push( @result, $gap );
349 push(@paired_result, {
'original' => $gap,
'mapped' => $gap });
351 $start = $gap->{
'end'} + 1;
353 my ( $target_start, $target_end);
354 my ( $ori_start, $ori_end);
357 if ( exists $pair->{
'indel'} ) {
358 # When next pair is an IndelPair and not a Coordinate, create the
359 # new mapping Coordinate, the IndelCoordinate.
360 $target_start = $target_coord->{
'start'};
361 $target_end = $target_coord->{
'end'};
363 #original coordinates
364 $ori_start = $self_coord->{
'start'};
365 $ori_end = $self_coord->{
'end'};
369 ( $self_coord->{
'end'} < $end ? $self_coord->{
'end'} : $end ) );
370 #create the Coordinate object
373 $target_start, $target_end, $pair->{
'ori'}*$strand, $cs );
374 #and finally, the IndelCoordinate object with
378 # start is somewhere inside the region
379 if ( $pair->{
'ori'} == 1 ) {
380 $target_start = $target_coord->{
'start'} + ( $start - $self_coord->{
'start'} );
382 $target_end = $target_coord->{
'end'} - ( $start - $self_coord->{
'start'} );
385 # Either we are enveloping this map or not. If yes, then end
386 # point (self perspective) is determined solely by target. If
387 # not we need to adjust.
389 if ( $end > $self_coord->{
'end'} ) {
391 if ( $pair->{
'ori'} == 1 ) {
392 $target_end = $target_coord->{
'end'};
394 $target_start = $target_coord->{
'start'};
398 if ( $pair->{
'ori'} == 1 ) {
400 $target_coord->{
'start'} +
401 ( $end - $self_coord->{
'start'} );
405 $target_coord->{
'end'} - ( $end - $self_coord->{
'start'} );
411 $target_start, $target_end, $pair->{
'ori'}*$strand,$cs, $rank );
413 #save the ori coordinate info
414 $ori_start = ($start - $cdna_coding_start) + 1;
415 $ori_end = $ori_start + ($target_end - $target_start);
418 } ## end
else [
if ( exists $pair->{
'indel'...})]
420 push( @result, $res );
422 $ori_start, $ori_end, $pair->{
'ori'}*$strand,$cs, $rank);
423 push(@paired_result, {
'original' => $res_ori,
'mapped' => $res });
426 $last_used_pair = $pair;
427 $start = $self_coord->{
'end'} + 1;
429 } ## end
for ( my $i = $start_idx...)
431 if ( !defined $last_used_pair ) {
433 push( @result, $gap );
434 push(@paired_result, {
'original' => $gap,
'mapped' => $gap });
437 } elsif ( $last_used_pair->{$from}->{
'end'} < $end ) {
440 $last_used_pair->{$from}->{
'end'} + 1,
442 push( @result, $gap );
444 push(@paired_result, {
'original' => $gap,
'mapped' => $gap });
447 if ( $strand == -1 ) {
448 @result = reverse(@result);
449 @paired_result = reverse(@paired_result);
452 if ($include_original_region){
453 return @paired_result;
458 } ## end sub map_coordinates
465 Arg [2] :
int $start - start coord. Since
this is an insert should always
466 be one greater than end.
467 Arg [3] :
int $end - end coord. Since
this is an insert should always
468 be one less than start.
469 Arg [4] :
int $strand (0, 1, -1)
470 Arg [5] :
string $type - the coordinate system name the coords are from.
471 Arg [6] :
boolean $fastmap - if specified, this is being called from
472 the fastmap call. The mapping done is not any faster for
473 inserts, but the return value is different.
475 Description: This is in internal function which handles the special mapping
476 case for inserts (start = end +1). This function will be called
477 automatically by the
map function so there is no reason to
479 Returntype : list of
Bio::
EnsEMBL::Mapper::Coordinate and/or Gap objects
481 Caller : map_coordinates()
486 my ($self, $id, $start, $end, $strand, $type, $fastmap, $include_original_region) = @_;
488 # swap start/end and map the resultant 2bp coordinate
489 ($start, $end) =($end,$start);
491 my @coords = $self->map_coordinates($id, $start, $end, $strand, $type, $include_original_region);
495 if ($include_original_region) {
496 my $m = $c->{
'mapped'};
497 my $orig = $c->{
'original'};
498 ($m->{
'start'}, $m->{
'end'}) = ($m->{
'end'}, $m->{
'start'});
499 ($orig->{
'start'}, $orig->{
'end'}) = ($orig->{
'end'}, $orig->{
'start'});
501 ($c->{
'start'}, $c->{
'end'}) = ($c->{
'end'}, $c->{
'start'});
504 throw(
"Unexpected: Got ",scalar(@coords),
" expected 2.")
if(@coords != 2);
506 # adjust coordinates, remove gaps
512 ($c1, $c2) = @coords;
517 if ($include_original_region) {
518 ($m1, $m2) = ($c1->{
'mapped'}, $c2->{
'mapped'});
520 ($m1, $m2) = ($c1, $c2);
522 if(ref($m1) eq
'Bio::EnsEMBL::Mapper::Coordinate') {
523 # insert is after first coord
524 if($m1->{
'strand'} * $strand == -1) {
529 if ($include_original_region) {
530 $c1->{
'mapped'} = $m1;
536 if(ref($m2) eq
'Bio::EnsEMBL::Mapper::Coordinate') {
537 # insert is before second coord
538 if($m2->{
'strand'} * $strand == -1) {
544 if ($include_original_region) {
545 $c2->{
'mapped'} = $m2;
546 unshift @coords, $c2;
548 unshift @coords, $m2;
551 if ($include_original_region) {
552 $c2->{
'mapped'} = $m2;
560 # as with @coords == 1 above, if we are in
561 # include_original_region mode, then we have
562 # to flip back the original region as well
563 if ($include_original_region) {
564 foreach my $coord (@coords) {
565 my $orig = $coord->{
'original'};
566 ($orig->{
'start'}, $orig->{
'end'}) =
567 ($orig->{
'end'}, $orig->{
'start'});
573 return undef
if(@coords != 1);
574 my $c = $include_original_region ? $coords[0]->{
'mapped'} : $coords[0];
575 return ($c->{
'id'}, $c->{
'start'}, $c->{
'end'},
576 $c->{
'strand'}, $c->{
'coord_system'});
590 id of
'source' sequence
592 start coordinate of
'source' sequence
594 end coordinate of
'source' sequence
596 raw contig orientation (+/- 1)
598 nature of transform - gives the type of
599 coordinates to be transformed *from*
600 Function inferior
map method. Will only do ungapped unsplit mapping.
601 Will return
id, start, end strand in a list.
602 Returntype list of results
609 my ($self, $id, $start, $end, $strand, $type) = @_;
611 my ($from, $to, $cs);
613 if($end+1 == $start) {
614 return $self->map_insert($id, $start, $end, $strand, $type, 1);
617 if( ! $self->{
'_is_sorted'} ) { $self->_sort() }
619 if($type eq $self->{
'to'}) {
622 $cs = $self->{
'from_cs'};
626 $cs = $self->{
'to_cs'};
629 my $hash = $self->{
"_pair_$type"} or
630 throw(
"Type $type is neither to or from coordinate systems");
633 my $pairs = $hash->{uc($id)};
635 foreach my $pair (@$pairs) {
636 my $self_coord = $pair->{$from};
637 my $target_coord = $pair->{$to};
639 # only super easy mapping is done
640 if( $start < $self_coord->{
'start'} ||
641 $end > $self_coord->{
'end'} ) {
645 if( $pair->{
'ori'} == 1 ) {
646 return ( $target_coord->{
'id'},
647 $target_coord->{
'start'}+$start-$self_coord->{
'start'},
648 $target_coord->{
'start'}+$end-$self_coord->{
'start'},
651 return ( $target_coord->{
'id'},
652 $target_coord->{
'end'} - ($end - $self_coord->{
'start'}),
653 $target_coord->{
'end'} - ($start - $self_coord->{
'start'}),
663 =head2 add_map_coordinates
666 id of
'source' sequence
668 start coordinate of
'source' sequence
670 end coordinate of
'source' sequence
672 relative orientation of source and target (+/- 1)
674 id of 'target' sequence
676 start coordinate of 'target' sequence
678 end coordinate of 'target' sequence
679 Function Stores details of mapping between
680 'source' and 'target' regions.
687 sub add_map_coordinates {
688 my ( $self, $contig_id, $contig_start, $contig_end, $contig_ori,
689 $chr_name, $chr_start, $chr_end )
692 unless ( defined($contig_id)
693 && defined($contig_start)
694 && defined($contig_end)
695 && defined($contig_ori)
696 && defined($chr_name)
697 && defined($chr_start)
698 && defined($chr_end) )
700 throw(
"7 arguments expected");
703 if ( ( $chr_end > $chr_start ) and ( $contig_end - $contig_start ) != ( $chr_end - $chr_start ) ) {
704 throw(
"Cannot deal with mis-lengthed mappings so far");
714 # place into hash on both ids
715 my $map_to = $self->{
'to'};
716 my $map_from = $self->{
'from'};
718 push( @{ $self->{
"_pair_$map_to"}->{ uc($chr_name) } }, $pair );
719 push( @{ $self->{
"_pair_$map_from"}->{ uc($contig_id) } }, $pair );
721 # any interval trees for this set of intervals are now invalid
723 $self->{
"_tree_$map_to"}->{ uc($contig_id) } = undef;
724 $self->{
"_tree_$map_from"}->{ uc($contig_id) } = undef;
726 $self->{
'pair_count'}++;
727 $self->{
'_is_sorted'} = 0;
728 } ## end sub add_map_coordinates
731 =head2 add_indel_coordinates
734 id of
'source' sequence
736 start coordinate of
'source' sequence
738 end coordinate of
'source' sequence
740 relative orientation of source and target (+/- 1)
742 id of 'targe' sequence
744 start coordinate of 'targe' sequence
746 end coordinate of 'targe' sequence
747 Function stores details of mapping between two regions:
748 'source' and 'target'. Returns 1 if the pair was added, 0 if it
749 was already in. Used when adding an indel
756 sub add_indel_coordinates{
757 my ($self, $contig_id, $contig_start, $contig_end,
758 $contig_ori, $chr_name, $chr_start, $chr_end) = @_;
760 unless(defined($contig_id) && defined($contig_start) && defined($contig_end)
761 && defined($contig_ori) && defined($chr_name) && defined($chr_start)
762 && defined($chr_end)) {
763 throw(
"7 arguments expected");
766 #we need to create the IndelPair object to add to both lists, to and from
774 # place into hash on both ids
775 my $map_to = $self->{
'to'};
776 my $map_from = $self->{
'from'};
778 push( @{$self->{
"_pair_$map_to"}->{uc($chr_name)}}, $pair );
779 push( @{$self->{
"_pair_$map_from"}->{uc($contig_id)}}, $pair );
781 # any interval trees for this set of intervals are now invalid
783 $self->{
"_tree_$map_to"}->{ uc($chr_name) } = undef;
784 $self->{
"_tree_$map_from"}->{ uc($contig_id) } = undef;
786 $self->{
'pair_count'}++;
788 $self->{
'_is_sorted'} = 0;
796 Arg [2] :
int $start - start coord. Since
this is an indel should always
797 be one greater than end.
798 Arg [3] :
int $end - end coord. Since
this is an indel should always
799 be one less than start.
800 Arg [4] :
int $strand (0, 1, -1)
801 Arg [5] :
string $type - the coordinate system name the coords are from.
802 Example : @coords = $mapper->map_indel();
803 Description: This is in internal function which handles the special mapping
804 case for indels (start = end +1). It will be used to
map from
805 a coordinate system with a gap to another that contains an
806 insertion. It will be mainly used by the Variation API.
807 Returntype :
Bio::
EnsEMBL::Mapper::Unit objects
814 my ( $self, $id, $start, $end, $strand, $type ) = @_;
816 # swap start/end and map the resultant 2bp coordinate
817 ( $start, $end ) = ( $end, $start );
819 if ( !$self->{
'_is_sorted'} ) { $self->_sort() }
821 my $hash = $self->{
"_pair_$type"};
823 my ( $from, $to, $cs );
825 if ( $type eq $self->{
'to'} ) {
828 $cs = $self->{
'from_cs'};
832 $cs = $self->{
'to_cs'};
835 unless ( defined $hash ) {
836 throw(
"Type $type is neither to or from coordinate systems");
838 my @indel_coordinates;
840 # if we don't already have an interval tree built for this id,
842 if ( !defined $self->{
"_tree_$type"}->{ uc($id) } ) {
843 $self->{
"_tree_$type"}->{ uc($id) } = _build_immutable_tree($from,
846 # query the interval tree (either cached or created new) for overlapping intervals
847 my $overlap = $self->{
"_tree_$type"}->{ uc($id) }->query($start, $end);
849 foreach my $i (@{$overlap}) {
851 my $self_coord = $pair->{$from};
852 my $target_coord = $pair->{$to};
854 if ( exists $pair->{
'indel'} ) {
855 #need to return unit coordinate
858 $target_coord->{
'start'},
859 $target_coord->{
'end'}, );
860 push @indel_coordinates, $to;
865 return @indel_coordinates;
866 } ## end sub map_indel
872 Example $mapper->add_Mapper($mapper2)
873 Function add all the
map coordinates from $mapper to
this mapper.
874 This
object will contain mapping pairs from both the old
877 Exceptions
throw if 'to' and
'from' from both Bio::EnsEMBL::Mappers
879 Caller $mapper->methodname()
884 my ($self, $mapper) = @_;
886 my $mapper_to = $mapper->{
'to'};
887 my $mapper_from = $mapper->{
'from'};
888 if ($mapper_to ne $self->{
'to'} or $mapper_from ne $self->{
'from'}) {
889 throw(
"Trying to add an incompatible Mapper");
893 foreach my $seq_name (keys %{$mapper->{
"_pair_$mapper_to"}}) {
894 push(@{$self->{
"_pair_$mapper_to"}->{$seq_name}},
895 @{$mapper->{
"_pair_$mapper_to"}->{$seq_name}});
896 $self->{
"_tree_$mapper_to"}->{ uc($seq_name) } = undef;
897 $count_a += scalar(@{$mapper->{
"_pair_$mapper_to"}->{$seq_name}});
900 foreach my $seq_name (keys %{$mapper->{
"_pair_$mapper_from"}}) {
901 push(@{$self->{
"_pair_$mapper_from"}->{$seq_name}},
902 @{$mapper->{
"_pair_$mapper_from"}->{$seq_name}});
903 $self->{
"_tree_$mapper_from"}->{ uc($seq_name) } = undef;
904 $count_b += scalar(@{$mapper->{
"_pair_$mapper_from"}->{$seq_name}});
907 if ($count_a == $count_b) {
908 $self->{
'pair_count'} += $count_a;
910 throw(
"Trying to add a funny Mapper");
913 $self->{
'_is_sorted'} = 0;
922 id of
'source' sequence
924 start coordinate of
'source' sequence
926 end coordinate of
'source' sequence
928 nature of transform - gives the type of
929 coordinates to be transformed *from*
930 Function list all pairs of mappings in a region
938 my ( $self, $id, $start, $end, $type ) = @_;
940 if ( !$self->{
'_is_sorted'} ) { $self->_sort() }
942 if ( !defined $type ) {
943 throw(
"Expected 4 arguments");
946 if ( $start > $end ) {
947 throw(
"Start is greater than end "
948 .
"for id $id, start $start, end $end\n" );
951 my $hash = $self->{
"_pair_$type"};
955 if ( $type eq $self->{
'to'} ) {
963 unless ( defined $hash ) {
964 throw(
"Type $type is neither to or from coordinate systems");
969 unless ( exists $hash->{ uc($id) } ) {
973 @list = @{ $hash->{ uc($id) } };
976 if ( $start == -1 && $end == -1 ) {
980 foreach my $p (@list) {
982 if ( $p->{$from}->{
'end'} < $start ) {
985 if ( $p->{$from}->{
'start'} > $end ) {
992 } ## end sub list_pairs
998 id of
'source' sequence
999 Function accessor method form the
'source'
1000 and
'target' in a Mapper::Pair
1008 my ( $self, $value ) = @_;
1010 if ( defined($value) ) {
1011 $self->{
'to'} = $value;
1014 return $self->{
'to'};
1020 id of
'source' sequence
1021 Function accessor method form the
'source'
1022 and
'target' in a Mapper::Pair
1029 my ( $self, $value ) = @_;
1031 if ( defined($value) ) {
1032 $self->{
'from'} = $value;
1035 return $self->{
'from'};
1041 # Arg 1 *FileHandle $fh
1042 # Function convenience dump function
1043 # possibly useful for debugging
1050 my ($self,$fh) = @_;
1052 if( !defined $fh ) {
1056 my $from = $self->{
'from'};
1057 my $to = $self->{
'to'};
1059 print $fh
"dumping from-hash _pair_$from\n";
1060 foreach my $id ( keys %{$self->{
"_pair_$from"}} ) {
1061 print $fh
"{_pair_$from}->{" . uc($id) .
"}:\n";
1062 foreach my $pair ( @{$self->{
"_pair_$from"}->{uc($id)}} ) {
1063 print $fh
" ",$pair->from->start,
" ",$pair->from->end,
":",$pair->to->start,
" ",$pair->to->end,
" ",$pair->to->id,
"\n";
1065 if (defined($self->{
"_tree_$from"}->{uc($id)})) {
1066 print $fh
"{_tree_$from}->{" . uc($id) .
"} instantiated\n";
1068 print $fh
"{_tree_$from}->{" . uc($id) .
"} empty\n";
1071 print $fh
"dumping to-hash _pair_$to\n";
1072 foreach my $id ( keys %{$self->{
"_pair_$to"}} ) {
1073 print $fh
"{_pair_$to}->{" . uc($id) .
"}:\n";
1074 foreach my $pair ( @{$self->{
"_pair_$to"}->{uc($id)}} ) {
1075 print $fh
" ",$pair->to->start,
" ",$pair->to->end,
":",$pair->from->start,
" ",$pair->from->end,
" ",$pair->from->id,
"\n";
1077 if (defined($self->{
"_tree_$to"}->{uc($id)})) {
1078 print $fh
"{_tree_$to}->{" . uc($id) .
"} instantiated\n";
1080 print $fh
"{_tree_$to}->{" . uc($id) .
"} empty\n";
1088 # Function sort function so that all
1089 # mappings are sorted by
1099 my $to = $self->{
'to'};
1100 my $from = $self->{
'from'};
1102 foreach my $id ( keys %{ $self->{
"_pair_$from"} } ) {
1103 @{ $self->{
"_pair_$from"}->{$id} } =
1104 sort { $a->{
'from'}->{
'start'} <=> $b->{
'from'}->{
'start'} }
1105 @{ $self->{
"_pair_$from"}->{$id} };
1108 foreach my $id ( keys %{ $self->{
"_pair_$to"} } ) {
1109 @{ $self->{
"_pair_$to"}->{$id} } =
1110 sort { $a->{
'to'}->{
'start'} <=> $b->{
'to'}->{
'start'} }
1111 @{ $self->{
"_pair_$to"}->{$id} };
1114 $self->_merge_pairs();
1115 $self->_is_sorted(1);
1118 # this function merges pairs that are adjacent into one
1122 my ( $lr, $lr_from, $del_pair, $next_pair, $current_pair );
1124 my $map_to = $self->{
'to'};
1125 my $map_from = $self->{
'from'};
1126 $self->{
'pair_count'} = 0;
1128 for my $key ( keys %{$self->{
"_pair_$map_to"}} ) {
1129 # merging pairs means all interval trees for
1130 # this set of intervals are now invalid, so delete them
1131 $self->{
"_tree_$map_to"}->{ uc($key) } = undef;
1132 $lr = $self->{
"_pair_$map_to"}->{$key};
1136 my $length = $#{$lr};
1137 while( $next <= $length ) {
1138 $current_pair = $lr->[$i];
1139 $next_pair = $lr->[$next];
1142 if(exists $current_pair->{
'indel'} || exists $next_pair->{
'indel'}){
1143 #necessary to modify the merge function to not merge indels
1149 if( $current_pair->{
'to'}->{
'start'} == $next_pair->{
'to'}->{
'start'}
1150 && $current_pair->{
'from'}->{
'id'} == $next_pair->{
'from'}->{
'id'}
1151 && $current_pair->{
'from'}->{
'start'} == $next_pair->{
'from'}->{
'start'}) {
1152 # Modified in e75 to support GRCh38. Contigs used repeatedly in one seq region were
1153 # being pre-emptively deleted as copies. Extra from-start condition above ensures copy
1154 # deletion is restricted to same-location copies. Even more stringent checks can be made
1156 $del_pair = $next_pair;
1157 } elsif ( ( $current_pair->{
'from'}->{
'id'} eq $next_pair->{
'from'}->{
'id'} ) &&
1158 ( $next_pair->{
'ori'} == $current_pair->{
'ori'} ) &&
1159 ( $next_pair->{
'to'}->{
'start'} -1 == $current_pair->{
'to'}->{
'end'} )) {
1161 if( $current_pair->{
'ori'} == 1 ) {
1162 # check forward strand merge
1163 if( $next_pair->{
'from'}->{
'start'} - 1 == $current_pair->{
'from'}->{
'end'} ) {
1164 # normal merge with previous element
1165 $current_pair->{
'to'}->{
'end'} = $next_pair->{
'to'}->{
'end'};
1166 $current_pair->{
'from'}->{
'end'} = $next_pair->{
'from'}->{
'end'};
1167 $del_pair = $next_pair;
1170 # check backward strand merge
1171 if( $next_pair->{
'from'}->{
'end'} + 1 == $current_pair->{
'from'}->{
'start'} ) {
1173 $current_pair->{
'to'}->{
'end'} = $next_pair->{
'to'}->{
'end'};
1174 $current_pair->{
'from'}->{
'start'} = $next_pair->{
'from'}->{
'start'};
1175 $del_pair = $next_pair;
1180 if( defined $del_pair ) {
1181 splice( @$lr, $next, 1 );
1182 $lr_from = $self->{
"_pair_$map_from"}->{uc($del_pair->{
'from'}->{
'id'})};
1183 for( my $j=0; $j <= $#{$lr_from}; $j++ ) {
1184 if( $lr_from->[$j] == $del_pair ) {
1185 splice( @$lr_from, $j, 1 );
1190 if( $length < $next ) { last; }
1199 $self->{
'pair_count'} += scalar( @$lr );
1207 # Function toggle for whether the (internal)
1208 # map data are sorted
1215 my ($self, $value) = @_;
1216 $self->{
'_is_sorted'} = $value
if (defined($value));
1217 return $self->{
'_is_sorted'};
1220 # _build_immutable_tree
1222 # Arg 1 string $pair_side - the from or to half of each pair to be
1223 # the source of intervals
1224 # Arg 2 listref $pair_list - a list of Bio::EnsEMBL::Mapper::Pair
1225 # Function builds a Bio::EnsEMBL::Utils::Tree::Interval::Immutable with
1226 # intervals corresponding to the chosen side (from or to) of
1227 # each Pair in $pair_list and a pointer to each Pair
1228 # Returntype Bio::EnsEMBL::Utils::Tree::Interval::Immutable
1232 sub _build_immutable_tree {
1233 my ($pair_side, $pair_list) = @_;
1234 # create set of intervals for the tree
1237 foreach my $i (@{$pair_list}) {
1238 my $start = $i->{$pair_side}{start};
1239 my $end = $i->{$pair_side}{end};
1241 if ($end < $start) {
1242 ($end, $start) = ($start, $end);
1248 # Create the interval tree defined on the above set of intervals
1250 # As of release/99, we have more experience with the immutable
1251 # interval tree, so we will stick with this one. A future
1252 # refactoring effort may wish to replace this with a dynamically
1253 # maintained mutable interval tree, rather than simply throwing
1254 # trees away and rebuilding when the underlying interval set changes