3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
39 # add a coodinate mapping - supply two pairs or coordinates
40 $map->add_map_coordinates(
41 $contig_id, $contig_start, $contig_end, $contig_ori,
42 $chr_name, chr_start, $chr_end
45 # map from one coordinate system to another
47 $mapper->map_coordinates( 627012, 2, 5, -1,
"rawcontig" );
51 Generic mapper to provide coordinate transforms between two disjoint
52 coordinate systems. This mapper is intended to be
'context neutral' - in
53 that it does not contain any code relating to any particular coordinate
56 Mappings consist of pairs of
'to-' and
'from-' contigs with coordinates on each.
57 Orientation is abbreviated to
'ori',
59 The contig pair hash is divided into mappings per seq_region, the code
60 below makes assumptions
about how to filter these results, thus the comparisons
61 for some properties are absent in the code but implicit by data structure.
63 The assembly mapping hash
'_pair_last' orders itself by the target seq region and looks like
this:
65 1 => ARRAY(0x1024c79c0)
86 2 => ARRAY(0x1025ee460)
108 The other mapping hash available is the reverse sense, putting the
'from' seq_region as the
109 sorting key. Here is an excerpt.
112 4 => ARRAY(0x1025ee028)
139 package Bio::EnsEMBL::Mapper;
157 Arg [1] :
string $from
158 The name of the
'from' coordinate system
160 The name of the
'to' coordinate system
162 The
'from' coordinate system
173 my ( $proto, $from, $to, $from_cs, $to_cs ) = @_;
175 if ( !defined($to) || !defined($from) ) {
176 throw(
"Must supply 'to' and 'from' tags");
179 my $class = ref($proto) || $proto;
181 my $self = bless( {
"_pair_$from" => {},
189 'from_cs' => $from_cs
193 # do sql to get any componente with muliple assemblys.
202 Description: removes all cached information out of
this mapper
211 my $from = $self->from();
212 my $to = $self->to();
214 $self->{
"_pair_$from"} = {};
215 $self->{
"_pair_$to"} = {};
216 $self->{
"_tree_$from"} = {};
217 $self->{
"_tree_$to"} = {};
219 $self->{
'pair_count'} = 0;
224 =head2 map_coordinates
227 id of
'source' sequence
229 start coordinate of
'source' sequence
231 end coordinate of
'source' sequence
233 raw contig orientation (+/- 1)
235 nature of transform - gives the type of
236 coordinates to be transformed *from*
237 Arg 6
boolean (0 or 1) $include_original_region
238 option to include original input coordinate region mappings in the result
239 Arg 7
int $cdna_coding_start
241 Function generic
map method
242 Returntype if $include_original_region == 0
243 array of mappped
Bio::
EnsEMBL::Mapper::Coordinate
245 if $include_original_region == 1
246 hash of mapped and original
Bio::
EnsEMBL::Mapper::Coordinate
253 sub map_coordinates {
254 my ( $self, $id, $start, $end, $strand, $type, $include_original_region, $cdna_coding_start ) = @_;
255 unless ( defined($id)
261 throw(
"Expecting at least 5 arguments");
264 $cdna_coding_start = defined $cdna_coding_start ? $cdna_coding_start : 1;
266 # special case for handling inserts:
267 if ( $start == $end + 1 ) {
268 return $self->map_insert( $id, $start, $end, $strand, $type, undef, $include_original_region);
271 if ( !$self->{
'_is_sorted'} ) { $self->_sort() }
273 my $hash = $self->{
"_pair_$type"};
275 my ( $from, $to, $cs );
277 if ( $type eq $self->{
'to'} ) {
280 $cs = $self->{
'from_cs'};
284 $cs = $self->{
'to_cs'};
287 unless ( defined $hash ) {
288 throw(
"Type $type is neither to or from coordinate systems");
294 if ( !defined $hash->{ uc($id) } ) {
297 if ($include_original_region) {
298 push @paired_result, {
'original' => $gap,
'mapped' => $gap };
299 return @paired_result;
308 # if we don't already have an interval tree built for this id,
310 if ( !defined( $self->{
"_tree_$type"}->{ uc($id) } )) {
311 $self->{
"_tree_$type"}->{ uc($id) } = _build_immutable_tree($from,
314 # query the interval tree (either cached or created new) for overlapping intervals
315 my $overlap = $self->{
"_tree_$type"}->{ uc($id) }->query($start, $end);
318 my $orig_start = $start;
319 my $last_target_coord = undef;
320 foreach my $i (@{$overlap}) {
322 my $self_coord = $pair->{$from};
323 my $target_coord = $pair->{$to};
326 # But not the case for haplotypes!! need to test for this case???
327 # so removing this till a better solution is found
330 # if($self_coord->{'start'} < $start){
331 # $start = $orig_start;
335 if ( defined($last_target_coord) && $target_coord->{
'id'} ne $last_target_coord ) {
336 if ( $self_coord->{
'start'} < $start ) {
337 # i.e. the same bit is being mapped to another assembled bit
338 $start = $orig_start;
341 $last_target_coord = $target_coord->{
'id'};
344 if ( $start < $self_coord->{
'start'} ) {
347 push( @result, $gap );
348 push(@paired_result, {
'original' => $gap,
'mapped' => $gap });
350 $start = $gap->{
'end'} + 1;
352 my ( $target_start, $target_end);
353 my ( $ori_start, $ori_end);
356 if ( exists $pair->{
'indel'} ) {
357 # When next pair is an IndelPair and not a Coordinate, create the
358 # new mapping Coordinate, the IndelCoordinate.
359 $target_start = $target_coord->{
'start'};
360 $target_end = $target_coord->{
'end'};
362 #original coordinates
363 $ori_start = $self_coord->{
'start'};
364 $ori_end = $self_coord->{
'end'};
368 ( $self_coord->{
'end'} < $end ? $self_coord->{
'end'} : $end ) );
369 #create the Coordinate object
372 $target_start, $target_end, $pair->{
'ori'}*$strand, $cs );
373 #and finally, the IndelCoordinate object with
377 # start is somewhere inside the region
378 if ( $pair->{
'ori'} == 1 ) {
379 $target_start = $target_coord->{
'start'} + ( $start - $self_coord->{
'start'} );
381 $target_end = $target_coord->{
'end'} - ( $start - $self_coord->{
'start'} );
384 # Either we are enveloping this map or not. If yes, then end
385 # point (self perspective) is determined solely by target. If
386 # not we need to adjust.
388 if ( $end > $self_coord->{
'end'} ) {
390 if ( $pair->{
'ori'} == 1 ) {
391 $target_end = $target_coord->{
'end'};
393 $target_start = $target_coord->{
'start'};
397 if ( $pair->{
'ori'} == 1 ) {
399 $target_coord->{
'start'} +
400 ( $end - $self_coord->{
'start'} );
404 $target_coord->{
'end'} - ( $end - $self_coord->{
'start'} );
410 $target_start, $target_end, $pair->{
'ori'}*$strand,$cs, $rank );
412 #save the ori coordinate info
413 $ori_start = ($start - $cdna_coding_start) + 1;
414 $ori_end = $ori_start + ($target_end - $target_start);
417 } ## end
else [
if ( exists $pair->{
'indel'...})]
419 push( @result, $res );
421 $ori_start, $ori_end, $pair->{
'ori'}*$strand,$cs, $rank);
422 push(@paired_result, {
'original' => $res_ori,
'mapped' => $res });
425 $last_used_pair = $pair;
426 $start = $self_coord->{
'end'} + 1;
428 } ## end
for ( my $i = $start_idx...)
430 if ( !defined $last_used_pair ) {
432 push( @result, $gap );
433 push(@paired_result, {
'original' => $gap,
'mapped' => $gap });
436 } elsif ( $last_used_pair->{$from}->{
'end'} < $end ) {
439 $last_used_pair->{$from}->{
'end'} + 1,
441 push( @result, $gap );
443 push(@paired_result, {
'original' => $gap,
'mapped' => $gap });
446 if ( $strand == -1 ) {
447 @result = reverse(@result);
448 @paired_result = reverse(@paired_result);
451 if ($include_original_region){
452 return @paired_result;
457 } ## end sub map_coordinates
464 Arg [2] :
int $start - start coord. Since
this is an insert should always
465 be one greater than end.
466 Arg [3] :
int $end - end coord. Since
this is an insert should always
467 be one less than start.
468 Arg [4] :
int $strand (0, 1, -1)
469 Arg [5] :
string $type - the coordinate system name the coords are from.
470 Arg [6] :
boolean $fastmap - if specified, this is being called from
471 the fastmap call. The mapping done is not any faster for
472 inserts, but the return value is different.
474 Description: This is in internal function which handles the special mapping
475 case for inserts (start = end +1). This function will be called
476 automatically by the
map function so there is no reason to
478 Returntype : list of
Bio::
EnsEMBL::Mapper::Coordinate and/or Gap objects
480 Caller : map_coordinates()
485 my ($self, $id, $start, $end, $strand, $type, $fastmap, $include_original_region) = @_;
487 # swap start/end and map the resultant 2bp coordinate
488 ($start, $end) =($end,$start);
490 my @coords = $self->map_coordinates($id, $start, $end, $strand, $type, $include_original_region);
494 if ($include_original_region) {
495 my $m = $c->{
'mapped'};
496 my $orig = $c->{
'original'};
497 ($m->{
'start'}, $m->{
'end'}) = ($m->{
'end'}, $m->{
'start'});
498 ($orig->{
'start'}, $orig->{
'end'}) = ($orig->{
'end'}, $orig->{
'start'});
500 ($c->{
'start'}, $c->{
'end'}) = ($c->{
'end'}, $c->{
'start'});
503 throw(
"Unexpected: Got ",scalar(@coords),
" expected 2.")
if(@coords != 2);
505 # adjust coordinates, remove gaps
511 ($c1, $c2) = @coords;
516 if ($include_original_region) {
517 ($m1, $m2) = ($c1->{
'mapped'}, $c2->{
'mapped'});
519 ($m1, $m2) = ($c1, $c2);
521 if(ref($m1) eq
'Bio::EnsEMBL::Mapper::Coordinate') {
522 # insert is after first coord
523 if($m1->{
'strand'} * $strand == -1) {
528 if ($include_original_region) {
529 $c1->{
'mapped'} = $m1;
535 if(ref($m2) eq
'Bio::EnsEMBL::Mapper::Coordinate') {
536 # insert is before second coord
537 if($m2->{
'strand'} * $strand == -1) {
543 if ($include_original_region) {
544 $c2->{
'mapped'} = $m2;
545 unshift @coords, $c2;
547 unshift @coords, $m2;
550 if ($include_original_region) {
551 $c2->{
'mapped'} = $m2;
559 # as with @coords == 1 above, if we are in
560 # include_original_region mode, then we have
561 # to flip back the original region as well
562 if ($include_original_region) {
563 foreach my $coord (@coords) {
564 my $orig = $coord->{
'original'};
565 ($orig->{
'start'}, $orig->{
'end'}) =
566 ($orig->{
'end'}, $orig->{
'start'});
572 return undef
if(@coords != 1);
573 my $c = $include_original_region ? $coords[0]->{
'mapped'} : $coords[0];
574 return ($c->{
'id'}, $c->{
'start'}, $c->{
'end'},
575 $c->{
'strand'}, $c->{
'coord_system'});
589 id of
'source' sequence
591 start coordinate of
'source' sequence
593 end coordinate of
'source' sequence
595 raw contig orientation (+/- 1)
597 nature of transform - gives the type of
598 coordinates to be transformed *from*
599 Function inferior
map method. Will only do ungapped unsplit mapping.
600 Will return
id, start, end strand in a list.
601 Returntype list of results
608 my ($self, $id, $start, $end, $strand, $type) = @_;
610 my ($from, $to, $cs);
612 if($end+1 == $start) {
613 return $self->map_insert($id, $start, $end, $strand, $type, 1);
616 if( ! $self->{
'_is_sorted'} ) { $self->_sort() }
618 if($type eq $self->{
'to'}) {
621 $cs = $self->{
'from_cs'};
625 $cs = $self->{
'to_cs'};
628 my $hash = $self->{
"_pair_$type"} or
629 throw(
"Type $type is neither to or from coordinate systems");
632 my $pairs = $hash->{uc($id)};
634 foreach my $pair (@$pairs) {
635 my $self_coord = $pair->{$from};
636 my $target_coord = $pair->{$to};
638 # only super easy mapping is done
639 if( $start < $self_coord->{
'start'} ||
640 $end > $self_coord->{
'end'} ) {
644 if( $pair->{
'ori'} == 1 ) {
645 return ( $target_coord->{
'id'},
646 $target_coord->{
'start'}+$start-$self_coord->{
'start'},
647 $target_coord->{
'start'}+$end-$self_coord->{
'start'},
650 return ( $target_coord->{
'id'},
651 $target_coord->{
'end'} - ($end - $self_coord->{
'start'}),
652 $target_coord->{
'end'} - ($start - $self_coord->{
'start'}),
662 =head2 add_map_coordinates
665 id of
'source' sequence
667 start coordinate of
'source' sequence
669 end coordinate of
'source' sequence
671 relative orientation of source and target (+/- 1)
673 id of 'target' sequence
675 start coordinate of 'target' sequence
677 end coordinate of 'target' sequence
678 Function Stores details of mapping between
679 'source' and 'target' regions.
686 sub add_map_coordinates {
687 my ( $self, $contig_id, $contig_start, $contig_end, $contig_ori,
688 $chr_name, $chr_start, $chr_end )
691 unless ( defined($contig_id)
692 && defined($contig_start)
693 && defined($contig_end)
694 && defined($contig_ori)
695 && defined($chr_name)
696 && defined($chr_start)
697 && defined($chr_end) )
699 throw(
"7 arguments expected");
702 if ( ( $chr_end > $chr_start ) and ( $contig_end - $contig_start ) != ( $chr_end - $chr_start ) ) {
703 throw(
"Cannot deal with mis-lengthed mappings so far");
713 # place into hash on both ids
714 my $map_to = $self->{
'to'};
715 my $map_from = $self->{
'from'};
717 push( @{ $self->{
"_pair_$map_to"}->{ uc($chr_name) } }, $pair );
718 push( @{ $self->{
"_pair_$map_from"}->{ uc($contig_id) } }, $pair );
720 # any interval trees for this set of intervals are now invalid
722 $self->{
"_tree_$map_to"}->{ uc($contig_id) } = undef;
723 $self->{
"_tree_$map_from"}->{ uc($contig_id) } = undef;
725 $self->{
'pair_count'}++;
726 $self->{
'_is_sorted'} = 0;
727 } ## end sub add_map_coordinates
730 =head2 add_indel_coordinates
733 id of
'source' sequence
735 start coordinate of
'source' sequence
737 end coordinate of
'source' sequence
739 relative orientation of source and target (+/- 1)
741 id of 'targe' sequence
743 start coordinate of 'targe' sequence
745 end coordinate of 'targe' sequence
746 Function stores details of mapping between two regions:
747 'source' and 'target'. Returns 1 if the pair was added, 0 if it
748 was already in. Used when adding an indel
755 sub add_indel_coordinates{
756 my ($self, $contig_id, $contig_start, $contig_end,
757 $contig_ori, $chr_name, $chr_start, $chr_end) = @_;
759 unless(defined($contig_id) && defined($contig_start) && defined($contig_end)
760 && defined($contig_ori) && defined($chr_name) && defined($chr_start)
761 && defined($chr_end)) {
762 throw(
"7 arguments expected");
765 #we need to create the IndelPair object to add to both lists, to and from
773 # place into hash on both ids
774 my $map_to = $self->{
'to'};
775 my $map_from = $self->{
'from'};
777 push( @{$self->{
"_pair_$map_to"}->{uc($chr_name)}}, $pair );
778 push( @{$self->{
"_pair_$map_from"}->{uc($contig_id)}}, $pair );
780 # any interval trees for this set of intervals are now invalid
782 $self->{
"_tree_$map_to"}->{ uc($chr_name) } = undef;
783 $self->{
"_tree_$map_from"}->{ uc($contig_id) } = undef;
785 $self->{
'pair_count'}++;
787 $self->{
'_is_sorted'} = 0;
795 Arg [2] :
int $start - start coord. Since
this is an indel should always
796 be one greater than end.
797 Arg [3] :
int $end - end coord. Since
this is an indel should always
798 be one less than start.
799 Arg [4] :
int $strand (0, 1, -1)
800 Arg [5] :
string $type - the coordinate system name the coords are from.
801 Example : @coords = $mapper->map_indel();
802 Description: This is in internal function which handles the special mapping
803 case for indels (start = end +1). It will be used to
map from
804 a coordinate system with a gap to another that contains an
805 insertion. It will be mainly used by the Variation API.
806 Returntype :
Bio::
EnsEMBL::Mapper::Unit objects
813 my ( $self, $id, $start, $end, $strand, $type ) = @_;
815 # swap start/end and map the resultant 2bp coordinate
816 ( $start, $end ) = ( $end, $start );
818 if ( !$self->{
'_is_sorted'} ) { $self->_sort() }
820 my $hash = $self->{
"_pair_$type"};
822 my ( $from, $to, $cs );
824 if ( $type eq $self->{
'to'} ) {
827 $cs = $self->{
'from_cs'};
831 $cs = $self->{
'to_cs'};
834 unless ( defined $hash ) {
835 throw(
"Type $type is neither to or from coordinate systems");
837 my @indel_coordinates;
839 # if we don't already have an interval tree built for this id,
841 if ( !defined $self->{
"_tree_$type"}->{ uc($id) } ) {
842 $self->{
"_tree_$type"}->{ uc($id) } = _build_immutable_tree($from,
845 # query the interval tree (either cached or created new) for overlapping intervals
846 my $overlap = $self->{
"_tree_$type"}->{ uc($id) }->query($start, $end);
848 foreach my $i (@{$overlap}) {
850 my $self_coord = $pair->{$from};
851 my $target_coord = $pair->{$to};
853 if ( exists $pair->{
'indel'} ) {
854 #need to return unit coordinate
857 $target_coord->{
'start'},
858 $target_coord->{
'end'}, );
859 push @indel_coordinates, $to;
864 return @indel_coordinates;
865 } ## end sub map_indel
871 Example $mapper->add_Mapper($mapper2)
872 Function add all the
map coordinates from $mapper to
this mapper.
873 This
object will contain mapping pairs from both the old
876 Exceptions
throw if 'to' and
'from' from both Bio::EnsEMBL::Mappers
878 Caller $mapper->methodname()
883 my ($self, $mapper) = @_;
885 my $mapper_to = $mapper->{
'to'};
886 my $mapper_from = $mapper->{
'from'};
887 if ($mapper_to ne $self->{
'to'} or $mapper_from ne $self->{
'from'}) {
888 throw(
"Trying to add an incompatible Mapper");
892 foreach my $seq_name (keys %{$mapper->{
"_pair_$mapper_to"}}) {
893 push(@{$self->{
"_pair_$mapper_to"}->{$seq_name}},
894 @{$mapper->{
"_pair_$mapper_to"}->{$seq_name}});
895 $self->{
"_tree_$mapper_to"}->{ uc($seq_name) } = undef;
896 $count_a += scalar(@{$mapper->{
"_pair_$mapper_to"}->{$seq_name}});
899 foreach my $seq_name (keys %{$mapper->{
"_pair_$mapper_from"}}) {
900 push(@{$self->{
"_pair_$mapper_from"}->{$seq_name}},
901 @{$mapper->{
"_pair_$mapper_from"}->{$seq_name}});
902 $self->{
"_tree_$mapper_from"}->{ uc($seq_name) } = undef;
903 $count_b += scalar(@{$mapper->{
"_pair_$mapper_from"}->{$seq_name}});
906 if ($count_a == $count_b) {
907 $self->{
'pair_count'} += $count_a;
909 throw(
"Trying to add a funny Mapper");
912 $self->{
'_is_sorted'} = 0;
921 id of
'source' sequence
923 start coordinate of
'source' sequence
925 end coordinate of
'source' sequence
927 nature of transform - gives the type of
928 coordinates to be transformed *from*
929 Function list all pairs of mappings in a region
937 my ( $self, $id, $start, $end, $type ) = @_;
939 if ( !$self->{
'_is_sorted'} ) { $self->_sort() }
941 if ( !defined $type ) {
942 throw(
"Expected 4 arguments");
945 if ( $start > $end ) {
946 throw(
"Start is greater than end "
947 .
"for id $id, start $start, end $end\n" );
950 my $hash = $self->{
"_pair_$type"};
954 if ( $type eq $self->{
'to'} ) {
962 unless ( defined $hash ) {
963 throw(
"Type $type is neither to or from coordinate systems");
968 unless ( exists $hash->{ uc($id) } ) {
972 @list = @{ $hash->{ uc($id) } };
975 if ( $start == -1 && $end == -1 ) {
979 foreach my $p (@list) {
981 if ( $p->{$from}->{
'end'} < $start ) {
984 if ( $p->{$from}->{
'start'} > $end ) {
991 } ## end sub list_pairs
997 id of
'source' sequence
998 Function accessor method form the
'source'
999 and
'target' in a Mapper::Pair
1007 my ( $self, $value ) = @_;
1009 if ( defined($value) ) {
1010 $self->{
'to'} = $value;
1013 return $self->{
'to'};
1019 id of
'source' sequence
1020 Function accessor method form the
'source'
1021 and
'target' in a Mapper::Pair
1028 my ( $self, $value ) = @_;
1030 if ( defined($value) ) {
1031 $self->{
'from'} = $value;
1034 return $self->{
'from'};
1040 # Arg 1 *FileHandle $fh
1041 # Function convenience dump function
1042 # possibly useful for debugging
1049 my ($self,$fh) = @_;
1051 if( !defined $fh ) {
1055 my $from = $self->{
'from'};
1056 my $to = $self->{
'to'};
1058 print $fh
"dumping from-hash _pair_$from\n";
1059 foreach my $id ( keys %{$self->{
"_pair_$from"}} ) {
1060 print $fh
"{_pair_$from}->{" . uc($id) .
"}:\n";
1061 foreach my $pair ( @{$self->{
"_pair_$from"}->{uc($id)}} ) {
1062 print $fh
" ",$pair->from->start,
" ",$pair->from->end,
":",$pair->to->start,
" ",$pair->to->end,
" ",$pair->to->id,
"\n";
1064 if (defined($self->{
"_tree_$from"}->{uc($id)})) {
1065 print $fh
"{_tree_$from}->{" . uc($id) .
"} instantiated\n";
1067 print $fh
"{_tree_$from}->{" . uc($id) .
"} empty\n";
1070 print $fh
"dumping to-hash _pair_$to\n";
1071 foreach my $id ( keys %{$self->{
"_pair_$to"}} ) {
1072 print $fh
"{_pair_$to}->{" . uc($id) .
"}:\n";
1073 foreach my $pair ( @{$self->{
"_pair_$to"}->{uc($id)}} ) {
1074 print $fh
" ",$pair->to->start,
" ",$pair->to->end,
":",$pair->from->start,
" ",$pair->from->end,
" ",$pair->from->id,
"\n";
1076 if (defined($self->{
"_tree_$to"}->{uc($id)})) {
1077 print $fh
"{_tree_$to}->{" . uc($id) .
"} instantiated\n";
1079 print $fh
"{_tree_$to}->{" . uc($id) .
"} empty\n";
1087 # Function sort function so that all
1088 # mappings are sorted by
1098 my $to = $self->{
'to'};
1099 my $from = $self->{
'from'};
1101 foreach my $id ( keys %{ $self->{
"_pair_$from"} } ) {
1102 @{ $self->{
"_pair_$from"}->{$id} } =
1103 sort { $a->{
'from'}->{
'start'} <=> $b->{
'from'}->{
'start'} }
1104 @{ $self->{
"_pair_$from"}->{$id} };
1107 foreach my $id ( keys %{ $self->{
"_pair_$to"} } ) {
1108 @{ $self->{
"_pair_$to"}->{$id} } =
1109 sort { $a->{
'to'}->{
'start'} <=> $b->{
'to'}->{
'start'} }
1110 @{ $self->{
"_pair_$to"}->{$id} };
1113 $self->_merge_pairs();
1114 $self->_is_sorted(1);
1117 # this function merges pairs that are adjacent into one
1121 my ( $lr, $lr_from, $del_pair, $next_pair, $current_pair );
1123 my $map_to = $self->{
'to'};
1124 my $map_from = $self->{
'from'};
1125 $self->{
'pair_count'} = 0;
1127 for my $key ( keys %{$self->{
"_pair_$map_to"}} ) {
1128 # merging pairs means all interval trees for
1129 # this set of intervals are now invalid, so delete them
1130 $self->{
"_tree_$map_to"}->{ uc($key) } = undef;
1131 $lr = $self->{
"_pair_$map_to"}->{$key};
1135 my $length = $#{$lr};
1136 while( $next <= $length ) {
1137 $current_pair = $lr->[$i];
1138 $next_pair = $lr->[$next];
1141 if(exists $current_pair->{
'indel'} || exists $next_pair->{
'indel'}){
1142 #necessary to modify the merge function to not merge indels
1148 if( $current_pair->{
'to'}->{
'start'} == $next_pair->{
'to'}->{
'start'}
1149 && $current_pair->{
'from'}->{
'id'} == $next_pair->{
'from'}->{
'id'}
1150 && $current_pair->{
'from'}->{
'start'} == $next_pair->{
'from'}->{
'start'}) {
1151 # Modified in e75 to support GRCh38. Contigs used repeatedly in one seq region were
1152 # being pre-emptively deleted as copies. Extra from-start condition above ensures copy
1153 # deletion is restricted to same-location copies. Even more stringent checks can be made
1155 $del_pair = $next_pair;
1156 } elsif ( ( $current_pair->{
'from'}->{
'id'} eq $next_pair->{
'from'}->{
'id'} ) &&
1157 ( $next_pair->{
'ori'} == $current_pair->{
'ori'} ) &&
1158 ( $next_pair->{
'to'}->{
'start'} -1 == $current_pair->{
'to'}->{
'end'} )) {
1160 if( $current_pair->{
'ori'} == 1 ) {
1161 # check forward strand merge
1162 if( $next_pair->{
'from'}->{
'start'} - 1 == $current_pair->{
'from'}->{
'end'} ) {
1163 # normal merge with previous element
1164 $current_pair->{
'to'}->{
'end'} = $next_pair->{
'to'}->{
'end'};
1165 $current_pair->{
'from'}->{
'end'} = $next_pair->{
'from'}->{
'end'};
1166 $del_pair = $next_pair;
1169 # check backward strand merge
1170 if( $next_pair->{
'from'}->{
'end'} + 1 == $current_pair->{
'from'}->{
'start'} ) {
1172 $current_pair->{
'to'}->{
'end'} = $next_pair->{
'to'}->{
'end'};
1173 $current_pair->{
'from'}->{
'start'} = $next_pair->{
'from'}->{
'start'};
1174 $del_pair = $next_pair;
1179 if( defined $del_pair ) {
1180 splice( @$lr, $next, 1 );
1181 $lr_from = $self->{
"_pair_$map_from"}->{uc($del_pair->{
'from'}->{
'id'})};
1182 for( my $j=0; $j <= $#{$lr_from}; $j++ ) {
1183 if( $lr_from->[$j] == $del_pair ) {
1184 splice( @$lr_from, $j, 1 );
1189 if( $length < $next ) { last; }
1198 $self->{
'pair_count'} += scalar( @$lr );
1206 # Function toggle for whether the (internal)
1207 # map data are sorted
1214 my ($self, $value) = @_;
1215 $self->{
'_is_sorted'} = $value
if (defined($value));
1216 return $self->{
'_is_sorted'};
1219 # _build_immutable_tree
1221 # Arg 1 string $pair_side - the from or to half of each pair to be
1222 # the source of intervals
1223 # Arg 2 listref $pair_list - a list of Bio::EnsEMBL::Mapper::Pair
1224 # Function builds a Bio::EnsEMBL::Utils::Tree::Interval::Immutable with
1225 # intervals corresponding to the chosen side (from or to) of
1226 # each Pair in $pair_list and a pointer to each Pair
1227 # Returntype Bio::EnsEMBL::Utils::Tree::Interval::Immutable
1231 sub _build_immutable_tree {
1232 my ($pair_side, $pair_list) = @_;
1233 # create set of intervals for the tree
1236 foreach my $i (@{$pair_list}) {
1237 my $start = $i->{$pair_side}{start};
1238 my $end = $i->{$pair_side}{end};
1240 if ($end < $start) {
1241 ($end, $start) = ($start, $end);
1247 # Create the interval tree defined on the above set of intervals
1249 # As of release/99, we have more experience with the immutable
1250 # interval tree, so we will stick with this one. A future
1251 # refactoring effort may wish to replace this with a dynamically
1252 # maintained mutable interval tree, rather than simply throwing
1253 # trees away and rebuilding when the underlying interval set changes