my ( $self, $id, $start, $end, $rstart, $rend ) = @_;
$rstart = $start if ( !defined($rstart) );
$rend = $end if ( !defined($rend) );
#
# Sanity checks
#
if ( !defined($id) || !defined($start) || !defined($end) ) {
throw("ID, start, end arguments are required");
}
# The following was commented out due to Ensembl Genomes requirements
# for bacterial genomes.
#
## if ( $start > $end ) {
## throw( "start argument [$start] must be less than "
## . "(or equal to) end argument [$end]" );
## }
##
## if ( $rstart > $rend ) {
## throw( "rstart argument [$rstart] must be less than "
## . "(or equal to) rend argument [$rend] argument" );
## }
if ( $rstart > $start ) {
throw("rstart [$rstart] must be less than or equal to start [$start]");
}
if ( $rend < $end ) {
throw("rend [$rend] must be greater than or equal to end [$end]");
}
my $reg = $self->{'registry'};
my $list = $reg->{$id} ||= [];
my @gap_pairs;
my $len = scalar(@$list);
if ( $len == 0 ) {
# this is the first request for this id, return a gap pair for the
# entire range and register it as seen
$list->[0] = [ $rstart, $rend ];
return [ [ $rstart, $rend ] ];
}
#####
# loop through the list of existing ranges recording any "gaps" where
# the existing range does not cover part of the requested range
#
my $start_idx = 0;
my $end_idx = $#$list;
my ( $mid_idx, $range );
# binary search the relevant pairs
# helps if the list is big
while ( ( $end_idx - $start_idx ) > 1 ) {
$mid_idx = ( $start_idx + $end_idx ) >> 1;
$range = $list->[$mid_idx];
if ( $range->[1] < $rstart ) {
$start_idx = $mid_idx;
} else {
$end_idx = $mid_idx;
}
}
my ( $gap_start, $gap_end, $r_idx, $rstart_idx, $rend_idx );
$gap_start = $rstart;
for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
my ( $pstart, $pend ) = @{ $list->[$CUR] };
# no work needs to be done at all if we find a range pair that
# entirely overlaps the requested region
if ( $pstart <= $start && $pend >= $end ) {
return undef;
}
# find adjacent or overlapping regions already registered
if ( $pend >= ( $rstart - 1 ) && $pstart <= ( $rend + 1 ) ) {
if ( !defined($rstart_idx) ) {
$rstart_idx = $CUR;
}
$rend_idx = $CUR;
}
if ( $pstart > $rstart ) {
$gap_end = ( $rend < $pstart ) ? $rend : $pstart - 1;
push @gap_pairs, [ $gap_start, $gap_end ];
}
$gap_start = ( $rstart > $pend ) ? $rstart : $pend + 1;
# if($pstart > $rend && !defined($r_idx)) {
if ( $pend >= $rend && !defined($r_idx) ) {
$r_idx = $CUR;
last;
}
} ## end for ( my $CUR = $start_idx...
#
do we have
to make another gap?
if ( $gap_start <= $rend ) {
push @gap_pairs, [ $gap_start, $rend ];
}
#
# Merge the new range into the registered list
#
if ( defined($rstart_idx) ) {
my ( $new_start, $new_end );
if ( $rstart < $list->[$rstart_idx]->[0] ) {
$new_start = $rstart;
} else {
$new_start = $list->[$rstart_idx]->[0];
}
if ( $rend > $list->[$rend_idx]->[1] ) {
$new_end = $rend;
} else {
$new_end = $list->[$rend_idx]->[1];
}
splice( @$list, $rstart_idx,
$rend_idx - $rstart_idx + 1,
[ $new_start, $new_end ] );
} elsif ( defined($r_idx) ) {
splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
} else {
push( @$list, [ $rstart, $rend ] );
}
return \@gap_pairs;