3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
41 # Get a fixed width chunk around the range of intereset. This
42 # will be used if any registration is actually necessary.
43 $chunk_start = ( $start >> 20 ) << 20 + 1;
44 $chunk_end = ( ( $end >> 20 ) + 1 ) << 20;
46 # Check if any registration is necessary for the range. If it is
47 # register a large chunked area instead and return a listref of
48 # unregistered areas that need to be loaded.
50 $pairs = $rr->check_and_register(
51 $id, $start, $end, $chunk_start, $chunk_end
54 foreach my $pair (@$pairs) {
55 my ( $pair_start, $pair_end ) = @$pair;
56 # Fetch mappings for these regions from the assembly table and
57 # load them into the mapper.
61 # The range ($start - $end) is already registered
65 # Check if any registration is necessary. If it is register the
66 # region and return a listref of pairs that need to be loaded.
67 if ( $pairs = $rr->check_and_register( $id, $start, $end ) ) {
73 This module maintains an
internal list of registered regions and is
74 used to quickly ascertain
if and what regions of a provided range need
81 package Bio::EnsEMBL::Mapper::RangeRegistry;
92 Description: Creates a
new RangeRegistry
object
95 Caller : AssemblyMapperAdaptor
103 my $class = ref($proto) || $proto;
105 return bless( {
'registry' => {} }, $class );
110 $self->{
'registry'} = {};
113 =head2 check_and_register
116 The
id of the range to be checked/registered (e.g. a sequenceid)
118 The start of the range to be checked
120 The end of the range to be checked
121 Arg [4] : (optional)
int $rstart
122 The start of the range to be registered
123 if the checked range was not fully registered
124 Arg [5] : (optional)
int $rend
125 The end of the range to be registerd
126 if the checked range was not fully registered
128 Description: Checks the range registry to see
if the entire range
129 denoted by ($id : $start-$end) is already registered. If
130 it already is, then undef is returned. If it is not then
131 the range specified by $rstart and $rend is registered, and
132 a list of regions that were required to completely
register
133 $rstart-$rend is returned. If $rstart and $rend are not
134 defined they
default to $start and $end respectively.
136 The reason there is a single call to
do both the checking and
137 registering is to reduce the overhead. Much of the work to
138 check
if a range is registered is the same as registering a
139 region around that range.
140 Returntype : undef or listref of [start,end] range pairs
141 Exceptions :
throw if rstart is greater than start
142 throw if rend is less than end
143 throw if end is less than start
144 throw if id, start, or end are not defined
145 Caller : AssemblyMapperAdaptor
154 sub check_and_register {
155 my ( $self, $id, $start, $end, $rstart, $rend ) = @_;
157 $rstart = $start
if ( !defined($rstart) );
158 $rend = $end
if ( !defined($rend) );
163 if ( !defined($id) || !defined($start) || !defined($end) ) {
164 throw(
"ID, start, end arguments are required");
167 # The following was commented out due to Ensembl Genomes requirements
168 # for bacterial genomes.
170 ## if ( $start > $end ) {
171 ## throw( "start argument [$start] must be less than "
172 ## . "(or equal to) end argument [$end]" );
175 ## if ( $rstart > $rend ) {
176 ## throw( "rstart argument [$rstart] must be less than "
177 ## . "(or equal to) rend argument [$rend] argument" );
180 if ( $rstart > $start ) {
181 throw(
"rstart [$rstart] must be less than or equal to start [$start]");
184 if ( $rend < $end ) {
185 throw(
"rend [$rend] must be greater than or equal to end [$end]");
188 my $reg = $self->{
'registry'};
189 my $list = $reg->{$id} ||= [];
193 my $len = scalar(@$list);
196 # this is the first request for this id, return a gap pair for the
197 # entire range and register it as seen
198 $list->[0] = [ $rstart, $rend ];
199 return [ [ $rstart, $rend ] ];
203 # loop through the list of existing ranges recording any "gaps" where
204 # the existing range does not cover part of the requested range
208 my $end_idx = $#$list;
209 my ( $mid_idx, $range );
211 # binary search the relevant pairs
212 # helps if the list is big
213 while ( ( $end_idx - $start_idx ) > 1 ) {
214 $mid_idx = ( $start_idx + $end_idx ) >> 1;
215 $range = $list->[$mid_idx];
217 if ( $range->[1] < $rstart ) {
218 $start_idx = $mid_idx;
224 my ( $gap_start, $gap_end, $r_idx, $rstart_idx, $rend_idx );
225 $gap_start = $rstart;
227 for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
228 my ( $pstart, $pend ) = @{ $list->[$CUR] };
230 # no work needs to be done at all if we find a range pair that
231 # entirely overlaps the requested region
232 if ( $pstart <= $start && $pend >= $end ) {
236 # find adjacent or overlapping regions already registered
237 if ( $pend >= ( $rstart - 1 ) && $pstart <= ( $rend + 1 ) ) {
238 if ( !defined($rstart_idx) ) {
244 if ( $pstart > $rstart ) {
245 $gap_end = ( $rend < $pstart ) ? $rend : $pstart - 1;
246 push @gap_pairs, [ $gap_start, $gap_end ];
249 $gap_start = ( $rstart > $pend ) ? $rstart : $pend + 1;
251 # if($pstart > $rend && !defined($r_idx)) {
252 if ( $pend >= $rend && !defined($r_idx) ) {
256 } ## end
for ( my $CUR = $start_idx...
258 #
do we have to make another gap?
259 if ( $gap_start <= $rend ) {
260 push @gap_pairs, [ $gap_start, $rend ];
264 # Merge the new range into the registered list
266 if ( defined($rstart_idx) ) {
267 my ( $new_start, $new_end );
269 if ( $rstart < $list->[$rstart_idx]->[0] ) {
270 $new_start = $rstart;
272 $new_start = $list->[$rstart_idx]->[0];
275 if ( $rend > $list->[$rend_idx]->[1] ) {
278 $new_end = $list->[$rend_idx]->[1];
281 splice( @$list, $rstart_idx,
282 $rend_idx - $rstart_idx + 1,
283 [ $new_start, $new_end ] );
285 } elsif ( defined($r_idx) ) {
286 splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
288 push( @$list, [ $rstart, $rend ] );
292 } ## end sub check_and_register
294 # overlap size is just added to make RangeRegistry generally more useful
301 Example : my $overlap_size = $rr->(
"chr1", 1, 100_000_000 )
302 Description: finds out how many bases of the given range are registered
311 my ( $self, $id, $start, $end ) = @_;
315 if ( $start > $end ) {
return 0 }
317 my $reg = $self->{
'registry'};
318 my $list = $reg->{$id} ||= [];
320 my $len = scalar(@$list);
323 # this is the first request for this id, return a gap pair for the
324 # entire range and register it as seen
329 # loop through the list of existing ranges recording any "gaps" where
330 # the existing range does not cover part of the requested range
334 my $end_idx = $#$list;
335 my ( $mid_idx, $range );
337 # binary search the relevant pairs
338 # helps if the list is big
339 while ( ( $end_idx - $start_idx ) > 1 ) {
340 $mid_idx = ( $start_idx + $end_idx ) >> 1;
341 $range = $list->[$mid_idx];
342 if ( $range->[1] < $start ) {
343 $start_idx = $mid_idx;
349 for ( my $CUR = $start_idx ; $CUR < $len ; $CUR++ ) {
350 my ( $pstart, $pend ) = @{ $list->[$CUR] };
352 if ( $pstart > $end ) {
353 # No more interesting ranges here.
357 # no work needs to be done at all if we find a range pair that
358 # entirely overlaps the requested region
359 if ( $pstart <= $start && $pend >= $end ) {
360 $overlap = $end - $start + 1;
364 my $mstart = ( $start < $pstart ? $pstart : $start );
365 my $mend = ( $end < $pend ? $end : $pend );
367 if ( $mend - $mstart >= 0 ) {
368 $overlap += ( $mend - $mstart + 1 );
373 } ## end sub overlap_size
376 # low level function to access the ranges
377 # only use for read access
380 my ( $self, $id ) = @_;
382 return $self->{
'registry'}->{$id};