3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 # define a set of intervals to be added to the tree
42 # initialise the tree with the above intervals
46 my $results = $tree->query(121779004);
47 if (scalar @$results) {
48 print
"Intervals contain 121779004\n";
51 # same query, but use interval query
52 my $results = $tree->query(121779004, 121779004);
53 if (scalar @$results) {
54 print
"Found containing interval: [", $result->[0]->start,
', ', $result->[0]->end,
"\n";
59 An implementation of an immutable interval tree. Immutable means the tree is
60 initialised with a fixed set of intervals at creation time. Intervals cannot
61 be added to or removed from the tree during its life cycle.
63 Implementation heavily inspired by https:
65 This implementation does not support Intervals having a start > end - i.e.
66 intervals spanning the origin of a circular chromosome.
72 package Bio::EnsEMBL::Utils::Tree::Interval::Immutable;
87 Example : my $tree = Bio::EnsEMBL::Utils::Tree::Immutable([ $i1, $i2, $i3 ]);
88 Description : Constructor. Creates a
new immutable tree instance
97 my $class = ref($caller) || $caller;
99 my $intervals = shift;
100 if (defined $intervals ) {
101 assert_ref($intervals,
'ARRAY');
104 my $self = bless({}, $class);
106 $self->{top_node} = $self->_divide_intervals($intervals);
114 Example : my $root = $tree->root();
115 Description : Return the tree top node
123 return shift->{top_node};
128 Arg [1] : scalar, $start
129 Where the query interval begins
130 Arg [2] : (optional) scalar, $end
131 Where the query interval ends
132 Example : my $results = $tree->query(121626874, 122092717);
133 Description : Query the tree
if its intervals overlap the interval whose start
134 and end points are specified by the argument list.
135 If end is not specified, it is assumed to be the same as start
136 so effectively making a point query.
144 my ($self, $start, $end) = @_;
147 if (defined $start) {
148 $end = $start unless defined $end;
152 return [] unless $interval;
153 return $self->_query_point($self->root, $interval->start, [])
if $interval->
is_point;
156 return $result unless $self->root or $interval->is_empty;
158 my $node = $self->root;
160 if ($interval->contains($node->x_center)) {
161 push @{$result}, @{$node->s_center_beg};
162 $self->_range_query_left($node->left, $interval, $result);
163 $self->_range_query_right($node->right, $interval, $result);
166 if ($interval->is_left_of($node->x_center)) {
167 foreach my $s_beg (@{$node->s_center_beg}) {
168 last unless $interval->intersects($s_beg);
169 push @{$result}, $s_beg;
173 foreach my $s_end (@{$node->s_center_end}) {
174 last unless $interval->intersects($s_end);
175 push @{$result}, $s_end;
177 $node = $node->right;
181 return sort_by_begin(uniq($result));
185 my ($self, $node, $point, $result) = @_;
187 return $result unless $node;
189 # if x is less than x_center, the leftmost set of intervals, S_left, is considered
190 if ($point <= $node->x_center) {
191 # if x is less than x_center, we know that all intervals in S_center end after x,
192 # or they could not also overlap x_center. Therefore, we need only find those intervals
193 # in S_center that begin before x. We can consult the lists of S_center that have already
194 # been constructed. Since we only care about the interval beginnings in this scenario,
195 # we can consult the list sorted by beginnings.
196 # Suppose we find the closest number no greater than x in this list. All ranges from the
197 # beginning of the list to that found point overlap x because they begin before x and end
198 # after x (as we know because they overlap x_center which is larger than x).
199 # Thus, we can simply start enumerating intervals in the list until the startpoint value exceeds x.
200 foreach my $s_beg (@{$node->s_center_beg}) {
201 last
if $s_beg->is_right_of($point);
202 push @{$result}, $s_beg;
205 # since x < x_center, we also consider the leftmost set of intervals
206 return $self->_query_point($node->left, $point, $result);
208 # if x is greater than x_center, we know that all intervals in S_center must begin before x,
209 # so we find those intervals that end after x using the list sorted by interval endings.
210 foreach my $s_end (@{$node->s_center_end}) {
211 last
if $s_end->is_left_of($point);
212 push @{$result}, $s_end;
215 # since x > x_center, we also consider the rightmost set of intervals
216 return $self->_query_point($node->right, $point, $result);
219 return sort_by_begin(uniq($result));
222 # This corresponds to the left branch of the range search, once we find a node, whose
223 # midpoint is contained in the query interval. All intervals in the left subtree of that node
224 # are guaranteed to intersect with the query, if they have an endpoint greater or equal than
225 # the start of the query interval. Basically, this means that every time we branch to the left
226 # in the binary search, we need to add the whole right subtree to the result set.
228 sub _range_query_left {
229 my ($self, $node, $interval, $result) = @_;
232 if ($interval->contains($node->x_center)) {
233 push @{$result}, @{$node->s_center_beg};
235 # in-order traversal of the right subtree to add all its intervals
236 $self->_in_order_traversal($node->right, $result);
240 foreach my $seg_end (@{$node->s_center_end}) {
241 last
if $seg_end->is_left_of($interval);
242 push @{$result}, $seg_end;
244 $node = $node->right;
249 # This corresponds to the right branch of the range search, once we find a node, whose
250 # midpoint is contained in the query interval. All intervals in the right subtree of that node
251 # are guaranteed to intersect with the query, if they have an endpoint smaller or equal than
252 # the end of the query interval. Basically, this means that every time we branch to the right
253 # in the binary search, we need to add the whole left subtree to the result set.
255 sub _range_query_right {
256 my ($self, $node, $interval, $result) = @_;
259 if ($interval->contains($node->x_center)) {
260 push @{$result}, @{$node->s_center_beg};
262 # in-order traversal of the left subtree to add all its intervals
263 $self->_in_order_traversal($node->left, $result);
265 $node = $node->right;
267 foreach my $seg_beg (@{$node->s_center_beg}) {
268 last
if $seg_beg->is_right_of($interval);
269 push @{$result}, $seg_beg;
276 sub in_order_traversal {
280 $self->_in_order_traversal($self->root, $result);
285 sub _in_order_traversal {
286 my ($self, $node, $result) = @_;
291 $self->_in_order_traversal($node->left, $result);
292 push @{$result}, @{$node->s_center_beg};
293 $self->_in_order_traversal($node->right, $result);
296 sub _divide_intervals {
297 my ($self, $intervals, $sorted) = @_;
299 return undef unless scalar @{$intervals};
301 my $sorted_intervals;
303 $sorted_intervals = $intervals;
305 $sorted_intervals = sort_by_begin($intervals);
308 my $x_center = $self->_center_sorted($sorted_intervals);
309 my ($s_center, $s_left, $s_right) = ([], [], []);
311 foreach my $sorted_interval (@{$sorted_intervals}) {
312 if ($sorted_interval->spans_origin) {
313 throw "Cannot build a tree containing an interval that spans the origin";
315 if ($sorted_interval->end < $x_center) {
316 push @{$s_left}, $sorted_interval;
317 } elsif ($sorted_interval->start > $x_center) {
318 push @{$s_right}, $sorted_interval;
320 push @{$s_center}, $sorted_interval;
326 $self->_divide_intervals($s_left, 1),
327 $self->_divide_intervals($s_right, 1));
331 my ($self, $intervals) = @_;
333 my $sorted_intervals = sort_by_begin($intervals);
334 my $len = scalar @{$sorted_intervals};
336 return $sorted_intervals->[int($len/2)]->start;
340 my ($self, $sorted_intervals) = @_;
341 my $len = scalar @{$sorted_intervals};
343 return $sorted_intervals->[int($len/2)]->start;
347 my $intervals = shift;
349 return [
map { $_->[1] } sort { $a->[0] <=> $b->[0] }
map { [ $_->start, $_ ] } @{$intervals} ];
353 my $intervals = shift;
355 tie my %seen,
'Tie::RefHash';
357 return [ grep { ! $seen{ $_ }++ } @{$intervals} ];