ensembl-hive  2.7.0
Node.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 =head1 CONTACT
21 
22  Please email comments or questions to the public Ensembl
23  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
24 
25  Questions may also be sent to the Ensembl help desk at
26  <http://www.ensembl.org/Help/Contact>.
27 
28 =cut
29 
30 =head1 NAME
31 
33 
34 =head1 DESCRIPTION
35 
36 Represents a node in the mutable interval tree pure perl implementation.
37 
38 =head1 METHODS
39 
40 =cut
41 
42 package Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node;
43 
44 use strict;
45 
46 use Scalar::Util qw(looks_like_number weaken);
47 use List::Util qw(max);
48 use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
49 use Bio::EnsEMBL::Utils::Exception qw(throw);
50 
51 =head1 METHODS
52 
53 =head2 new
54 
56  The tree to which the node belongs
58  Description : Constructor. Creates a new mutable tree instance node
59  associated with the given interval
61  Exceptions : none
62  Caller : general
63 
64 =cut
65 
66 sub new {
67  my $caller = shift;
68  my $class = ref($caller) || $caller;
69 
70  my ($tree, $interval) = @_;
71  throw 'Node constructor takes (tree, interval) as arguments'
72  unless $tree and $interval;
73 
74  my $self = bless({ tree => $tree,
75  intervals => [ $interval ], # the array of all records with the same key
76  key => $interval->start,
77  max => $interval->end,
78  parent => undef,
79  height => 0,
80  left => undef,
81  right => undef }, $class);
82 
83  return $self;
84 }
85 
86 =head2 tree
87 
88  Arg [] : none
89  Example : my $tree = $node->root;
90  Description : Returns the tree to which the node belongs
92  Exceptions : none
93  Caller : general
94 
95 =cut
96 
97 sub tree {
98  return shift->{tree};
99 }
100 
101 =head2 key
102 
103  Arg [] : none
104  Example : my $key = $node->key;
105  Description : Returns the key associated with the node
106  Returntype : scalar
107  Exceptions : none
108  Caller : general
109 
110 =cut
111 
112 sub key {
113  my $self = shift;
114  $self->{key} = shift if( @_ );
115 
116  return $self->{key};
117 }
118 
119 =head2 intervals
120 
121  Arg [] : none
122  Example : my $intervals = $node->intervals;
123  Description : Returns the intervals associated with the node
124  Returntype : Arrayref of Bio::EnsEMBL::Utils::Interval
125  Exceptions : none
126  Caller : general
127 
128 =cut
129 
130 sub intervals {
131  return shift->{intervals};
132 }
133 
134 =head add_interval
135 
136  Arg [] : none
137  Description : Add an interval to the node's set of intervals
138  Returntype : none
139  Exceptions : none
140  Caller : general
141 
142 =cut
143 
144 sub add_interval {
145  push @{shift->{intervals}}, shift;
146 }
147 
148 =head2 parent
149 
150  Arg [] : none
151  Description : Return the parent of the node in the tree
152  Returntype : none
153  Exceptions : none
154  Caller : general
155 
156 =cut
157 
158 sub parent {
159  my $self = shift;
160  if (@_) {
161  $self->{parent} = shift;
162  weaken($self->{parent});
163  }
164 
165  return $self->{parent};
166 }
167 
168 =head2 height
169 
170  Arg [] : none
171  Description : Return the height of the node
172  Returntype : scalar, positive or 0
173  Exceptions : none
174  Caller : general
175 
176 =cut
177 
178 sub height {
179  my $self = shift;
180  $self->{height} = shift if( @_ );
181 
182  return $self->{height};
183 }
184 
185 =head2 left
186 
187  Arg [] : none
188  Description : Return the node's left child
190  Exceptions : none
191  Caller : general
192 
193 =cut
194 
195 sub left {
196  my $self = shift;
197  $self->{left} = shift if( @_ );
198 
199  return $self->{left};
200 }
201 
202 =head2 right
203 
204  Arg [] : none
205  Description : Return the node's right child
206  Returntype : Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node
207  Exceptions : none
208  Caller : general
209 
210 =cut
211 
212 sub right {
213  my $self = shift;
214  $self->{right} = shift if( @_ );
215 
216  return $self->{right};
217 }
218 
219 =head2 search
220 
221  Arg [1] : Bio::EnsEMBL::Utils::Interval
222  The interval to search for overlaps in the tree
223  Description : Search the node and its successors for overlapping
224  intervals with the query
225  Returntype : Arrayref of Bio::EnsEMBL::Utils::Interval
226  Exceptions : none
227  Caller : general
228 
229 =cut
230 
231 sub search {
232  my ($self, $i) = @_;
233 
234  # if interval is to the right of the rightmost point of any interval in this node and
235  # all its children, there won't be any matches
236  return [] if $i->start > $self->{max};
237 
238  my $results = [];
239 
240  # search left subtree
241  if ($self->left and $self->left->{max} >= $i->start) {
242  push @{$results}, @{$self->left->search($i)};
243  }
244 
245  # search this node
246  push @{$results}, @{$self->_overlapping_intervals($i)};
247 
248  # if interval is to the left of the start of this interval, then
249  # it can't be in any child to the right
250  return $results if $i->end < $self->key;
251 
252  # search right subtree
253  push @{$results}, @{$self->right->search($i)} if $self->right;
254 
255  return $results;
256 }
257 
258 =head2 search_by_key
259 
260  Arg [1] : scalar, $key
261  The key to search the node for
262  Description : Searches for a node by a 'key' value
264  Exceptions : none
265  Caller : general
266 
267 =cut
268 
269 sub search_by_key {
270  my ($self, $key) = @_;
271 
272  if ($self->key == $key) {
273  return $self;
274  } elsif ($key < $self->key) {
275  return $self->left->search_by_key($key) if $self->left;
276  } else {
277  return $self->right->search_by_key($key) if $self->right;
278  }
279 
280 }
281 
282 =head2 insert
283 
285  The interval to insert
286  Description : Insert an interval in the node or in its successors
287  Returntype : none
288  Exceptions : none
289  Caller : general
290 
291 =cut
292 
293 sub insert {
294  my ($self, $i) = @_;
295 
296  if ($i->start < $self->key) {
297  # insert into left subtree
298  unless (defined $self->left) {
299  $self->left(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self->tree, $i));
300  $self->left->parent($self);
301  } else {
302  $self->left->insert($i);
303  }
304  } else {
305  # insert into right subtree
306  unless (defined $self->right) {
307  $self->right(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self->tree, $i));
308  $self->right->parent($self);
309  } else {
310  $self->right->insert($i);
311  }
312  }
313 
314  # update max value if needed
315  $self->{max} = $i->end if $self->{max} < $i->end;
316 
317  # update node's height
318  $self->_update_height;
319 
320  # rebalance to ensure O(logn) time operations
321  $self->_rebalance;
322 }
323 
324 =head2 remove
325 
327  The node to remove from the tree
328  Description : Remove a node from the tree
329  Returntype : none
330  Exceptions : none
331  Caller : general
332 
333 =cut
334 
335 sub remove {
336  my ($self, $node) = @_;
337 
338  return unless $node;
339 
340  my $parent = $self->parent;
341 
342  if ($node->key < $self->key) {
343  # node to be removed is in left subtree
344  return $self->left->remove($node) if $self->left;
345  return;
346  } elsif ($node->key > $self->key) {
347  # node to be removed is in right subtree
348  return $self->right->remove($node) if $self->right;
349  return;
350  } else {
351  if ($self->left and $self->right) {
352  # node has two children
353  my $lowest = $self->right->_lowest;
354  $self->key($lowest->key);
355  $self->{intervals} = $lowest->{intervals};
356  return $self->right->remove($self);
357  } elsif ($parent->left == $self) {
358  # one child or no child case on left side
359  if ($self->right) {
360  $parent->left($self->right);
361  $self->right->parent($parent);
362  } else {
363  $parent->left($self->left);
364  $self->left->parent($parent) if $self->left
365  }
366  $parent->_update_parents_max;
367  $parent->_update_height;
368  $parent->_rebalance;
369 
370  return $self;
371 
372  } elsif ($parent->right == $self) {
373  # one child or no child case on right side
374  if ($self->right) {
375  $parent->right($self->right);
376  $self->right->parent($parent);
377  } else {
378  $parent->right($self->left);
379  $self->left->parent($parent) if $self->left;
380  }
381  $parent->_update_parents_max;
382  $parent->_update_height;
383  $parent->_rebalance;
384 
385  return $self;
386  }
387  }
388 }
389 
390 =head1 PRIVATE METHODS
391 
392 =head2 _height
393 
394 Not a method since code could invoke method on undefined instances, e.g. _rebalance
395 
396 =cut
397 
398 sub _height {
399  my $node = shift;
400 
401  return -1 unless $node;
402  return $node->height;
403 }
404 
405 =head2 _lowest
406 
407 Returns the 'smallest' node in the tree
408 
409 =cut
410 
411 sub _lowest {
412  my $self = shift;
413 
414  return $self unless $self->left;
415  return $self->left->_lowest;
416 }
417 
418 sub _highest_end {
419  my $self = shift;
420 
421  my $high = $self->{intervals}[0]->end;
422  map { $high = $_->end if $high < $_->end } @{$self->{intervals}};
423 
424  return $high;
425 }
426 
427 sub _update_height {
428  my $self = shift;
429 
430  $self->height(List::Util::max $self->left?$self->left->height:0, $self->right?$self->right->height:0 + 1);
431 }
432 
433 sub _update_parents_max {
434  my $self = shift;
435  # updates the max value of all the parents after inserting into already existing node, as well as
436  # removing the node completely or removing the record of an already existing node. Starts with
437  # the parent of an affected node and bubbles up to root
438  my $high = $self->_highest_end;
439  if ($self->left and $self->right) {
440  $self->{max} = max $self->left->{max}, $self->right-{max}, $high;
441  } elsif ($self->left and !$self->right) {
442  $self->{max} = max $self->left->{max}, $high;
443  } elsif (!$self->left and $self->right) {
444  $self->{max} = max $self->right->{max}, $high;
445  } else {
446  $self->{max} = $high;
447  }
448 
449  $self->parent->_update_parents_max if $self->parent;
450 }
451 
452 =head2 _rebalance
453 
454 Rebalances the tree if the height value between two nodes of the same parent is greater than two.
455 There are 4 cases that can happen:
456 
457 Left-Left case:
458  z y
459  / \ / \
460  y T4 Right Rotate (z) x z
461  / \ - - - - - - - - -> / \ / \
462  x T3 T1 T2 T3 T4
463  / \
464  T1 T2
465 
466 Left-Right case:
467  z z x
468  / \ / \ / \
469  y T4 Left Rotate (y) x T4 Right Rotate(z) y z
470  / \ - - - - - - - - -> / \ - - - - - - - -> / \ / \
471  T1 x y T3 T1 T2 T3 T4
472  / \ / \
473  T2 T3 T1 T2
474 
475 Right-Right case:
476  z y
477  / \ / \
478  T1 y Left Rotate(z) z x
479  / \ - - - - - - - -> / \ / \
480  T2 x T1 T2 T3 T4
481  / \
482  T3 T4
483 
484 Right-Left case:
485  z z x
486  / \ / \ / \
487  T1 y Right Rotate (y) T1 x Left Rotate(z) z y
488  / \ - - - - - - - - -> / \ - - - - - - - -> / \ / \
489  x T4 T2 y T1 T2 T3 T4
490  / \ / \
491  T2 T3 T3 T4
492 
493 =cut
494 
495 sub _rebalance {
496  my $self = shift;
497 
498  my ($left, $right) = ($self->left, $self->right);
499 
500  if (_height($left) - _height($right) >= 2) {
501  if (_height($left->left) >= _height($left->right)) {
502  # Left-Left case
503  $self->_right_rotate;
504  $self->_update_max_right_rotate;
505  } else {
506  # Left-Right case
507  $left->_left_rotate;
508  $self->_right_rotate;
509  $self->_update_max_right_rotate;
510  }
511  } elsif (_height($right) - _height($left) >= 2) {
512  if (_height($right->right) >= _height($right->left)) {
513  # Right-Right case
514  $self->_left_rotate;
515  $self->_update_max_left_rotate;
516  } else {
517  # Right-Left case
518  $right->_right_rotate;
519  $self->_left_rotate;
520  $self->_update_max_left_rotate;
521  }
522  }
523 }
524 
525 sub _left_rotate {
526  my $self = shift;
527 
528  my $right = $self->right;
529 
530  $right->parent($self->parent);
531  unless (defined $right->parent) {
532  $self->tree->root($right);
533  } else {
534  if ($right->parent->left == $self) {
535  $right->parent->left($right);
536  } elsif ($right->parent->right == $self) {
537  $right->parent->right($right);
538  }
539  }
540 
541  $self->right($right->left);
542  $self->right->parent($self) if $self->right;
543 
544  $right->left($self);
545  $self->parent($right);
546 
547  $self->_update_height;
548  $right->_update_height;
549 
550 }
551 
552 sub _update_max_left_rotate { # handles Right-Right case and Right-Left case in rebalancing AVL tree
553  my $self = shift;
554 
555  # update max of left sibling (x in first case, y in second)
556  my $parent = $self->parent;
557  my $parent_right = $parent->right;
558  if ($parent_right) {
559  my $parent_right_high = $parent_right->_highest_end;
560  if (!$parent_right->left and $parent_right->right) {
561  $parent_right->{max} = max $parent_right_high, $parent_right->right->{max};
562  } elsif ($parent_right->left and !$parent_right->right) {
563  $parent_right->{max} = max $parent_right_high, $parent_right->left->{max};
564  } elsif (!$parent_right->left and !$parent_right->right) {
565  $parent_right->{max} = $parent_right_high;
566  } else {
567  $parent_right->{max} = max $parent_right_high, $parent_right->left->{max}, $parent_right->right->{max};
568  }
569  }
570 
571  # update max of itself (z)
572  my $high = $self->_highest_end;
573  if (!$self->left and $self->right) {
574  $self->{max} = max $high, $self->right->{max};
575  } elsif ($self->left and !$self->right) {
576  $self->{max} = max $high, $self->left->{max};
577  } elsif (!$self->left and !$self->right) {
578  $self->{max} = $high;
579  } else {
580  $self->{max} = max $high, $self->left->{max}, $self->right->{max};
581  }
582 
583  # update max of parent (y in first case, x in second)
584  $parent->{max} = max $parent->left?$parent->left->{max}:0, $parent->right?$parent->right->{max}:0, $parent->_highest_end
585  if $parent;
586 
587 }
588 
589 sub _right_rotate {
590  my $self = shift;
591 
592  my $left = $self->left;
593 
594  $left->parent($self->parent);
595  unless (defined $left->parent) {
596  $self->tree->root($left);
597  } else {
598  if ($left->parent->left == $self) {
599  $left->parent->left($left);
600  } elsif ($left->parent->right == $self) {
601  $left->parent->right($left);
602  }
603  }
604 
605  $self->left($left->right);
606  $self->left->parent($self) if $self->left;
607 
608  $left->right($self);
609  $self->parent($left);
610 
611  $self->_update_height;
612  $left->_update_height;
613 }
614 
615 sub _update_max_right_rotate { # handles Left-Left case and Left-Right case after rebalancing AVL tree
616  my $self = shift;
617 
618  # update max of left sibling (x in first case, y in second)
619  my $parent = $self->parent;
620  my $parent_left = $parent->left;
621  if ($parent_left) {
622  my $parent_left_high = $parent_left->_highest_end;
623  if (!$parent_left->left and $parent_left->right) {
624  $parent_left->{max} = max $parent_left_high, $parent_left->right->{max};
625  } elsif ($parent_left->left and !$parent_left->right) {
626  $parent_left->{max} = max $parent_left_high, $parent_left->left->{max};
627  } elsif (!$parent_left->left and !$parent_left->right) {
628  $parent_left->{max} = $parent_left_high;
629  } else {
630  $parent_left->{max} = max $parent_left_high, $parent_left->left->{max}, $parent_left->right->{max};
631  }
632  }
633 
634  # update max of itself (z)
635  my $high = $self->_highest_end;
636  if (!$self->left and $self->right) {
637  $self->{max} = max $high, $self->right->{max};
638  } elsif ($self->left and !$self->right) {
639  $self->{max} = max $high, $self->left->{max};
640  } elsif (!$self->left and !$self->right) {
641  $self->{max} = $high;
642  } else {
643  $self->{max} = max $high, $self->left->{max}, $self->right->{max};
644  }
645 
646  # update max of parent (y in first case, x in second)
647  $parent->{max} = max $parent->left?$parent->left->{max}:0, $parent->right?$parent->right->{max}:0, $parent->_highest_end
648  if $parent;
649 }
650 
651 sub _overlapping_intervals {
652  my ($self, $i) = @_;
653 
654  my $results = [];
655  if ($self->key <= $i->end and $i->start <= $self->_highest_end) {
656  # node's interval overlap: check individual intervals
657  map { push @{$results}, $_ if $i->start <= $_->end } @{$self->{intervals}}
658  }
659 
660  return $results;
661 }
662 
663 1;
664 
Bio::EnsEMBL::Utils::Interval
Definition: Interval.pm:41
map
public map()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node::left
public Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node left()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node::search
public Arrayref search()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node
Definition: Node.pm:16
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node::parent
public void parent()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::PP
Definition: PP.pm:17
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node::right
public Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node right()
Bio::EnsEMBL::Utils::Interval::end
public Scalar end()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node::search_by_key
public Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node search_by_key()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68