ensembl-hive  2.7.0
PP.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 
35 =head1 DESCRIPTION
36 
37 Pure Perl fall back implementation of a mutable interval tree, uses
38 augmented AVL binary balanced trees.
39 
40 =head1 METHODS
41 
42 =cut
43 
44 package Bio::EnsEMBL::Utils::Tree::Interval::Mutable::PP;
45 
46 use strict;
47 use Carp;
48 
51 use Bio::EnsEMBL::Utils::Exception qw(throw);
52 
53 =head2 new
54 
55  Arg [] : none
56  Example : my $tree = Bio::EnsEMBL::Utils::Tree::Mutable();
57  Description : Constructor. Creates a new mutable tree instance
59  Exceptions : none
60  Caller : general
61 
62 =cut
63 
64 sub new {
65  my $caller = shift;
66  my $class = ref($caller) || $caller;
67 
68  # mmhh, should probably return just the hash ref
69  return bless({ _root => undef, _size => 0 }, $class);
70 }
71 
72 =head2 root
73 
74  Arg [] : none
75  Example : my $root = $tree->root;
76  Description : Returns the tree top node
78  Exceptions : none
79  Caller : general
80 
81 =cut
82 
83 sub root {
84  my $self = shift;
85  $self->{_root} = shift if( @_ );
86 
87  return $self->{_root};
88 }
89 
90 =head2 size
91 
92  Arg [] : none
93  Example : print "Tree size is ", $tree->size, "\n";
94  Description : Return the size of the tree, i.e. the number of nodes
95  Returntype : scalar
96  Exceptions : none
97  Caller : general
98 
99 =cut
100 
101 sub size {
102  return shift->{_size};
103 }
104 
105 =head2 insert
106 
108  Interval to insert
109  Example : $tree->insert(Bio::EnsEMBL::Utils::Interval->new(10, 20, 'data'));
110  Description : Insert an interval in the tree
111  Returntype : scalar (1), upon success
112  Exceptions : thrown if Interval spans origin (has start > end)
113  Caller : general
114 
115 =cut
116 
117 sub insert {
118  my ($self, $i) = @_;
119 
120  if ($i->spans_origin) {
121  throw "Cannot insert an interval that spans the origin into a mutable tree";
122  }
123  # base case: empty tree, assign new node to root
124  unless (defined $self->root) {
125  $self->root(Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node->new($self, $i));
126  $self->{_size}++;
127 
128  return 1;
129  }
130 
131  # check if node exists with the same key
132  my $node = $self->root->search_by_key($i->start);
133  if ($node) {
134  # check the node's intervals if there's already one
135  # which is the same as the interval we're trying to insert
136  map { return 0 if $_->start == $i->start and $_->end == $i->end } @{$node->intervals};
137 
138  # add the interval to the node
139  $node->add_interval($i);
140 
141  # update max of node and its ancestors if necessary
142  if ($node->max < $i->end) {
143  $node->max($i->end);
144  $node->_update_parents_max if $node->parent;
145  }
146 
147  $self->{_size}++;
148 
149  return 1;
150  } else {
151  # node with the interval's key doesn't exist
152  # insert from root node
153  $self->root->insert($i);
154  $self->{_size}++;
155 
156  return 1;
157  }
158 
159  croak "Shouldn't be here";
160 }
161 
162 =head2 search
163 
164  Arg [1] : scalar, $start
165  the starting point of the interval to search
166  Arg [2] : scalar, $end
167  the end point of the interval to search
168  Example : my $result = $tree->search(85, 100);
169  Description : Search the intervals in the tree overlapping the query
170  Returntype : Arrayref of Bio::EnsEMBL::Utils::Interval instances
171  Exceptions : none
172  Caller : general
173 
174 =cut
175 
176 sub search {
177  my ($self, $start, $end) = @_;
178 
179  return unless $self->root; # empty tree
180  return $self->root->search(Bio::EnsEMBL::Utils::Interval->new($start, $end));
181 
182 }
183 
184 =head2 remove
185 
187  The interval to remove from the tree
188  Example : $tree->remove($i);
189  Description : Remove an interval from the tree
190  Returntype : scalar, 1 if removal is successful, 0 otherwise
191  Exceptions : none
192  Caller : general
193 
194 =cut
195 
196 sub remove {
197  my ($self, $i) = @_;
198 
199  return 0 unless $self->root; # empty tree
200 
201  my $node = $self->root->search_by_key($i->start);
202  return 0 unless $node;
203 
204  my $node_intervals = $node->intervals;
205  if (scalar @{$node_intervals} > 1) {
206  # node with this key has more than this interval. Find it and remove
207  my $removed = 0;
208  foreach my $j (0 .. $#{$node_intervals}) {
209  if ($node_intervals->[$j]->start == $i->start and $node_intervals->[$j]->end == $i->end) {
210  splice @{$node_intervals}, $j, 1;
211  $removed = 1;
212  last;
213  }
214  }
215 
216  if ($removed) {
217  $removed = 0;
218 
219  # update max of node and its ancestors, if necessary
220  if ($i->end == $node->max) {
221  my $node_highest_end = $node->_highest_end;
222  if ($node->left and $node->right) {
223  $node->{max} = max $node->left->max, $node->rigth->max, $node_highest_end;
224  } elsif ($node->left and not $node->right) {
225  $node->{max} = max $node->left->max, $node_highest_end;
226  } elsif ($node->right and not $node->left) {
227  $node->{max} = max $node->right->max, $node_highest_end;
228  } else {
229  $node->{max} = $node_highest_end;
230  }
231 
232  $node->parent->_update_parents_max if $node->parent;
233  }
234 
235  $self->{_size}--;
236 
237  return 1;
238  } else {
239  return 0;
240  }
241  } elsif (scalar @{$node_intervals}) {
242  # node with this key has only this interval
243  # check if the remaining node's interval is the one we want to remove
244  if ($node_intervals->[0]->start == $i->start and $node_intervals->[0]->end == $i->end) {
245  # remove the whole node from the tree
246  if ($node->key == $self->root->key) {
247  # we're removing the root node
248  # create dummy node temporarily taking root's parent node
249  my $root_parent =
251  Bio::EnsEMBL::Utils::Interval->new($i->start, $i->end));
252  $root_parent->left($self->root);
253  $self->root->parent($root_parent);
254 
255  my $removed_node = $self->root->remove($node);
256  $self->root($root_parent->left);
257 
258  $self->root->parent(undef) if $self->root;
259  if ($removed_node) {
260  $removed_node = undef;
261  $self->{_size}--;
262 
263  return 1;
264  } else {
265  return 0;
266  }
267  } else {
268  my $removed_node = $self->root->remove($node);
269  if ($removed_node) {
270  $removed_node = undef;
271  $self->{_size}--;
272 
273  return 1;
274  } else {
275  return 0;
276  }
277  }
278  } else {
279  # the remaining record is not the one we want to remove
280  return 0;
281  }
282 
283  } else {
284  # no records at all in this node, shouldn't happen
285  croak "This should not happen";
286  }
287 }
288 
289 sub _in_order_traversal_delete {
290  my ($self, $node) = @_;
291 
292  return unless $node;
293 
294  $self->_in_order_traversal_delete($node->left);
295  $node->left(undef);
296  $self->_in_order_traversal_delete($node->right);
297  $node->right(undef);
298 
299  $node->parent(undef);
300 
301 }
302 
303 sub DESTROY {
304  my $self = shift;
305 
306  $self->_in_order_traversal_delete($self->root);
307 }
308 
309 1;
310 
Bio::EnsEMBL::Utils::Interval
Definition: Interval.pm:41
map
public map()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node
Definition: Node.pm:16
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node::new
public new()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::PP
Definition: PP.pm:17
Bio::EnsEMBL::Utils::Interval::start
public Scalar start()
Bio::EnsEMBL::Utils::Tree::Interval::Mutable::PP::root
public Bio::EnsEMBL::Utils::Tree::Interval::Mutable::Node root()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68