ensembl-hive  2.6
Iterator.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package Bio::EnsEMBL::Utils::Iterator;
21 
22 
23 =head1 CONTACT
24 
25  Please email comments or questions to the public Ensembl
26  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
27 
28  Questions may also be sent to the Ensembl help desk at
29  <http://www.ensembl.org/Help/Contact>.
30 
31 =cut
32 
33 =head1 NAME
34 
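35 Bio::EnsEMBL::Utils::Iterator - iterate over a set of objects without loading them all into memory
36 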
37 =head1 SYNOPSIS
38 
39  my $variation_iterator =
40  $variation_adaptor->fetch_Iterator_by_VariationSet($set_1kg);
41 
42  while ( my $variation = $variation_iterator->next ) {
43  # operate on variation object
44  print $variation->name, "\n";
45  }
46 
47 =head1 DESCRIPTION
48 
49  Some adaptor methods may return more objects than can fit in memory at once, in these cases
50  you can fetch an iterator object instead of the usual array reference. The iterator object
51  allows you to iterate over the set of objects (using the next() method) without loading the
52  entire set into memory at once. You can tell if an iterator is exhausted with the has_next()
53  method. The peek() method allows you to fetch the next object from the iterator without
54  advancing the iterator - this is useful if you want to check some property of an element in
55  the set while leaving the iterator unchanged.
56 
57  You can filter and transform an iterator in an analogous way to using map and grep on arrays
58  using the provided map() and grep() methods. These methods return another iterator, and only
59  perform the filtering and transformation on each element as it is requested, so again these
60  can be used without loading the entire set into memory.
61 
62  Iterators can be combined together with the append() method which merges together the
63  iterator it is called on with the list of iterators passed in as arguments. This is
64  somewhat analogous to concatenating arrays with the push function. append() returns a new
65  iterator which iterates over each component iterator until it is exhausted before moving
66  on to the next iterator, in the order in which they are supplied to the method.
67 
68  An iterator can be converted to an array (reference) containing all the elements in the
69  set with the to_arrayref() method, but note that this array may consume a lot of memory if
70  the set the iterator is iterating over is large and it is recommended that you do not call
71  this method unless there is no way of working with each element at a time.
72 
73 =head1 METHODS
74 
75 =cut
76 
77 use strict;
78 use warnings;
79 
80 use Bio::EnsEMBL::Utils::Exception qw(throw);
81 
82 =head2 new
83 
84  Argument : either a coderef representing the iterator, in which case this
85  anonymous subroutine is assumed to return the next object in the
86  set when called and to return undef when the set is exhausted,
87  or an arrayref, in which case we return an iterator over this
88  array. If the argument is not defined then we return an 'empty'
89  iterator that immediately returns undef
90 
91  Example :
92 
93  my @dbIDs = fetch_relevant_dbIDs();
94 
95  my $iterator = Bio::EnsEMBL::Utils::Iterator->new(
96  sub { return $self->fetch_by_dbID(shift @dbIDs) }
97  );
98 
99  NB: this is a very simple example showing how to call the constructor
100  that would be rather inefficient in practice, real examples should
101  probably be smarter about batching up queries to minimise trips to
102  the database. See examples in the Variation API.
103 
104  Description: Constructor, creates a new iterator object
105  Returntype : Bio::EnsEMBL::Utils::Iterator instance
106  Exceptions : thrown if the supplied argument is defined but is neither an arrayref nor a coderef
107  Caller : general
108  Status : Experimental
109 
110 =cut
111 
sub new {
    # Constructor. Accepts one optional argument:
    #   - undef / nothing : build an 'empty' iterator (exhausted from the start)
    #   - an arrayref     : iterate over the elements of that array
    #   - a coderef       : use it as the generator; it must return the next
    #                       element on each call and undef once exhausted
    # Anything else is rejected via throw().
    # Returns the new iterator, blessed into $class.
    my ($class, $arg) = @_;

    my $coderef;

    if (not defined $arg) {
        # No argument: the generator reports exhaustion on its first call.
        $coderef = sub { return undef };
    }
    elsif (ref $arg eq 'ARRAY') {
        # Walk the array with a private cursor instead of shift()ing
        # elements off it: the caller's array is left untouched, and
        # several iterators can safely be built over the same arrayref.
        my $cursor = 0;

        $coderef = sub {
            return $cursor < scalar @{$arg} ? $arg->[$cursor++] : undef;
        };
    }
    elsif (ref $arg eq 'CODE') {
        $coderef = $arg;
    }
    else {
        throw("The supplied argument does not look like an arrayref or a coderef ".(ref $arg));
    }

    # The generator lives under the 'sub' key; the look-ahead buffer
    # ('next') is only created once the first element is requested.
    my $self = { sub => $coderef };

    return bless $self, $class;
}
142 
143 
144 =head2 _fetch_next_value_if_needed
145 
146  Example : $iterator->_fetch_next_value_if_needed
147  Description: Helper method used to fill the internal buffer with the next value of the iterator
148  Returntype : none
149  Exceptions : none
150  Caller : general
151  Status : Experimental
152 
153 =cut
154 
sub _fetch_next_value_if_needed {
    my ($self) = @_;

    # The 'next' slot is the one-element look-ahead buffer. It is filled
    # only when the key is absent: a slot that exists but holds undef
    # records exhaustion, so the generator is not called again.
    exists $self->{next} or $self->{next} = $self->{sub}->();
}
162 
163 
164 =head2 next
165 
166  Example : $obj = $iterator->next
167  Description: returns the next object from this iterator, or undef if the iterator is exhausted
168  Returntype : Object type will depend on what this iterator is iterating over
169  Exceptions : none
170  Caller : general
171  Status : Experimental
172 
173 =cut
174 
sub next {
    my ($self) = @_;

    # Make sure the look-ahead slot has been populated.
    $self->_fetch_next_value_if_needed();

    # An undefined slot means the iterator is exhausted; the slot is left
    # in place and nothing (undef / empty list) is returned.
    return unless defined $self->{next};

    # Hand back the buffered element and clear the slot so that the
    # following call fetches a fresh value.
    return delete $self->{next};
}
185 
186 =head2 has_next
187 
188  Example : if ($iterator->has_next) { my $obj = $iterator->next }
189  Description: Boolean - true if this iterator has more elements to fetch, false when
190  it is exhausted
191  Returntype : boolean
192  Exceptions : none
193  Caller : general
194  Status : Experimental
195 
196 =cut
197 
sub has_next {
    my ($self) = @_;

    # Populate the look-ahead slot if necessary, then report whether it
    # holds a defined element. The element itself is not consumed.
    $self->_fetch_next_value_if_needed();

    my $pending = $self->{next};

    return defined $pending;
}
205 
206 =head2 peek
207 
208  Example : $obj = $iterator->peek
209  Description: returns the next object from this iterator, or undef if the iterator is exhausted,
210  much like next but does not advance the iterator (so the same object will be
211  returned on the following call to next or peek)
212  Returntype : Object type will depend on what this iterator is iterating over
213  Exceptions : none
214  Caller : general
215  Status : Experimental
216 
217 =cut
218 
sub peek {
    my ($self) = @_;

    # Fill the look-ahead slot if it is empty, then return its content
    # without removing it, so a later next() or peek() sees the same element.
    $self->_fetch_next_value_if_needed();

    my $upcoming = $self->{next};

    return $upcoming;
}
226 
227 =head2 grep
228 
229  Example : my $filtered_iterator = $original_iterator->grep(sub {$_->name =~ /^rs/});
230  Description: filter this iterator, returning another iterator
231  Argument : a coderef which returns true if the element should be included in the
232  filtered set, or false if the element should be filtered out. $_ will be
233  set locally to each element in turn so you should be able to write a block
234  in a similar way as for the perl grep function (although it will need to be
235  preceded with the sub keyword). Otherwise you can pass in a reference to a
236  subroutine which expects a single argument with the same behaviour.
237  Returntype : Bio::EnsEMBL::Utils::Iterator
238  Exceptions : thrown if the argument is not a coderef
239  Caller : general
240  Status : Experimental
241 
242 =cut
243 
sub grep {
    my ($self, $predicate) = @_;

    throw('Argument should be a coderef') if ref($predicate) ne 'CODE';

    # Generator for the filtered iterator: pull elements from this
    # iterator until one satisfies the predicate. The element is made
    # available both as $_ and as the predicate's first argument.
    my $next_match = sub {
        while ($self->has_next) {
            local $_ = $self->next;
            next unless $predicate->($_);
            return $_;
        }

        # underlying iterator exhausted without another match
        return undef;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($next_match);
}
257 
258 =head2 map
259 
260  Example : my $transformed_iterator = $original_iterator->map(sub {$_->name});
261  Description: transform the elements of this iterator, returning another iterator
262  Argument : a coderef which returns the desired transformation of each element.
263  $_ will be set locally to each original element in turn so you
264  should be able to write a block in a similar way as for the perl map
265  function (although it will need to be preceded with the sub keyword).
266  Otherwise you can pass in a reference to a subroutine which expects a
267  single argument with the same behaviour.
268  Returntype : Bio::EnsEMBL::Utils::Iterator
269  Exceptions : thrown if the argument is not a coderef
270  Caller : general
271  Status : Experimental
272 
273 =cut
274 
275 
sub map {
    my ($self, $transform) = @_;

    throw('Argument should be a coderef') if ref($transform) ne 'CODE';

    # Generator for the transformed iterator: take the next element of
    # this iterator and pass it (as $_ and as the first argument) through
    # the transformation. An undefined element ends the iteration.
    my $generator = sub {
        local $_ = $self->next;
        return undef unless defined $_;
        return $transform->($_);
    };

    return Bio::EnsEMBL::Utils::Iterator->new($generator);
}
286 
287 
288 =head2 each
289 
290  Example : $iterator->each(sub { print $_->name, "\n"; });
291  Description: Performs a full iteration of the current iterator instance.
292  Argument : a coderef which is called once for each element; its return value
293  is ignored. $_ will be set locally to each element.
294  Returntype : None
295  Exceptions : thrown if the argument is not a coderef
296  Caller : general
297  Status : Experimental
298 
299 =cut
300 
301 
sub each {
    my ($self, $coderef) = @_;

    throw('Argument should be a coderef') if ref($coderef) ne 'CODE';

    # Drain the iterator, invoking the callback once per element. The
    # element is exposed as $_ and as the callback's first argument; the
    # callback's return value is ignored.
    ELEMENT: while (1) {
        last ELEMENT unless $self->has_next();
        local $_ = $self->next();
        $coderef->($_);
    }

    return;
}
311 
312 
313 =head2 to_arrayref
314 
315  Example : my $arrayref = $iterator->to_arrayref;
316  Description: return a reference to an array containing all elements from the
317  iterator. This is created by simply iterating over the iterator
318  until it is exhausted and adding each element in turn to an array.
319  Note that this may consume a lot of memory for iterators over
320  large collections
321  Returntype : arrayref
322  Exceptions : none
323  Caller : general
324  Status : Experimental
325 
326 =cut
327 
sub to_arrayref {
    my $self = shift;

    # Collect every remaining element, in iteration order. This exhausts
    # the iterator and holds the whole set in memory at once.
    my $elements = [];

    until (not $self->has_next) {
        my $element = $self->next;
        push @{$elements}, $element;
    }

    return $elements;
}
339 
340 =head2 append
341 
342  Example : my $combined_iterator = $iterator1->append($iterator2, $iterator3);
343  Description: return a new iterator that combines this iterator with the others
344  passed as arguments, this new iterator will iterate over each
345  component iterator (in the order supplied here) until it is
346  exhausted and then move on to the next iterator until all are
347  exhausted
348  Argument : an array of Bio::EnsEMBL::Utils::Iterator objects
349  Returntype : Bio::EnsEMBL::Utils::Iterator
350  Exceptions : thrown if any of the arguments are not iterators
351  Caller : general
352  Status : Experimental
353 
354 =cut
355 
sub append {
    my ($self, @queue) = @_;

    # Every argument must at least offer the has_next/next pair that the
    # combined generator below relies on.
    foreach my $candidate (@queue) {
        my $is_iterator = UNIVERSAL::can($candidate, 'has_next')
                       && UNIVERSAL::can($candidate, 'next');

        throw("Argument to append doesn't look like an iterator") unless $is_iterator;
    }

    # this iterator is consumed first, followed by the arguments in order
    unshift @queue, $self;

    my $generator = sub {
        # discard iterators at the head of the queue that have run dry
        shift @queue while @queue && !$queue[0]->has_next;

        # nothing left anywhere: the combined iterator is exhausted
        return undef unless @queue;

        return $queue[0]->next;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($generator);
}
378 
379 =head2 take
380 
381  Example : my $limited_iterator = $iterator->take(5);
382  Description: return a new iterator that only iterates over the
383  first n elements of this iterator
384  Argument : a non-negative integer
385  Returntype : Bio::EnsEMBL::Utils::Iterator
386  Exceptions : thrown if the argument is negative
387  Caller : general
388  Status : Experimental
389 
390 =cut
391 
sub take {
    my ($self, $n) = @_;

    throw("Argument cannot be negative") if $n < 0;

    # Number of elements requested from the limited iterator so far.
    my $requested = 0;

    # Generator for the limited iterator: report exhaustion once $n
    # requests have been made, otherwise delegate to this iterator.
    my $generator = sub {
        return undef if $requested++ >= $n;
        return $self->next;
    };

    return Bio::EnsEMBL::Utils::Iterator->new($generator);
}
403 
404 =head2 skip
405 
406  Example : my $limited_iterator = $iterator->skip(5);
407  Description: skip over the first n elements of this iterator (and then return
408  the same iterator for your method chaining convenience)
409  Argument : a non-negative integer
410  Returntype : Bio::EnsEMBL::Utils::Iterator
411  Exceptions : thrown if the argument is negative
412  Caller : general
413  Status : Experimental
414 
415 =cut
416 
sub skip {
    my ($self, $n) = @_;

    throw("Argument cannot be negative") if $n < 0;

    # Discard $n elements; for $n == 0 the range is empty and nothing is
    # consumed. Calls made past the end of the iterator are harmless.
    my $last_index = $n - 1;

    foreach (0 .. $last_index) {
        $self->next;
    }

    # returning the iterator itself allows method chaining
    return $self;
}
426 
427 =head2 reduce
428 
429  Example : my $tot_length = $iterator->reduce(sub { $_[0] + $_[1]->length }, 0);
430  Description: reduce this iterator with the provided coderef, using the (optional)
431  second argument as the initial value of the accumulator
432  Argument[1]: a coderef that expects 2 arguments, the current accumulator
433  value and the next element in the set, and returns the next
434  accumulator value. Unless the optional second argument is
435  provided the first accumulator value passed in will be the
436  first element in the set
437  Argument[2]: (optional) an initial value to use for the accumulator instead
438  of the first value of the set
439  Returntype : returntype of the coderef
440  Exceptions : thrown if the argument is not a coderef
441  Caller : general
442  Status : Experimental
443 
444 =cut
445 
sub reduce {
    my ($self, $coderef, $init_val) = @_;

    throw('Argument should be a coderef') if ref($coderef) ne 'CODE';

    # Seed the accumulator with the supplied initial value if it is
    # defined, otherwise with the first element of the iterator.
    my $accumulator;
    if (defined $init_val) {
        $accumulator = $init_val;
    }
    else {
        $accumulator = $self->next;
    }

    # Fold each remaining element into the accumulator; the callback
    # receives (accumulator, element) and returns the new accumulator.
    until (!$self->has_next) {
        my $element = $self->next;
        $accumulator = $coderef->($accumulator, $element);
    }

    return $accumulator;
}
459 
460 1;
461 
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::Utils::Iterator::skip
public Bio::EnsEMBL::Utils::Iterator skip()
Bio::EnsEMBL::Utils::Iterator::next
public Object next()
map
public map()
Bio::EnsEMBL::Utils::Iterator::each
public void each()
Bio::EnsEMBL::Utils::Iterator
Definition: Iterator.pm:44
about
public about()
Bio::EnsEMBL::Utils::Iterator::_fetch_next_value_if_needed
protected void _fetch_next_value_if_needed()
Bio::EnsEMBL::Utils::Iterator::new
public Bio::EnsEMBL::Utils::Iterator new()
Bio::EnsEMBL::Utils::Iterator::take
public Bio::EnsEMBL::Utils::Iterator take()
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68