3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
6 Licensed under the Apache License,
Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
28 package Bio::EnsEMBL::Hive::Utils::Collection;
36 my $self = bless {}, $class;
38 $self->
listref( shift @_ || [] );
48 $self->{
'_listref'} = shift @_;
50 return $self->{
'_listref'};
57 return @{ $self->listref };
63 my $candidate = shift @_;
65 foreach my $element (@{ $self->listref }) {
66 return 1
if($element eq $candidate);
75 push @{ $self->listref }, @_;
81 my $candidate = shift @_;
83 unless( $self->present( $candidate ) ) {
84 $self->add( $candidate );
91 my $candidate = shift @_;
93 my $listref = $self->listref;
95 for(my $i=scalar(@$listref)-1;$i>=0;$i--) {
96 if($listref->[$i] eq $candidate) {
97 splice @$listref, $i, 1;
104 my ($self, %method_to_filter_value) = @_;
106 ELEMENT:
foreach my $element (@{ $self->listref }) {
107 keys %method_to_filter_value; # sic! This is to
"rewind" the each%
operator to the beginning each time
108 while(my ($filter_name, $filter_value) = each %method_to_filter_value) {
109 my $actual_value = (ref($element) eq
'HASH') ? $element->{$filter_name} : $element->$filter_name();
110 next ELEMENT unless( defined($actual_value) # either both defined and equal or neither defined
111 ? defined($filter_value) && ( (ref($filter_value) eq
'CODE')
112 ? &$filter_value( $actual_value )
113 : ( $filter_value eq $actual_value )
115 : !defined($filter_value)
121 return undef; # have to be
explicit here to avoid surprises
126 my ($self, %method_to_filter_value) = @_;
128 my @filtered_elements = ();
130 ELEMENT:
foreach my $element (@{ $self->listref }) {
131 keys %method_to_filter_value; # sic! This is to
"rewind" the each%
operator to the beginning each time
132 while(my ($filter_name, $filter_value) = each %method_to_filter_value) {
133 my $actual_value = (ref($element) eq
'HASH') ? $element->{$filter_name} : $element->$filter_name();
134 next ELEMENT unless( defined($actual_value) # either both defined and equal or neither defined
135 ? defined($filter_value) && ( (ref($filter_value) eq
'CODE')
136 ? &$filter_value( $actual_value )
137 : ( $filter_value eq $actual_value )
139 : !defined($filter_value)
142 push @filtered_elements, $element;
145 return \@filtered_elements;
149 sub _find_all_by_subpattern { # subpatterns can be combined into full patterns
using +-,
150 my ($self, $pattern) = @_;
152 my $filtered_elements = [];
155 if( $pattern=~/^\d+$/ ) {
157 $filtered_elements = $self->find_all_by(
'dbID', $pattern );
159 } elsif( $pattern=~/^(\d+)\.\.(\d+)$/ ) {
161 $filtered_elements = $self->find_all_by(
'dbID', sub {
return $1<=$_[0] && $_[0]<=$2; } );
163 } elsif( $pattern=~/^(\d+)\.\.$/ ) {
165 $filtered_elements = $self->find_all_by(
'dbID', sub {
return $1<=$_[0]; } );
167 } elsif( $pattern=~/^\.\.(\d+)$/ ) {
169 $filtered_elements = $self->find_all_by(
'dbID', sub {
return $_[0]<=$1; } );
171 } elsif( $pattern=~/^\w+$/) {
173 $filtered_elements = $self->find_all_by(
'name', $pattern );
175 } elsif( $pattern=~/^[\w\%]+$/) {
178 $filtered_elements = $self->find_all_by(
'name', sub {
return $_[0]=~/^${pattern}$/; } );
180 } elsif( $pattern=~/^(\w+)==(.*)$/) {
182 $filtered_elements = $self->find_all_by( $1, $2 );
184 } elsif( $pattern=~/^(\w+)!=(.*)$/) {
186 $filtered_elements = $self->find_all_by( $1, sub {
return $_[0] ne $2; } );
188 } elsif( $pattern=~/^(\w+)<=(.*)$/) { # NB: the order is important - all digraphs should be parsed before their proper prefixes
190 $filtered_elements = $self->find_all_by( $1, sub {
return $_[0] <= $2; } );
192 } elsif( $pattern=~/^(\w+)>=(.*)$/) {
194 $filtered_elements = $self->find_all_by( $1, sub {
return $_[0] >= $2; } );
196 } elsif( $pattern=~/^(\w+)<(.*)$/) {
198 $filtered_elements = $self->find_all_by( $1, sub {
return $_[0] < $2; } );
200 } elsif( $pattern=~/^(\w+)>(.*)$/) {
202 $filtered_elements = $self->find_all_by( $1, sub {
return $_[0] > $2; } );
204 } elsif( $pattern=~/^(\w+)~(.*)$/) {
208 $pattern =~ s/\%/.*/g;
209 $filtered_elements = $self->find_all_by( $field, sub {
return $_[0 ]=~ /^(.*,)?${pattern}(,.*)?$/; } );
212 die
"The pattern '$pattern' is not recognized\n";
215 return $filtered_elements;
219 =head2 find_all_by_pattern
221 Arg [1] : (optional)
string $pattern
222 Example : my $first_fifteen_analyses_and_two_more = $collection->find_all_by_pattern(
'1..15,analysis_X,21' );
223 Example : my $two_open_ranges = $collection->>find_all_by_pattern(
'..7,10..' );
224 Example : my $double_exclusion = $collection->find_all_by_pattern(
'1..15-3..5+4' );
225 Example : my $blast_related_with_exceptions = $collection->find_all_by_pattern(
'blast%-12-%funnel' );
226 Description: Filters an arrayref of non-repeating objects from the given collection by interpreting a pattern.
227 The pattern can contain individual analyses_ids, individual logic_names,
228 open and closed ranges of analysis_ids, wildcard patterns of logic_names,
229 merges (+ or ,) and exclusions (-) of the above subsets.
231 Caller : both beekeeper.pl (for scheduling) and runWorker.pl (for specialization)
235 sub find_all_by_pattern {
236 my ($self, $pattern) = @_;
238 if( not defined($pattern) ) {
240 return [ $self->list ];
244 # By using the grouping, we ask Perl to return the pattern and their delimiters
245 my @syll = split(/([+\-,])/, $pattern);
247 my %uniq =
map { (
"$_" => $_) } @{ $self->_find_all_by_subpattern( shift @syll ) }; # initialize with the first syllable
250 my $operation = shift @syll; # by construction
this is one of [+-,]
251 my $subpattern = shift @syll; # can be an empty
string
253 foreach my $element (@{ $self->_find_all_by_subpattern( $subpattern ) }) {
254 if($operation eq
'-') {
255 delete $uniq{
"$element" };
257 $uniq{
"$element" } = $element;
262 return [ values %uniq ];
267 sub dark_collection { # contain another collection of objects marked
for deletion
271 $self->{
'_dark_collection'} = shift @_;
273 return $self->{
'_dark_collection'};
277 sub forget_and_mark_for_deletion {
279 my $candidate = shift @_;
281 $self->forget( $candidate );
283 unless( $self->dark_collection ) {
286 $self->dark_collection->add( $candidate );
293 $self->listref( [] );
294 $self->dark_collection( undef );