ensembl-hive  2.6
Accumulator.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
6 
7 =head1 DESCRIPTION
8 
9  A data container object that defines parameters for accumulated dataflow.
10  This object is generated from specially designed dataflow URLs.
11 
12 =head1 LICENSE
13 
14  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
15  Copyright [2016-2024] EMBL-European Bioinformatics Institute
16 
17  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
18  You may obtain a copy of the License at
19 
20  http://www.apache.org/licenses/LICENSE-2.0
21 
22  Unless required by applicable law or agreed to in writing, software distributed under the License
23  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24  See the License for the specific language governing permissions and limitations under the License.
25 
26 =head1 CONTACT
27 
28  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
29 
30 =cut
31 
32 
33 package Bio::EnsEMBL::Hive::Accumulator;
34 
35 use strict;
36 use warnings;
37 
38 use Bio::EnsEMBL::Hive::Utils ('stringify');
39 
40 use base ( 'Bio::EnsEMBL::Hive::Storable' );
41 
42 
# Overrides the default from the Cacheable parent: lists the attribute names
# that together make up the unique key of an Accumulator.
# Returns: a reference to a freshly built array of attribute names.
sub unikey {
    my @key_fields = qw( accu_name accu_address accu_input_variable );

    return \@key_fields;
}
46 
47 
# Combined getter/setter for the accumulator's name.
# Args:    (optional) the new name; any supplied argument, even undef,
#          overwrites the stored value.
# Returns: the currently stored name (undef if it was never set).
sub accu_name {
    my ( $self, @new_value ) = @_;

    $self->{'_accu_name'} = $new_value[0] if @new_value;

    return $self->{'_accu_name'};
}
56 
57 
# Combined getter/setter for the accumulator's address (the key/index
# signature template that dataflow() substitutes parameters into).
# Args:    (optional) the new address; any supplied argument, even undef,
#          overwrites the stored value.
# Returns: the stored address, or an empty string when none is defined.
sub accu_address {
    my ( $self, @new_value ) = @_;

    $self->{'_accu_address'} = $new_value[0] if @new_value;

    my $address = $self->{'_accu_address'};
    return defined $address ? $address : '';
}
66 
67 
# Combined getter/setter for the name of the parameter whose value is fed
# into the accumulator.
# Args:    (optional) the new variable name; any supplied argument, even
#          undef, overwrites the stored value.
# Returns: the stored variable name, falling back to accu_name() when no
#          variable name is defined.
sub accu_input_variable {
    my ( $self, @new_value ) = @_;

    $self->{'_accu_input_variable'} = $new_value[0] if @new_value;

    my $variable = $self->{'_accu_input_variable'};
    return defined $variable ? $variable : $self->accu_name;
}
76 
77 
# Collects the accumulator settings in the form used for URL query
# parameters.
# Returns: a hash ref with the keys accu_name, accu_address and
#          accu_input_variable.
sub url_query_params {
    my $self = shift @_;

    # The address and input variable are read straight from the object
    # rather than through their accessors, so the actual (possibly missing)
    # values are reported instead of the accessors' fallbacks.
    my %params = (
        'accu_name'           => $self->accu_name,
        'accu_address'        => $self->{'_accu_address'},
        'accu_input_variable' => $self->{'_accu_input_variable'},
    );

    return \%params;
}
87 
88 
# Builds a compact human-readable label of the form
# <accu_name><accu_address>:=<accu_input_variable>.
# Returns: the label string.
sub display_name {
    my $self = shift @_;

    my $name     = $self->accu_name;
    my $address  = $self->accu_address;
    my $variable = $self->accu_input_variable;

    return $name . $address . ':=' . $variable;
}
96 
97 
# Performs accumulated dataflow: for every output_id, stores one accumulator
# row addressed to the semaphore that is controlled by the emitting job.
# Args:    $output_ids   - array ref of output ids to flow
#          $emitting_job - the job emitting the data; must provide
#                          controlled_semaphore, dbID and
#                          _param_possibly_overridden
# Dies:    when the emitting job has no controlled semaphore, or (via
#          _check_empty_keys) when substitution produced an empty key.
sub dataflow {
    my ( $self, $output_ids, $emitting_job ) = @_;

    my $receiving_semaphore = $emitting_job->controlled_semaphore
        or die "No controlled semaphore, cannot perform accumulated dataflow";

    my $sending_job_id         = $emitting_job->dbID;
    my $receiving_semaphore_id = $receiving_semaphore->dbID;
    my $accu_adaptor           = $receiving_semaphore->adaptor->db->get_AccumulatorAdaptor;

    my $accu_name           = $self->accu_name;
    my $accu_address        = $self->accu_address;
    my $accu_input_variable = $self->accu_input_variable;

    # Columns that are identical for every row produced by this call.
    my %shared_columns = (
        'sending_job_id'         => $sending_job_id,
        'receiving_semaphore_id' => $receiving_semaphore_id,
        'struct_name'            => $accu_name,
    );

    my @rows;

    for my $output_id (@$output_ids) {

        # Replace every word in the address template with the value of the
        # parameter of that name (an undefined value becomes an empty string).
        ( my $key_signature = $accu_address )
            =~ s{(\w+)}{$emitting_job->_param_possibly_overridden($1,$output_id) // '' }eg;

        # Refuse keys that only became empty because of the substitution.
        _check_empty_keys( $key_signature, $accu_address );

        push @rows, {
            %shared_columns,
            'key_signature' => $key_signature,
            'value'         => stringify( $emitting_job->_param_possibly_overridden($accu_input_variable, $output_id) ),
        };
    }

    $accu_adaptor->store( \@rows );
}
135 
136 =head2 _check_empty_keys
137 
138  Description: a private function that checks the $key_signature for empty
139  bracket pairs that weren't empty before
140 
141 =cut
142 
# Private function: verifies that parameter substitution did not create an
# empty bracket pair ({} or []) that was not already empty in the template.
# Args:    $key_signature - the address after substitution
#          $accu_address  - the address template before substitution
# Dies:    on the first bracket pair that is empty in $key_signature but was
#          not empty in $accu_address.
sub _check_empty_keys {
    my ( $key_signature, $accu_address ) = @_;

    my @bracket_kinds = ( [ '{', '}' ], [ '[', ']' ] );

    for my $kind (@bracket_kinds) {
        my ( $open, $close ) = @$kind;

        my $empty_after  = _find_empty_brackets( $key_signature, $open, $close );
        my $empty_before = _find_empty_brackets( $accu_address,  $open, $close );

        # Pair numbers that were deliberately left empty in the template.
        my %allowed_empty = map { $_ => 1 } @$empty_before;

        # Pair numbers start at 1, so the first offender (if any) is defined.
        my ($offender) = grep { !exists $allowed_empty{$_} } @$empty_after;
        next unless defined $offender;

        die "A key in the accumulator had an empty substitution. Bracket '"
            . $open . $close .
            "' pair number $offender, substitution from '$accu_address' to '$key_signature'";
    }

    return;
}
162 
163 =head2 _find_empty_brackets
164 
165  Description: a private function that finds and counts opening brackets in a
166  string
167  Returns: a ref to an array with an entry for each empty bracket pair. The
168  entry is the count of how many preceding opening brackets there are.
169 
170 =cut
171 
# Private function: scans a string for opening delimiters and reports which
# of them are immediately followed by the closing delimiter (an empty pair
# such as {} or []).
# Args:    $string - the string to scan
#          $open   - the opening delimiter, matched literally
#          $close  - the closing delimiter, matched literally
# Returns: a ref to an array with one entry per empty pair; each entry is the
#          1-based ordinal of that pair's opening delimiter among all opening
#          delimiters in the string.
sub _find_empty_brackets {
    my ( $string, $open, $close ) = @_;
    my $count  = 0;
    my $result = [];

    # Nothing to scan: report no empty pairs.
    return $result unless defined $string;

    # Walk over every opening delimiter; //g leaves pos() just after it.
    while ( $string =~ /\Q$open\E/g ) {
        $count++;

        # \G anchors at pos(); the look-ahead consumes nothing and the match
        # has no /g, so the position of the enclosing scan is not disturbed.
        # The closing delimiter is quoted like the opening one, so that regex
        # metacharacters (e.g. ')') are matched literally instead of breaking
        # the pattern.
        if ( $string =~ /\G(?=\Q$close\E)/ ) {
            push( @$result, $count );
        }
    }
    return $result;
}
188 
# Produces the printable representation of this object:
# the display name wrapped in 'Accumulator(...)'.
# Returns: the representation string.
sub toString {
    my ($self) = @_;

    my $label = $self->display_name;

    return 'Accumulator(' . $label . ')';
}
194 
195 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::Cacheable
Definition: Cacheable.pm:6
Bio::EnsEMBL::Hive::Accumulator
Definition: Accumulator.pm:11
Bio::EnsEMBL::Hive
Definition: Hive.pm:38