ensembl-hive  2.6
Params.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
6 
7 =head1 SYNOPSIS
8 
9 By inheriting from this module you make your module able to deal with parameters:
10 
11  1) parsing of parameters in the order of precedence, starting with the lowest:
12  #
13  ## general usage:
14  # $self->param_init( $lowest_precedence_hashref, $middle_precedence_hashref, $highest_precedence_hashref );
15  #
16  ## typical usage:
17  # $job->param_init(
18  # $runObj->param_defaults(), # module-wide built-in defaults have the lowest precedence (will always be the same for this module)
19  # $hive_pipeline->params_as_hash(), # then come the pipeline-wide parameters from the 'pipeline_wide_parameters' table (define things common to all analyses in this pipeline)
20  # $self->analysis->parameters(), # analysis-wide 'parameters' are even more specific (can be defined differently for several occurrences of the same module)
21  # $job->input_id(), # job-specific 'input_id' parameters have the highest precedence
22  # $job->accu_hash(), # parameters accumulated and sent for this job by other preceding jobs
23  # );
24 
25 
26  2) reading a parameter's value
27  #
28  # my $source = $self->param('source');
29 
30  3) dynamically setting a parameter's value
31  #
32  # $self->param('binpath', '/software/ensembl/compara');
33  #
34  Note: It proved to be a convenient mechanism to exchange params
35  between fetch_input(), run(), write_output() and other methods.
36 
37 =head1 DESCRIPTION
38 
39  Most of Compara RunnableDB methods work under assumption
40  that both analysis.parameters and job.input_id fields contain a Perl-style parameter hashref as a string.
41 
42  This module implements a generic param() method that allows to set parameters according to the following parameter precedence rules:
43 
44  (1) Job-Specific parameters defined in job.input_id hash, they have the highest priority and override everything else.
45 
46  (2) Analysis-Wide parameters defined in analysis.parameters hash. Can be overridden by (1).
47 
46  (3) Pipeline-Wide parameters defined in the 'pipeline_wide_parameters' table. Can be overridden by (1) and (2).
49 
50  (4) Module_Defaults that are hard-coded into modules have the lowest precedence. Can be overridden by (1), (2) and (3).
51 
52 
53  param_exists() returns 1 if the parameter is present and can be substituted,
54  undef if the substitution failed.
55  0 if the parameter is absent,
56  param_is_defined() returns 1 if the parameter is present and can be substituted to a defined value,
57  undef if the substitution fails,
58  0 otherwise.
59  param() returns the value if param_exists() returned true, undef otherwise.
60  param_required() is like param() but dies instead of returning undef.
61 
62  In practice, given this hash of parameters:
63  {
64  'a' => 3,
65  'b' => undef,
66  'c' => '#other#',
67  }
68  the Params API would return:
69 
70  | a b c d
71  -------------------+----------------------------
72  param_exists() | 1 1 undef 0
73  param_is_defined() | 1 0 undef 0
74  param() | 3 undef undef undef
75  param_required() | 3 (die) (die) (die)
76 
77 
78 =head1 LICENSE
79 
80  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
81  Copyright [2016-2024] EMBL-European Bioinformatics Institute
82 
83  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
84  You may obtain a copy of the License at
85 
86  http://www.apache.org/licenses/LICENSE-2.0
87 
88  Unless required by applicable law or agreed to in writing, software distributed under the License
89  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
90  See the License for the specific language governing permissions and limitations under the License.
91 
92 =head1 CONTACT
93 
94  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
95 
96 =cut
97 
98 
99 package Bio::EnsEMBL::Hive::Params;
100 
101 use strict;
102 use warnings;
103 
104 use List::Util qw(first min max minstr maxstr reduce sum shuffle); # make them available for substituted expressions
105 use Bio::EnsEMBL::Hive::Utils ('stringify', 'dir_revhash', 'go_figure_dbc', 'throw'); # NB: dir_revhash() is used by some substituted expressions, do not remove!
106 
107 
108 =head2 new
109 
110  Description: a trivial constructor, mostly for testing a Params object
111 
112 =cut
113 
# Trivial constructor: returns a fresh, empty hashref blessed into the
# invocant class. No parameter state is created here.
sub new {
    my ($class) = @_;

    my $self = {};

    return bless $self, $class;
}
119 
120 
121 =head2 fuse_param_hashes
122 
123  Description: Performs the actual task of evaluating and fusing/merging a preference list of parameter hashes into one parameter hash.
124 
125 =cut
126 
# Merges a precedence-ordered list of parameter sources into one new hash.
#
# Args    : any number of sources, lowest precedence first. Each source is
#           either a hashref, a string that evaluates to a hashref, or undef
#           (undef, or a string evaluating to undef, counts as an empty hash).
# Returns : a reference to a newly built hash; on key clashes later sources win.
# Dies    : "Could not evaluate ..." if a string source fails to evaluate,
#           "Expected a {'param'=>'value'} hashref ..." if it evaluates to
#           something that is not a hashref.
#
# The arguments are never modified (they alias the caller's variables), and
# $@ is only inspected right after an eval that actually ran.
sub fuse_param_hashes {
    my $self = shift @_;    # the remaining elements of @_ are the sources

    my %fused_hash = ();

    foreach my $source (@_) {
        # Work on a copy: $source is an alias of the caller's variable and
        # may even be a read-only literal, so it must not be assigned to.
        my $param_hash = $source;

        if(ref($source) ne 'HASH') {
            $param_hash = undef;

            if(defined($source)) {
                # NB: string eval of parameter text is by design here;
                # the sources are expected to be trusted.
                $param_hash = eval($source);
                if($@) {    # checked only when eval() has just run, so it cannot be stale
                    die "Could not evaluate '$source': $@\n";
                }
            }
            $param_hash = {} if(!defined($param_hash));

            if(ref($param_hash) ne 'HASH') {
                die "Expected a {'param'=>'value'} hashref, but got the following string instead: '$source'\n";
            }
        }

        # Slice assignment instead of each(): the result does not depend on
        # whatever state the source hash's iterator was left in.
        @fused_hash{ keys %$param_hash } = values %$param_hash;
    }

    return \%fused_hash;
}
152 
153 
154 =head2 param_init
155 
156  Description: Sets up the unsubstituted parameters in the right precedence order (called by AnalysisJob::load_parameters)
157 
158 =cut
159 
# (Re)initialises the parameter state of the object.
#
# Args    : the parameter sources, lowest precedence first; they are handed
#           unchanged to fuse_param_hashes().
# Effect  : stores the fused hash as the unsubstituted parameters and
#           empties the cache of substituted values.
sub param_init {
    my $self = shift;    # what is left in @_ is the list of sources

    my $fused_sources = $self->fuse_param_hashes( @_ );

    $self->{'_unsubstituted_param_hash'} = $fused_sources;

    return $self->{'_param_hash'} = {};
}
166 
167 
# Looks a parameter up in an optional overriding hash first, falling back
# to the object's own parameters.
#
# Args    : $param_name      - name of the parameter
#           $overriding_hash - optional hashref; if it is a plain hashref
#                              that contains $param_name (even with an undef
#                              value), that value is returned as is
# Returns : the overriding value, otherwise whatever _param_silent() returns
sub _param_possibly_overridden {
    my ($self, $param_name, $overriding_hash) = @_;

    if( ref($overriding_hash) eq 'HASH' and exists($overriding_hash->{ $param_name }) ) {
        return $overriding_hash->{ $param_name };
    }

    return $self->_param_silent($param_name);
}
176 
177 
# Getter/setter shared by param(), param_required(), param_exists() and
# param_is_defined(). It never warns.
#
# Args    : $param_name            - required; a false name is reported via throw()
#           $new_val    (optional) - when present (even undef) the call is a setter
#           $is_unsubstituted (optional) - when true, $new_val is stored as a
#                         template that is substituted lazily on the next read
# Returns : the substituted value currently held for $param_name, or undef if
#           there is none (a just-stored template has no substituted value yet)
# Dies    : "ParamError: the evaluation of ... requires ... which is missing"
#           when the stored template depends on a parameter that does not exist
sub _param_silent {
    my $self        = shift @_;
    my $param_name  = shift @_
        or throw("ParamError: calling param() without arguments");

    if(@_) { # If there is a value (even if undef), then set it!
        my $new_val = shift @_;
        if (@_ and (shift @_)) {
            # An extra true argument after the value means that the value is unsubstituted
            $self->{'_unsubstituted_param_hash'}{$param_name} = $new_val;
            # Forget any value substituted from a previous definition, otherwise
            # every later read would return it and never look at the new template.
            delete $self->{'_param_hash'}{$param_name};
        } else {
            $self->{'_param_hash'}{$param_name} = $new_val;
        }
    } elsif( !exists( $self->{'_param_hash'}{$param_name}) ) {
        if (exists( $self->{'_unsubstituted_param_hash'}{$param_name} ) ) {
            # '_used_missing_params' is a single slot written by nested lookups of
            # absent parameters: put the outer value aside, run our own
            # substitution with a clean slot, then restore it.
            my $ini_used_missing_param = $self->{'_used_missing_params'};
            delete $self->{'_used_missing_params'};
            my $unsubstituted = $self->{'_unsubstituted_param_hash'}{$param_name};
            my $substituted = $self->param_substitute( $unsubstituted );
            if (my $failed_dep = $self->{'_used_missing_params'}) {
                # Leave no bookkeeping behind so that a later retry starts clean
                delete $self->{'_used_missing_params'};
                delete $self->{'_substitution_in_progress'};
                die "ParamError: the evaluation of '$param_name' requires '$failed_dep' which is missing\n";
            }
            $self->{'_param_hash'}{$param_name} = $substituted;     # cache the result
            $self->{'_used_missing_params'} = $ini_used_missing_param if $ini_used_missing_param;
        } else {
            # Unknown parameter: record its name for whoever is substituting on our behalf
            $self->{'_used_missing_params'} = $param_name;
        }
    } else {
        # The parameter has already been substituted
    }

    return exists( $self->{'_param_hash'}{$param_name} )
        ? $self->{'_param_hash'}{$param_name}
        : undef;
}
216 
217 
218 =head2 param_required
219 
220  Arg [1] : string $param_name
221 
222  Description: A strict getter method for a job's parameter; will die if the parameter was not set or is undefined
223 
224  Example : my $source = $self->param_required('source');
225 
226  Returntype : any Perl structure or object that you dared to store
227 
228 =cut
229 
# Strict getter: returns the value of $param_name, or dies with a
# "ParamError: ..." message when that value is undefined (which includes the
# case of a parameter that was never set). Errors raised by _param_silent()
# itself are propagated unchanged.
sub param_required {
    my ($self, $param_name) = @_;

    my $value = $self->_param_silent($param_name);

    die "ParamError: value for param_required('$param_name') is required and has to be defined\n"
        unless defined( $value );

    return $value;
}
240 
241 
242 =head2 param_exists
243 
244  Arg [1] : string $param_name
245 
246  Description: A predicate tester for whether the parameter has been initialized (even to undef)
247 
248  Example : if( $self->param_exists('source') ) { print "'source' exists\n"; } else { print "never heard of 'source'\n"; }
249 
250  Returntype : boolean
251 
252 =cut
253 
# Tells whether $param_name is known.
#
# The parameter is first looked up through _param_silent(), which substitutes
# and caches it if needed (and dies if that substitution needs a missing
# parameter).
#
# Returns : 1     if a substituted value is available (even an undef one),
#           undef if only an unsubstituted definition is present,
#           0     if the parameter is not known at all.
sub param_exists {
    my ($self, $param_name) = @_;

    $self->_param_silent($param_name);

    return 1 if exists( $self->{'_param_hash'}{$param_name} );

    # Not substituted: tell "defined but unusable" apart from "absent"
    return exists( $self->{'_unsubstituted_param_hash'}{$param_name} )
        ? undef
        : 0;
}
268 
269 =head2 param_is_defined
270 
271  Arg [1] : string $param_name
272 
273  Description: A predicate tester for definedness of a parameter
274 
275  Example : if( $self->param_is_defined('source') ) { print "defined, possibly zero"; } else { print "undefined"; }
276 
277  Returntype : boolean
278 
279 =cut
280 
# Tells whether $param_name has a defined value.
#
# The value is obtained through _param_silent(), which substitutes and caches
# it if needed (and dies if that substitution needs a missing parameter).
#
# Returns : 1     if a substituted value is available and defined,
#           0     if it is available but undef, or the parameter is unknown,
#           undef if only an unsubstituted definition is present.
sub param_is_defined {
    my ($self, $param_name) = @_;

    my $value = $self->_param_silent($param_name);

    unless( exists( $self->{'_param_hash'}{$param_name} ) ) {
        # No substituted value: either the definition is unusable or there is none
        return exists( $self->{'_unsubstituted_param_hash'}{$param_name} )
            ? undef
            : 0;
    }

    return defined($value) ? 1 : 0;
}
295 
296 
297 =head2 param
298 
299  Arg [1] : string $param_name
300 
301  Arg [2] : (optional) $param_value
302 
303  Arg [3] : (optional) $value_needs_substitution (in case you want to define a parameter with '#other_param#' and let the system compute its true value later)
304 
305  Description: A getter/setter method for a job's parameters that are initialized through multiple levels of precedence (see param_init() )
306 
307  Example 1 : my $source = $self->param('source'); # acting as a getter
308 
309  Example 2 : $self->param('binpath', '/software/ensembl/compara'); # acting as a setter
310 
311  Returntype : any Perl structure or object that you dared to store
312 
313 =cut
314 
# Public getter/setter for a parameter.
#
# Args    : $param_name  - required; a false name is reported via throw()
#           $param_value - optional; when present the call is a setter
#           $value_needs_substitution - optional; when true, $param_value is a
#                          template whose value is to be computed later
# Returns : whatever _param_silent() returns for these arguments
# Warns   : "ParamWarning: ..." when the parameter has no substituted value,
#           except right after storing a template: in that case the absence of
#           a substituted value is expected and not a sign of misuse.
sub param {
    my $self        = shift @_;
    my $param_name  = shift @_
        or throw("ParamError: calling param() without arguments");

    # Same test as the one _param_silent() applies: a value followed by a true flag
    my $storing_template = ( @_ > 1 and $_[1] ) ? 1 : 0;

    my $value = $self->_param_silent( $param_name, @_ );

    if( !$storing_template and !exists( $self->{'_param_hash'}{$param_name} ) ) {
        warn "ParamWarning: value for param('$param_name') is used before having been initialized!\n";
    }

    return $value;
}
328 
329 
330 =head2 param_substitute
331 
332  Arg [1] : Perl structure $string_with_templates
333 
334  Description: Performs parameter substitution on strings that contain templates like " #param_name# followed by #another_param_name# " .
335 
336  Returntype : *another* Perl structure with matching topology (may be more complex as a result of substituting a substructure for a term)
337 
338 =cut
339 
# Recursively substitutes '#...#' templates inside a Perl structure.
#
# Args    : $structure       - scalar, arrayref or hashref (nested at will)
#           $overriding_hash - optional hashref passed down to
#                              _subst_one_hashpair() for every template
# Returns : - a false scalar (undef, '', 0) is returned as is;
#           - a string that is exactly one template returns the substituted
#             value itself, which may be a reference;
#           - any other string has its templates interpolated, or becomes undef
#             if at least one of them substituted to undef;
#           - arrays and hashes are rebuilt element by element into new
#             structures (hash keys are substituted too);
#           - any other reference type is returned unchanged, with a warning.
sub param_substitute {
    my ($self, $structure, $overriding_hash) = @_;

    my $ref_type = ref($structure);

    if(!$ref_type) {

        if(!$structure) {

            return $structure;

        } elsif($structure=~/^(?:#(expr\(.+?\)expr|[\w:]+)#)$/) {    # if the given string is one complete substitution, we don't want to force the output into a string

            return $self->_subst_one_hashpair($1, $overriding_hash);

        } else {
            my $scalar_defined  = 1;

            # $structure is a private copy of the argument, so editing it in place is safe
            $structure=~s/(?:#(expr\(.+?\)expr|[\w:]+)#)/my $value = $self->_subst_one_hashpair($1, $overriding_hash); $scalar_defined &&= defined($value); $value/eg;

            return $scalar_defined ? $structure : undef;
        }

    } elsif($ref_type eq 'ARRAY') {
        my @substituted_array = ();
        foreach my $element (@$structure) {
            push @substituted_array, $self->param_substitute($element, $overriding_hash);
        }
        return \@substituted_array;
    } elsif($ref_type eq 'HASH') {
        my %substituted_hash = ();
        # Iterate over a snapshot of the keys rather than with each(): a
        # substitution may die half-way through, and each() would then leave
        # the iterator of this (usually long-lived) hash in the middle, making
        # the next traversal silently skip entries.
        foreach my $key (keys %$structure) {
            my $substituted_value   = $self->param_substitute($structure->{$key}, $overriding_hash);
            my $substituted_key     = $self->param_substitute($key, $overriding_hash);
            $substituted_hash{$substituted_key} = $substituted_value;
        }
        return \%substituted_hash;
    } else {
        warn "Could not substitute parameters in '$structure' - unsupported data type '$ref_type'\n";
        return $structure;
    }
}
380 
381 
# An example stringification formatter (others can be defined here or in a
# descendent of Params): renders connection details as mysql command-line
# options.
#
# Args    : $db_conn - either a hashref with the keys -host, -port, -user,
#                      -pass and -dbname, or anything go_figure_dbc() accepts
# Returns : "--host=H --port=P --user='U' --password='PW' DBNAME"
#
# User and password are wrapped in single quotes; a single quote inside
# either of them is emitted as '\'' so that it cannot end the quoting early.
sub mysql_conn {
    my ($self, $db_conn) = @_;

    my ($host, $port, $user, $pass, $dbname);

    if(ref($db_conn) eq 'HASH') {
        $host   = $db_conn->{-host};
        $port   = $db_conn->{-port};
        $user   = $db_conn->{-user};
        $pass   = $db_conn->{-pass};
        $dbname = $db_conn->{-dbname};
    } else {
        my $dbc = go_figure_dbc( $db_conn );
        $host   = $dbc->host;
        $port   = $dbc->port;
        $user   = $dbc->username;
        $pass   = $dbc->password;
        $dbname = $dbc->dbname;
    }

    # Single-quote a value for the shell: close the quote, add an escaped quote, reopen
    my $single_quoted = sub {
        my $text = defined($_[0]) ? $_[0] : '';
        $text =~ s/'/'\\''/g;
        return "'$text'";
    };

    return "--host=$host --port=$port --user=" . $single_quoted->($user)
         . " --password=" . $single_quoted->($pass) . " $dbname";
}
392 
# Another example stringification formatter: extracts the database name.
#
# Args    : $db_conn - either a hashref (its -dbname entry is returned), or
#                      anything go_figure_dbc() accepts (the dbname() of the
#                      resulting object is returned)
sub mysql_dbname {
    my ($self, $db_conn) = @_;

    return $db_conn->{-dbname} if ref($db_conn) eq 'HASH';

    return go_figure_dbc( $db_conn )->dbname;
}
403 
# Another example stringification formatter: turns an arrayref into a
# comma-separated string in which every element is wrapped in single quotes.
# Elements are used verbatim (nothing is escaped).
sub csvq {
    my ($self, $list) = @_;

    my @quoted_elements = map { q{'} . $_ . q{'} } @$list;

    return join(',', @quoted_elements);
}
409 
410 #--------------------------------------------[private methods]----------------------------------------------
411 
412 =head2 _subst_one_hashpair
413 
414  Description: this is a private method that performs one substitution. Called by param_substitute().
415 
416 =cut
417 
# Performs one substitution for param_substitute().
#
# Args    : $inside_hashes   - the text found between two '#' signs, one of:
#                                'name'            value of parameter 'name'
#                                'method:name'     $self->method( value of 'name' )
#                                'expr(...)expr'   result of evaluating the Perl
#                                                  expression, with every #name#
#                                                  inside it replaced by a lookup
#           $overriding_hash - optional hashref consulted before the object's
#                              own parameters
# Returns : the substituted value (undef if $inside_hashes has none of the
#           forms above)
# Dies    : "ParamError: substitution loop ..." if this very substitution is
#           already in progress; "ParamError: Cannot evaluate the expression
#           ..." if an expr() fails; any error raised by a lookup or by the
#           formatter method is re-raised unchanged.
#
# Whatever happens, the in-progress marker of this substitution is removed
# before returning or re-raising, so that a failure cannot make a later
# attempt look like a substitution loop.
sub _subst_one_hashpair {
    my ($self, $inside_hashes, $overriding_hash) = @_;

    if($self->{'_substitution_in_progress'}{$inside_hashes}++) {
        die "ParamError: substitution loop among {".join(', ', map {"'$_'"} keys %{$self->{'_substitution_in_progress'}})."} has been detected\n";
    }

    my $value;

    my $succeeded = eval {

            # FIXME does not allow substitution of parameters names that have non-alphanumeric characters
        if($inside_hashes=~/^\w+$/) {

            $value = $self->_param_possibly_overridden($inside_hashes, $overriding_hash);

        } elsif($inside_hashes=~/^(\w+):(\w+)$/) {

            my ($formatter, $formatted_param) = ($1, $2);   # copy the captures before calling anything else

            $value = $self->$formatter($self->_param_possibly_overridden($formatted_param, $overriding_hash));

        } elsif($inside_hashes=~/^expr\((.*)\)expr$/) {

            my $expression = $1;

            if($expression=~/\$\w+/) {
                warn "ParamWarning: possibly using old substitution syntax in expression '$expression'; please use new syntax '#alpha#' instead of old '\$alpha'.\n";
            }

            $expression=~s{(?:#(\w+)#)}{\$self->_param_possibly_overridden('$1', \$overriding_hash)}g;

            # NB(security): this evaluates arbitrary Perl taken from a parameter value;
            # it is the purpose of expr(), but the parameters must come from a trusted source.
            $value = eval "return ($expression)";   # NB: 'return' is needed to protect the hashrefs from being interpreted as scoping blocks
                                                    #     and parentheses are needed because return binds stronger than 'and' and 'or'

            if ($@) {
                die $@ if $@ =~ /^ParamError/;      # re-raise the underlying Param error
                die "ParamError: Cannot evaluate the expression: '$inside_hashes' ==> '$expression'\n$@";
            }
        }

        1;
    };
    my $error = $@;     # save it before anything else can overwrite it

    delete $self->{'_substitution_in_progress'}{$inside_hashes};    # on success and on failure alike, to allow re-entering the sub

    die $error unless $succeeded;

    return $value;
}
459 
460 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
usage
public usage()
Bio::EnsEMBL::Hive::Params
Definition: Params.pm:80
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::Params::fuse_param_hashes
public fuse_param_hashes()
run
public run()
Bio::EnsEMBL::Hive
Definition: Hive.pm:38