10 # Example of an import: 12 my $input_id_string = stringify($input_id_hash);
14 # Example of inheritance: 15 use base (
'Bio::EnsEMBL::Hive::Utils', ...);
16 my $input_id_string = $self->stringify($input_id_hash);
18 # Example of a direct call: 24 This module provides general utility functions that can be used in different contexts through three different calling mechanisms:
26 *
import: another module/script can selectively
import methods from
this module into its
namespace 28 * inheritance: another module can inherit from
this one and so implicitly acquire the methods into its
namespace 30 * direct call to a module
's method: another module/script can directly call a method from this module prefixed with this module's name
34 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
35 Copyright [2016-2022] EMBL-European Bioinformatics Institute
37 Licensed under the Apache License,
Version 2.0 (the
"License"); you may not use
this file except in compliance with the License.
38 You may obtain a copy of the License at
42 Unless required by applicable law or agreed to in writing, software distributed under the License
43 is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
44 See the License
for the specific language governing permissions and limitations under the License.
48 Please subscribe to the
Hive mailing list: http:
53 package Bio::EnsEMBL::Hive::Utils;
59 use Scalar::Util qw(looks_like_number);
60 #use Bio::EnsEMBL::Hive::DBSQL::DBConnection; # causes warnings that all exported functions have been redefined 62 use Exporter
'import';
63 our @EXPORT_OK = qw(stringify destringify dir_revhash parse_cmdline_options find_submodules load_file_or_module split_for_bash go_figure_dbc
throw join_command_args whoami timeout);
65 no warnings (
'once'); # otherwise the next line complains about $Carp::Internal being used just once
66 $Carp::Internal{ (__PACKAGE__) }++;
71 Description: This
function takes in a Perl data structure and stringifies it
using specific configuration
72 that allows us to store/recreate
this data structure according to our specific storage/communication requirements.
73 NOTE: Some recursive structures are not stringified in a way that allows destringification with destringify
81 my $structure = pop @_;
83 local $Data::Dumper::Indent = 0; # we want everything on one line
84 local $Data::Dumper::Terse = 1; # and we want it without dummy variable names
85 local $Data::Dumper::Sortkeys = 1; # make stringification more deterministic
86 local $Data::Dumper::Quotekeys = 1; # conserve some space
87 local $Data::Dumper::Useqq = 1; # escape the \n and \t correctly
88 local $Data::Dumper::Pair =
' => '; # make sure we always produce Perl-parsable structures, no matter what is
set externally
89 local $Data::Dumper::Maxdepth = 0; # make sure nobody can mess up stringification by setting a lower Maxdepth
90 local $Data::Dumper::Deepcopy = 1; # avoid
self-references in
case the same structure is reused within params
92 return Dumper($structure);
97 Description: This
function takes in a
string that may or may not contain a stingified Perl structure.
98 If it seems to contain a hash/array/quoted_string, the contents is evaluated, otherwise it is returned
"as is".
99 This
function is mainly used to read values from
'meta' table that may represent Perl structures, but generally don
't have to. 101 Callers : Bio::EnsEMBL::Hive::DBSQL::PipelineWideParametersAdaptor # destringification of general 'meta
' params 102 beekeeper.pl script # destringification of the 'pipeline_name
' meta param 113 or $value=~/^\[.*\]$/s 114 or looks_like_number($value) # Needed for pipeline_wide_parameters as each value is destringified independently and the JSON writer would otherwise force writing numbers as strings 115 or $value eq 'undef
') { 117 $value = eval($value); 126 Description: This function takes in a string (which is usually a numeric id) and turns its reverse into a multilevel directory hash. 127 Please note that no directory is created at this step - it is purely a string conversion function. 129 Callers : Bio::EnsEMBL::Hive::Worker # hashing of the worker output directories 130 Bio::EnsEMBL::Hive::RunnableDB::JobFactory # hashing of an arbitrary id 137 my @dirs = reverse(split(//, $id)); 138 pop @dirs; # do not use the first digit for hashing 140 return join('/
', @dirs); 144 =head2 parse_cmdline_options 146 Description: This function reads all options from command line into a key-value hash 147 (keys must be prefixed with a single or double dash, the following term becomes the value). 148 The rest of the terms go into the list. 149 Command line options are not removed from @ARGV, so the same or another parser can be run again if needed. 155 sub parse_cmdline_options { 161 foreach my $arg (@ARGV) { 162 if($temp_key) { # only the value, get the key from buffer 163 $pairs{$temp_key} = destringify($arg); 165 } elsif($arg=~/^--?(\w+)=(.+)$/) { # both the key and the value 166 $pairs{$1} = destringify($2); 167 } elsif($arg=~/^--?(\w+)$/) { # only the key, buffer it and expect the value on the next round 173 return (\%pairs, \@list); 177 =head2 find_submodules 179 Description: This function takes one argument ("prefix" of a module name), 180 transforms it into a directory name from the filesystem's point of view
181 and finds all module names in these
"directories".
182 Each module_name found is reported only once,
183 even
if there are multiple matching files in different directories.
189 sub find_submodules {
190 my $prefix = shift @_;
194 my %seen_module_name = ();
196 foreach my $inc (@INC) {
197 foreach my $full_module_path (glob(
"$inc/$prefix/*.pm")) {
198 my $module_name = substr($full_module_path, length($inc)+1, -3); #
remove leading
"$inc/" and trailing
'.pm' 199 $module_name=~s{/}{::}g; # transform back to module_name space
201 $seen_module_name{$module_name}++;
204 return [ keys %seen_module_name ];
208 =head2 load_file_or_module
210 Description: This
function takes one argument, tries to determine whether it is a module name (
'::'-separated)
211 or a path to the module (
'/'-separated), finds the module_name and dynamically loads it.
217 sub load_file_or_module {
218 my $file_or_module = pop @_;
222 if( $file_or_module=~/^(\w|::)+$/ ) {
224 $module_name = $file_or_module;
226 } elsif(-r $file_or_module) {
228 if(my $package_line = `grep ^package $file_or_module`) {
229 if($package_line=~/^\s*package\s+((?:\w|::)+)\s*;/) {
234 die
"Package line format in '$file_or_module' unrecognized:\n$package_line\n";
237 die
"Could not find the package definition line in '$file_or_module'\n";
241 die
"The parameter '$file_or_module' neither seems to be a valid module nor a valid readable file\n";
244 eval
"require $module_name;";
251 =head2 split_for_bash
253 Description: This
function takes one argument (String) and splits it assuming it represents bash command line parameters.
254 It mainly splits on whitespace, except
for cases when spaces are trapped between quotes or apostrophes.
255 In the latter
case the outer quotes are removed.
256 Returntype : list of Strings
265 if( defined($cmd) ) {
266 @cmd = ($cmd =~ /((?:
".*?"|
'.*?'|\S)+)/g); # split on space except
for quoted strings
268 foreach my $syll (@cmd) { #
remove the outer quotes or apostrophes
269 if($syll=~/^(\S*?)
"(.*?)"(\S*?)$/) {
270 $syll = $1 . $2 . $3;
271 } elsif($syll=~/^(\S*?)
'(.*?)'(\S*?)$/) {
272 $syll = $1 . $2 . $3;
283 Description: This
function tries its best to build a DBConnection from $foo
284 It may need $reg_type
if $foo is a Registry key and there are more than 1 DBAdaptors
for it
289 my ($foo, $reg_type) = @_; # NB: the second parameter is used by a Compara Runnable
293 # if(UNIVERSAL::isa($foo, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # already a DBConnection, return it: 294 if ( ref($foo) =~ /DBConnection$/ ) { # already a DBConnection, hive-ify it and
return 295 return bless $foo,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
297 # } elsif(UNIVERSAL::can($foo, 'dbc') and UNIVERSAL::isa($foo->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) { 298 } elsif(UNIVERSAL::can($foo,
'dbc') and ref($foo->dbc) =~ /DBConnection$/) {
300 return bless $foo->dbc,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
302 # } elsif(UNIVERSAL::can($foo, 'db') and UNIVERSAL::can($foo->db, 'dbc') and UNIVERSAL::isa($foo->db->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # another data adaptor or Runnable: 303 } elsif(UNIVERSAL::can($foo,
'db') and UNIVERSAL::can($foo->db,
'dbc') and ref($foo->db->dbc) =~ /DBConnection$/) { # another data adaptor or Runnable:
305 return bless $foo->db->dbc,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
307 } elsif(ref($foo) eq
'HASH') {
309 return Bio::EnsEMBL::Hive::DBSQL::DBConnection->
new( %$foo );
311 } elsif($foo =~ m{^(\w*):
313 return Bio::EnsEMBL::Hive::DBSQL::DBConnection->
new( -url => $foo );
316 unless(ref($foo)) { # maybe it is simply a registry key?
320 require Bio::EnsEMBL::Registry;
322 if($foo=~/^(\w+):(\w+)$/) {
323 ($reg_type, $foo) = ($1, $2);
327 $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($foo, $reg_type);
329 my $dbas = Bio::EnsEMBL::Registry->get_all_DBAdaptors(-species => $foo);
331 if( scalar(@$dbas) == 1 ) {
334 die
"The registry contains multiple entries for '$foo', please prepend the reg_alias with the desired type";
339 if(UNIVERSAL::can($dba,
'dbc')) {
340 return bless $dba->dbc,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
343 die
"Sorry, could not figure out how to make a DBConnection object out of '$foo'";
351 # TODO: newer versions of Carp are much more tunable, but I am stuck with v1.08 . 352 # Alternatively, we could implement our own stack reporter instead of Carp::confess. 357 =head2 join_command_args
359 Argument[0]: String or Arrayref of Strings
360 Description: Prepares the command to be executed by system(). It is needed
if the
361 command is in fact composed of multiple commands.
362 Returns: Tuple (
boolean,
string). The
boolean indicates whether it was needed to
363 join the arguments. The
string is the
new command-line
string.
364 PS: Shamelessly adapted from http:
368 my %shell_characters = map {$_ => 1} qw(< > >> 2> 2>&1 | && || ;);
370 sub join_command_args {
372 return (0,$args) unless ref($args);
374 # system() can only spawn 1 process. For multiple commands piped 375 # together or if redirections are used, it needs a shell to parse 376 # a string representing the whole command 377 my $join_needed = (grep {$shell_characters{$_}} @$args) ? 1 : 0;
380 foreach my $a (@$args) {
381 if ($shell_characters{$a} or $a =~ /^[a-zA-Z0-9_\/\-]+\z/) {
384 # Escapes the single-quotes and protects the arguments 386 push @new_args,
"'$a'";
390 return ($join_needed,join(
' ', @new_args));
396 Description: Returns the name of the user who
's currently running Perl. 397 $ENV{'USER
'} is the most common source but it can be missing 398 so we also default to a builtin method. 403 return ($ENV{'USER
'} || (getpwuid($<))[0]); 409 Argument[0]: (coderef) Callback subroutine 410 Argument[1]: (integer) Time to wait (in seconds) 411 Description: Calls the callback whilst ensuring it does not take more than the allowed time to run. 412 Returns: The return value (scalar context) of the callback or -2 if the 413 command had to be aborted. 414 FIXME: may need a better mechanism that allows callbacks to return -2 too 419 my ($callback, $timeout) = @_; 421 return $callback->(); 425 ## Adapted from the TimeLimit pacakge: http://www.perlmonks.org/?node_id=74429 426 my $die_text = "_____RunCommandTimeLimit_____\n"; 427 my $old_alarm = alarm(0); # turn alarm off and read old value 429 local $SIG{ALRM} = 'IGNORE
'; # ignore alarms in this scope 433 local $SIG{__DIE__}; # turn die handler off in eval block 434 local $SIG{ALRM} = sub { die $die_text }; 435 alarm($timeout); # set alarm 436 $ret = $callback->(); 439 # Note the alarm is still active here - however we assume that 440 # if we got here without an alarm the user's code succeeded -
441 # hence the IGNOREing of alarms in this scope 443 alarm 0; # kill off alarm
446 alarm $old_alarm; # restore alarm
449 # the eval returned an error 450 die $@
if $@ ne $die_text;