10 # Example of an import:
12 my $input_id_string = stringify($input_id_hash);
14 # Example of inheritance:
15 use base (
'Bio::EnsEMBL::Hive::Utils', ...);
16 my $input_id_string = $self->stringify($input_id_hash);
18 # Example of a direct call:
24 This module provides general utility functions that can be used in different contexts through three different calling mechanisms:
26 *
import: another module/script can selectively
import methods from
this module into its
namespace
28 * inheritance: another module can inherit from
this one and so implicitly acquire the methods into its
namespace
30 * direct call to a module
's method: another module/script can directly call a method from this module prefixed with this module's name
34 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
35 Copyright [2016-2024] EMBL-European Bioinformatics Institute
37 Licensed under the Apache License,
Version 2.0 (the
"License"); you may not use
this file except in compliance with the License.
38 You may obtain a copy of the License at
42 Unless required by applicable law or agreed to in writing, software distributed under the License
43 is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
44 See the License
for the specific language governing permissions and limitations under the License.
48 Please subscribe to the
Hive mailing list: http:
53 package Bio::EnsEMBL::Hive::Utils;
60 use Scalar::Util qw(looks_like_number);
61 #use Bio::EnsEMBL::Hive::DBSQL::DBConnection; # causes warnings that all exported functions have been redefined
63 use Exporter
'import';
64 our @EXPORT_OK = qw(stringify destringify dir_revhash parse_cmdline_options find_submodules load_file_or_module split_for_bash go_figure_dbc
throw join_command_args whoami timeout print_aligned_fields);
66 no warnings (
'once'); # otherwise the next line complains
about $Carp::Internal being used just once
67 $Carp::Internal{ (__PACKAGE__) }++;
72 Description: This
function takes in a Perl data structure and stringifies it
using specific configuration
73 that allows us to store/recreate
this data structure according to our specific storage/communication requirements.
74 NOTE: Some recursive structures are not stringified in a way that allows destringification with destringify
82 my $structure = pop @_;
84 local $Data::Dumper::Indent = 0; # we want everything on one line
85 local $Data::Dumper::Terse = 1; # and we want it without dummy variable names
86 local $Data::Dumper::Sortkeys = 1; # make stringification more deterministic
87 local $Data::Dumper::Quotekeys = 1; # conserve some space
88 local $Data::Dumper::Useqq = 1; # escape the \n and \t correctly
89 local $Data::Dumper::Pair =
' => '; # make sure we always produce Perl-parsable structures, no matter what is set externally
90 local $Data::Dumper::Maxdepth = 0; # make sure nobody can mess up stringification by setting a lower Maxdepth
91 local $Data::Dumper::Deepcopy = 1; # avoid
self-references in
case the same structure is reused within params
92 local $Data::Dumper::Sparseseen= 1; # optimized
"seen" hash of scalars
94 return Dumper($structure);
99 Description: This
function takes in a
string that may or may not contain a stingified Perl structure.
100 If it seems to contain a hash/array/quoted_string, the contents is evaluated, otherwise it is returned
"as is".
101 This
function is mainly used to read values from
'meta' table that may represent Perl structures, but generally don
't have to.
103 Callers : Bio::EnsEMBL::Hive::DBSQL::PipelineWideParametersAdaptor # destringification of general 'meta
' params
104 beekeeper.pl script # destringification of the 'pipeline_name
' meta param
115 or $value=~/^\[.*\]$/s
116 or looks_like_number($value) # Needed for pipeline_wide_parameters as each value is destringified independently and the JSON writer would otherwise force writing numbers as strings
117 or $value eq 'undef
') {
119 $value = eval($value);
128 Description: This function takes in a string (which is usually a numeric id) and turns its reverse into a multilevel directory hash.
129 Please note that no directory is created at this step - it is purely a string conversion function.
131 Callers : Bio::EnsEMBL::Hive::Worker # hashing of the worker output directories
132 Bio::EnsEMBL::Hive::RunnableDB::JobFactory # hashing of an arbitrary id
139 my @dirs = reverse(split(//, $id));
140 pop @dirs; # do not use the first digit for hashing
142 return join('/
', @dirs);
146 =head2 parse_cmdline_options
148 Description: This function reads all options from command line into a key-value hash
149 (keys must be prefixed with a single or double dash, the following term becomes the value).
150 The rest of the terms go into the list.
151 Command line options are not removed from @ARGV, so the same or another parser can be run again if needed.
157 sub parse_cmdline_options {
163 foreach my $arg (@ARGV) {
164 if($temp_key) { # only the value, get the key from buffer
165 $pairs{$temp_key} = destringify($arg);
167 } elsif($arg=~/^--?(\w+)=(.+)$/) { # both the key and the value
168 $pairs{$1} = destringify($2);
169 } elsif($arg=~/^--?(\w+)$/) { # only the key, buffer it and expect the value on the next round
175 return (\%pairs, \@list);
179 =head2 find_submodules
181 Description: This function takes one argument ("prefix" of a module name),
182 transforms it into a directory name from the filesystem's point of view
183 and finds all module names in these
"directories".
184 Each module_name found is reported only once,
185 even
if there are multiple matching files in different directories.
191 sub find_submodules {
192 my $prefix = shift @_;
196 my %seen_module_name = ();
198 foreach my $inc (@INC) {
199 foreach my $full_module_path (glob(
"$inc/$prefix/*.pm")) {
200 my $module_name = substr($full_module_path, length($inc)+1, -3); # remove leading
"$inc/" and trailing
'.pm'
201 $module_name=~s{/}{::}g; # transform back to module_name space
203 $seen_module_name{$module_name}++;
206 return [ keys %seen_module_name ];
210 =head2 load_file_or_module
212 Description: This
function takes one argument, tries to determine whether it is a module name (
'::'-separated)
213 or a path to the module (
'/'-separated), finds the module_name and dynamically loads it.
219 sub load_file_or_module {
220 my $file_or_module = pop @_;
224 if( $file_or_module=~/^(\w|::)+$/ ) {
226 $module_name = $file_or_module;
228 } elsif(-r $file_or_module) {
230 if(my $package_line = `grep ^package $file_or_module`) {
231 if($package_line=~/^\s*package\s+((?:\w|::)+)\s*;/) {
236 die
"Package line format in '$file_or_module' unrecognized:\n$package_line\n";
239 die
"Could not find the package definition line in '$file_or_module'\n";
243 die
"The parameter '$file_or_module' neither seems to be a valid module nor a valid readable file\n";
246 eval
"require $module_name;";
253 =head2 split_for_bash
255 Description: This
function takes one argument (String) and splits it assuming it represents bash command line parameters.
256 It mainly splits on whitespace, except
for cases when spaces are trapped between quotes or apostrophes.
257 In the latter
case the outer quotes are removed.
258 Returntype : list of Strings
267 if( defined($cmd) ) {
268 @cmd = ($cmd =~ /((?:
".*?"|
'.*?'|\S)+)/g); # split on space except
for quoted strings
270 foreach my $syll (@cmd) { # remove the outer quotes or apostrophes
271 if($syll=~/^(\S*?)
"(.*?)"(\S*?)$/) {
272 $syll = $1 . $2 . $3;
273 } elsif($syll=~/^(\S*?)
'(.*?)'(\S*?)$/) {
274 $syll = $1 . $2 . $3;
285 Description: This
function tries its best to build a DBConnection from $foo
286 It may need $reg_type
if $foo is a
Registry key and there are more than 1 DBAdaptors
for it
291 my ($foo, $reg_type) = @_; # NB: the second parameter is used by a Compara Runnable
295 # if(UNIVERSAL::isa($foo, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # already a DBConnection, return it:
296 if ( ref($foo) =~ /DBConnection$/ ) { # already a DBConnection, hive-ify it and
return
297 return bless $foo,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
299 # } elsif(UNIVERSAL::can($foo, 'dbc') and UNIVERSAL::isa($foo->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) {
300 } elsif(UNIVERSAL::can($foo,
'dbc') and ref($foo->dbc) =~ /DBConnection$/) {
302 return bless $foo->dbc,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
304 # } elsif(UNIVERSAL::can($foo, 'db') and UNIVERSAL::can($foo->db, 'dbc') and UNIVERSAL::isa($foo->db->dbc, 'Bio::EnsEMBL::DBSQL::DBConnection')) { # another data adaptor or Runnable:
305 } elsif(UNIVERSAL::can($foo,
'db') and UNIVERSAL::can($foo->db,
'dbc') and ref($foo->db->dbc) =~ /DBConnection$/) { # another data adaptor or Runnable:
307 return bless $foo->db->dbc,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
309 } elsif(ref($foo) eq
'HASH') {
313 } elsif($foo =~ m{^(\w*):
318 unless(ref($foo)) { # maybe it is simply a registry key?
324 if($foo=~/^(\w+):(\w+)$/) {
325 ($reg_type, $foo) = ($1, $2);
333 if( scalar(@$dbas) == 1 ) {
336 die
"The registry contains multiple entries for '$foo', please prepend the reg_alias with the desired type";
341 if(UNIVERSAL::can($dba,
'dbc')) {
342 return bless $dba->dbc,
'Bio::EnsEMBL::Hive::DBSQL::DBConnection';
345 die
"Sorry, could not figure out how to make a DBConnection object out of '$foo'";
353 # TODO: newer versions of Carp are much more tunable, but I am stuck with v1.08 .
354 # Alternatively, we could implement our own stack reporter instead of Carp::confess.
359 =head2 join_command_args
361 Argument[0]: String or Arrayref of Strings
362 Description: Prepares the command to be executed by system(). It is needed
if the
363 command is in fact composed of multiple commands.
364 Returns: Tuple (
boolean,
string). The
boolean indicates whether it was needed to
365 join the arguments. The
string is the
new command-line
string.
366 PS: Shamelessly adapted from http:
370 my %shell_characters =
map {$_ => 1} qw(< > >> 2> 2>&1 | && || ;);
372 sub join_command_args {
374 return (0,$args) unless ref($args);
376 # system() can only spawn 1 process. For multiple commands piped
377 # together or if redirections are used, it needs a shell to parse
378 # a string representing the whole command
379 my $join_needed = (grep {$shell_characters{$_}} @$args) ? 1 : 0;
382 foreach my $a (@$args) {
383 if ($shell_characters{$a} or $a =~ /^[a-zA-Z0-9_\/\-]+\z/) {
386 # Escapes the single-quotes and protects the arguments
388 push @new_args,
"'$a'";
392 return ($join_needed,join(
' ', @new_args));
398 Description: Returns the name of the user who
's currently running Perl.
399 $ENV{'USER
'} is the most common source but it can be missing
400 so we also default to a builtin method.
405 return ($ENV{'USER
'} || (getpwuid($<))[0]);
411 Argument[0]: (coderef) Callback subroutine
412 Argument[1]: (integer) Time to wait (in seconds)
413 Description: Calls the callback whilst ensuring it does not take more than the allowed time to run.
414 Returns: The return value (scalar context) of the callback or -2 if the
415 command had to be aborted.
416 FIXME: may need a better mechanism that allows callbacks to return -2 too
421 my ($callback, $timeout) = @_;
423 return $callback->();
427 ## Adapted from the TimeLimit pacakge: http://www.perlmonks.org/?node_id=74429
428 my $die_text = "_____RunCommandTimeLimit_____\n";
429 my $old_alarm = alarm(0); # turn alarm off and read old value
431 local $SIG{ALRM} = 'IGNORE
'; # ignore alarms in this scope
435 local $SIG{__DIE__}; # turn die handler off in eval block
436 local $SIG{ALRM} = sub { die $die_text };
437 alarm($timeout); # set alarm
438 $ret = $callback->();
441 # Note the alarm is still active here - however we assume that
442 # if we got here without an alarm the user's code succeeded -
443 # hence the IGNOREing of alarms in this scope
445 alarm 0; # kill off alarm
448 alarm $old_alarm; # restore alarm
451 # the eval returned an error
452 die $@
if $@ ne $die_text;
459 =head2 print_aligned_fields
461 Argument[0]: Arrayref of key-value Hashrefs
462 Argument[1]: Template
string
463 Description: For each hashref the
template string will be interpolated (replacing
464 each key with its value) and printed, but making sure the same fields
465 are (right) aligned across all lines.
466 The interpolator searches
for C<%(key)> patterns and replaces them
467 with the value found in the hashref. The key name can be prefixed with
468 a dash to require a left alignment instead.
472 sub print_aligned_fields {
473 my $all_fields = shift;
474 my $template = shift;
476 return unless @$all_fields;
478 my @field_names = keys %{$all_fields->[0]};
482 # Get the width of each element
483 foreach my $line_fields (@$all_fields) {
484 # Remove the ANSI colour codes before getting the length
485 my %row_width =
map {my $s = $line_fields->{$_}; $s =~ s/\x1b\[[0-9;]*m
486 push @all_widths, \%row_width;
489 # Get the width of each field (across all lines)
490 foreach my $field_name (@field_names) {
491 $col_width{$field_name} = max(
map {$_->{$field_name}} @all_widths);
494 # Interpolate and print each line
495 foreach my $line_fields (@$all_fields) {
496 my $row_width = shift @all_widths;
497 my $line = $template;
498 $line =~ s/%\((-?)([a-zA-Z_]\w*)\)/
500 $line_fields->{$2} . (
' ' x ($col_width{$2}-$row_width->{$2}))
502 (
' ' x ($col_width{$2}-$row_width->{$2})) . $line_fields->{$2};