ensembl-hive  2.5
LOCAL.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Meadow::LOCAL
6 
7 =head1 DESCRIPTION
8 
9  This is the 'Local' implementation of Meadow
10 
11 =head1 LICENSE
12 
13  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
14  Copyright [2016-2022] EMBL-European Bioinformatics Institute
15 
16  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
17  You may obtain a copy of the License at
18 
19  http://www.apache.org/licenses/LICENSE-2.0
20 
21  Unless required by applicable law or agreed to in writing, software distributed under the License
22  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23  See the License for the specific language governing permissions and limitations under the License.
24 
25 =head1 CONTACT
26 
27  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
28 
29 =cut
30 
31 
32 package Bio::EnsEMBL::Hive::Meadow::LOCAL;
33 
34 use strict;
35 use warnings;
36 use Cwd ('cwd');
37 use Bio::EnsEMBL::Hive::Utils ('split_for_bash');
38 
39 # --------------------------------------------------------------------------------------------------------------------
40 # <hack> What follows is a hack to extend the built-in exec() function that is called by Proc::Daemon .
41 # The extended version also understands an ARRAYref as valid input and turns it into a LIST.
42 # Thanks to this we can avoid calling an extra shell to interpret the command line being daemonized.
43 # --------------------------------------------------------------------------------------------------------------------
44 
BEGIN {
    # Install an exec() in the Proc::Daemon namespace that also accepts a single
    # ARRAYref argument: the referenced array is expanded into a LIST before
    # being handed to the built-in exec(). Any other input is passed through as is.
    *Proc::Daemon::exec = sub {
        if ( ref($_[0]) eq 'ARRAY' ) {
            return CORE::exec( @{$_[0]} );
        }
        return CORE::exec( @_ );
    };
}
50 
51 use Proc::Daemon 0.23; # NB: this line absolutely must come after the BEGIN block that redefines exec(), or the trick will fail.
52 
53 # --------------------------------------------------------------------------------------------------------------------
54 # </hack>
55 # --------------------------------------------------------------------------------------------------------------------
56 
57 
58 use base ('Bio::EnsEMBL::Hive::Meadow');
59 
60 
61 our $VERSION = '5.0'; # Semantic version of the Meadow interface:
62  # change the Major version whenever an incompatible change is introduced,
63  # change the Minor version whenever the interface is extended, but compatibility is retained.
64 
65 
# Returns the name of this meadow: the part of the current hostname that
# precedes the first dot. Also called to check for availability; for the
# moment the LOCAL meadow is assumed to be always available.
sub name {
    my ($self) = @_;

    my $full_hostname = $self->get_current_hostname();

    return ( split /\./, $full_hostname )[0];   # keep only the first dot-separated component
}
71 
72 
# Returns the process id of the currently running Perl process ($$).
sub get_current_worker_process_id {
    my ($self) = @_;

    my $current_pid = $$;

    return $current_pid;
}
78 
# Nothing to do for the LOCAL meadow: returns an empty list (undef in scalar context).
sub deregister_local_process {
    return;
}
80 
# Returns the shell pipeline (a string) used to list the running runWorker.pl
# and beekeeper.pl processes together with their state, user and pid.
sub _command_line_to_extract_all_running_workers {
    my ($self) = @_;

    # One line per process: state, user, pid and the full (untruncated) command
    my $ps_listing = q{ps ex -o state,user,pid,command -w -w};

    # Keep only lines that mention one of our scripts AND whose 4th field ends in "perl"
    # (optionally followed by a version); this excludes both 'awk' itself
    # and commands like "less runWorker.pl"
    my $awk_filter = q{awk '((/runWorker.pl/ || /beekeeper.pl/) && ($4 ~ /perl[[:digit:].]*$/) )'};

    return join(' | ', $ps_listing, $awk_filter);
}
87 
88 
# Lists the workers/beekeepers currently running on this machine.
#
# Runs the command returned by _command_line_to_extract_all_running_workers()
# and parses each output line as: state, user, pid, rest-of-command-line.
#
# Returns an arrayref of [ $worker_pid, $meadow_user, $status ] triples, where
# $status is 'RUN', 'SSUSP', or undef when the process state letter is not in
# the map below. Processes whose command line carries an EHIVE_SUBMISSION_NAME
# that does not start with $self->job_name_prefix() are skipped; processes
# without any EHIVE_SUBMISSION_NAME are kept.
sub status_of_all_our_workers { # returns an arrayref
    my ($self) = @_;

    my $cmd             = $self->_command_line_to_extract_all_running_workers();
    my $job_name_prefix = $self->job_name_prefix();

    # Built once, outside of the loop: maps the first character of the 'ps' state to a Meadow status
    my %status_of_ps_state = (
        'R' => 'RUN',   # running

        'S' => 'RUN',   # sleeping (sleeping for less than 20 sec on a Mac)
        'I' => 'RUN',   # Mac: idle (sleeping for more than 20 sec)

        'D' => 'RUN',   # Linux: uninterruptible sleep, usually IO
        'U' => 'RUN',   # Mac: uninterruptible wait

        'T' => 'SSUSP', # stopped process
    );

    my @status_list = ();
    foreach my $line (`$cmd`) {
            # split(' ', ...) also tolerates leading whitespace, which split(/\s+/, ...) would turn into an empty first field
        my ($pre_status, $meadow_user, $worker_pid, @job_name) = split(' ', $line);

        next unless defined $worker_pid;    # blank or truncated line: nothing to report

            # only take the first character because of Mac's additional modifiers
        my $status = $status_of_ps_state{ substr($pre_status,0,1) };

            # Note: you can locally 'kill -19' a worker to suspend it and 'kill -18' a worker to resume it

            # Exclude workers from other pipelines
        if (join(' ', @job_name) =~ / EHIVE_SUBMISSION_NAME=(\S+)/) {
            my $submission_name = $1;
                # \Q..\E : the prefix is a literal string, not a pattern, so its metacharacters must not be interpreted
            next unless $submission_name =~ /^\Q$job_name_prefix\E/;
        }

        push @status_list, [$worker_pid, $meadow_user, $status];
    }
    return \@status_list;
}
124 
125 
# Probes the worker's process with signal 0 (no signal is actually delivered)
# and returns the result of kill(): the number of processes successfully signalled.
sub check_worker_is_alive_and_mine {
    my ($self, $worker) = @_;

    my $pid_to_probe = $worker->process_id();

    return scalar kill(0, $pid_to_probe);
}
134 
135 
# Sends signal 9 to the worker's process via the external 'kill' command
# (list form of system(), so no shell is involved).
# Returns the value returned by system(). The $fast argument is accepted but not used here.
sub kill_worker {
    my ($self, $worker, $fast) = @_;

    my @kill_cmd = ('kill', '-9', $worker->process_id());

    return system(@kill_cmd);
}
141 
142 
# Daemonizes $required_worker_count copies of $worker_cmd via Proc::Daemon::Init
# and returns an arrayref with the values returned by each Init() call.
#
# The submission name obtained from job_array_common_name($rc_name, $iteration)
# is exported through $ENV{EHIVE_SUBMISSION_NAME} before spawning.
# If $submit_log_subdir is set, each child's STDOUT/STDERR is sent to
# a per-child file in that directory; otherwise Proc::Daemon's defaults apply.
# $rc_specific_submission_cmd_args is accepted but not used here.
sub submit_workers_return_meadow_pids {
    my ($self, $worker_cmd, $required_worker_count, $iteration, $rc_name, $rc_specific_submission_cmd_args, $submit_log_subdir) = @_;

    my @worker_argv = split_for_bash($worker_cmd);

    $ENV{EHIVE_SUBMISSION_NAME} = $self->job_array_common_name($rc_name, $iteration);

    print "Spawning [ ".$self->signature." ] x$required_worker_count \t\t$worker_cmd\n";

    my @children_pids = ();

    for my $idx (1..$required_worker_count) {

        my %daemon_options = (
            work_dir     => cwd(),
            exec_command => [ \@worker_argv ],  # the AoA format relies on this module's redefinition of Proc::Daemon::exec
        );

        if ($submit_log_subdir) {   # otherwise both STD streams are sent to /dev/null by default
            $daemon_options{child_STDOUT} = $submit_log_subdir . "/log_${iteration}_${rc_name}_${idx}_$$.out";
            $daemon_options{child_STDERR} = $submit_log_subdir . "/log_${iteration}_${rc_name}_${idx}_$$.err";
        }

        # Scalar context is deliberate: one value per spawned child
        my $daemon_pid = Proc::Daemon::Init( \%daemon_options );

        push @children_pids, $daemon_pid;
    }

    return \@children_pids;
}
171 
172 
# Overrides Meadow::run_on_host.
# Runs the command (an arrayref of program + arguments) directly with system():
# the current host is assumed to be $meadow_host, so ssh is bypassed.
# Returns the value returned by system().
sub run_on_host {
    my ($self, $meadow_host, $meadow_user, $command) = @_;

    my @local_argv = @{ $command };

    return system(@local_argv);
}
178 
179 1;
protected _command_line_to_extract_all_running_workers()