ensembl-hive  2.7.0
LOCAL.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Meadow::LOCAL
6 
7 =head1 DESCRIPTION
8 
9  This is the 'Local' implementation of Meadow
10 
11 =head1 LICENSE
12 
13  See the NOTICE file distributed with this work for additional information
14  regarding copyright ownership.
15 
16  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
17  You may obtain a copy of the License at
18 
19  http://www.apache.org/licenses/LICENSE-2.0
20 
21  Unless required by applicable law or agreed to in writing, software distributed under the License
22  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23  See the License for the specific language governing permissions and limitations under the License.
24 
25 =head1 CONTACT
26 
27  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
28 
29 =cut
30 
31 
32 package Bio::EnsEMBL::Hive::Meadow::LOCAL;
33 
34 use strict;
35 use warnings;
36 use Cwd ('cwd');
37 use Bio::EnsEMBL::Hive::Utils ('split_for_bash');
38 
39 # --------------------------------------------------------------------------------------------------------------------
40 # <hack> What follows is a hack to extend the built-in exec() function that is called by Proc::Daemon .
41 # The extended version also understands an ARRAYref as valid input and turns it into a LIST.
42 # Thanks to this we can avoid calling an extra shell to interpret the command line being daemonized.
43 # --------------------------------------------------------------------------------------------------------------------
44 
# Install a replacement for the exec() that Proc::Daemon calls.
# This has to run at compile time, before Proc::Daemon itself is loaded.
BEGIN {
    *Proc::Daemon::exec = sub {
        # A leading ARRAYref is unpacked into a LIST; any other argument list is passed through as is.
        my @exec_args = ( ref( $_[0] ) eq 'ARRAY' ) ? @{ $_[0] } : @_;

        return CORE::exec( @exec_args );
    };
}
50 
51 use Proc::Daemon 0.23; # NB: this line absolutely must come after the BEGIN block that redefines exec(), or the trick will fail.
52 
53 # --------------------------------------------------------------------------------------------------------------------
54 # </hack>
55 # --------------------------------------------------------------------------------------------------------------------
56 
57 
58 use base ('Bio::EnsEMBL::Hive::Meadow');
59 
60 
# Semantic version of the Meadow interface:
#   - change the Major version whenever an incompatible change is introduced,
#   - change the Minor version whenever the interface is extended, but compatibility is retained.
our $VERSION = '5.0';
64 
65 
# Returns the name of this meadow: the part of the current hostname that precedes the first dot.
# Also called to check for availability; for the moment the LOCAL meadow is assumed to be always available.
sub name {
    my ($self) = @_;

    my $full_hostname = $self->get_current_hostname();

    return ( split( /\./, $full_hostname ) )[0];    # keep only the leading label
}
71 
72 
# Returns the process id of the currently running Perl process.
sub get_current_worker_process_id {
    my ($self) = @_;

    my $current_pid = $$;

    return $current_pid;
}
78 
# Nothing to do for this meadow: the method exists but performs no action.
sub deregister_local_process {
    return;
}
80 
# Returns the shell pipeline (as a single string) used to list the runWorker.pl / beekeeper.pl processes.
sub _command_line_to_extract_all_running_workers {
    my ($self) = @_;

    # Process listing: state, user, pid and the full command line.
    my $ps_listing = q{ps ex -o state,user,pid,command -w -w};

    # The filter must exclude both 'awk' itself and commands like "less runWorker.pl",
    # hence the extra condition on the 4th column.
    my $awk_filter = q{awk '((/runWorker.pl/ || /beekeeper.pl/) && ($4 ~ /perl[[:digit:].]*$/) )'};

    return "$ps_listing | $awk_filter";
}
87 
88 
# Lists the status of the worker processes found on this machine.
#
# Runs the command returned by _command_line_to_extract_all_running_workers() and parses every
# output line as: state, user, pid, followed by the words of the command line.
# A line that carries an " EHIVE_SUBMISSION_NAME=..." value which does not start with
# $self->job_name_prefix() is skipped; a line without that marker is kept.
#
# Returns an arrayref of [ $worker_pid, $meadow_user, $status ] triples, where $status is
# 'RUN', 'SSUSP', or undef when the state letter is not in the map below.
sub status_of_all_our_workers { # returns an arrayref
    my ($self) = @_;

    my $cmd             = $self->_command_line_to_extract_all_running_workers();
    my $job_name_prefix = $self->job_name_prefix();

    # The map is constant, so it is built once rather than on every line.
    my %status_of_state = (
        'R' => 'RUN',   # running

        'S' => 'RUN',   # sleeping (sleeping for less than 20 sec on a Mac)
        'I' => 'RUN',   # Mac: idle (sleeping for more than 20 sec)

        'D' => 'RUN',   # Linux: uninterruptible sleep, usually IO
        'U' => 'RUN',   # Mac: uninterruptible wait

        'T' => 'SSUSP', # stopped process
    );

    my @status_list = ();
    foreach my $line (`$cmd`) {
        my ($pre_status, $meadow_user, $worker_pid, @job_name) = split(/\s+/, $line);

        # Only take the first character because of Mac's additional modifiers
        my $status = $status_of_state{ substr($pre_status, 0, 1) };

        # Note: you can locally 'kill -19' a worker to suspend it and 'kill -18' a worker to resume it

        # Exclude workers from other pipelines.
        # The prefix is quoted so that it is compared literally and not interpreted as a regular expression.
        if (join(' ', @job_name) =~ / EHIVE_SUBMISSION_NAME=(\S+)/) {
            next unless ($1 =~ /^\Q$job_name_prefix\E/);
        }

        push @status_list, [$worker_pid, $meadow_user, $status];
    }

    return \@status_list;
}
124 
125 
# Sends signal 0 to the worker's process id and returns the result of kill().
# Signal 0 delivers nothing; it only tests whether the process can be signalled by us.
sub check_worker_is_alive_and_mine {
    my ($self, $worker) = @_;

    my $worker_pid      = $worker->process_id();
    my $signalled_count = kill( 0, $worker_pid );

    return $signalled_count;
}
134 
135 
# Runs the external 'kill -9' command on the worker's process id.
# $fast is accepted but not used here.
# Returns the status of system() shifted right by 8 bits, i.e. the exit code of 'kill'.
sub kill_worker {
    my ($self, $worker, $fast) = @_;

    my @kill_command = ( 'kill', '-9', $worker->process_id() );
    my $wait_status  = system( @kill_command );

    return ( $wait_status >> 8 );
}
142 
143 
# Spawns $required_worker_count daemonized copies of $worker_cmd via Proc::Daemon::Init()
# and returns an arrayref with the values that Init() returned, one per spawned worker.
#
# The submission name is stored in $ENV{EHIVE_SUBMISSION_NAME} before spawning.
# When $submit_log_subdir is set, the STDOUT/STDERR of each child go to per-worker files in that directory.
# $rc_specific_submission_cmd_args is accepted but not used here.
sub submit_workers_return_meadow_pids {
    my ($self, $worker_cmd, $required_worker_count, $iteration, $rc_name, $rc_specific_submission_cmd_args, $submit_log_subdir) = @_;

    my @worker_argv = split_for_bash($worker_cmd);

    $ENV{EHIVE_SUBMISSION_NAME} = $self->job_array_common_name($rc_name, $iteration);

    my $signature = $self->signature;
    print "Spawning [ $signature ] x$required_worker_count \t\t$worker_cmd\n";

    my @children_pids = ();

    for my $idx (1..$required_worker_count) {

        my %daemon_options = (
            work_dir     => cwd(),
            exec_command => [ \@worker_argv ],  # the AoA format is supported thanks to the BEGIN hack introduced in the beginning of this module.
        );

        # Without a log directory both STD streams are sent to /dev/null by default
        if ($submit_log_subdir) {
            my $log_stem = $submit_log_subdir . "/log_${iteration}_${rc_name}_${idx}_$$";
            $daemon_options{child_STDOUT} = $log_stem . ".out";
            $daemon_options{child_STDERR} = $log_stem . ".err";
        }

        push @children_pids, scalar( Proc::Daemon::Init( \%daemon_options ) );
    }

    return \@children_pids;
}
172 
173 
# Overrides Meadow::run_on_host.
# Runs the command given as an arrayref with system() and returns system()'s status.
# $meadow_host and $meadow_user are not used: the current host is assumed to be $meadow_host, so ssh is bypassed.
sub run_on_host {
    my ($self, $meadow_host, $meadow_user, $command) = @_;

    my @local_command = @{ $command };

    return system( @local_command );
}
179 
180 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::Meadow
Definition: Meadow.pm:12
Bio::EnsEMBL::Hive::Meadow::LOCAL::_command_line_to_extract_all_running_workers
protected _command_line_to_extract_all_running_workers()
BEGIN
public BEGIN()
Bio::EnsEMBL::Hive
Definition: Hive.pm:38
Bio::EnsEMBL::Hive::Meadow::LOCAL
Definition: LOCAL.pm:13