ensembl-hive  2.5
Meadow.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Meadow
6 
7 =head1 DESCRIPTION
8 
9  Meadow is an abstract interface to the queue manager.
10 
11  A Meadow knows how to check&change the actual status of Workers on the farm.
12 
13 =head1 LICENSE
14 
15  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
16  Copyright [2016-2022] EMBL-European Bioinformatics Institute
17 
18  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
19  You may obtain a copy of the License at
20 
21  http://www.apache.org/licenses/LICENSE-2.0
22 
23  Unless required by applicable law or agreed to in writing, software distributed under the License
24  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  See the License for the specific language governing permissions and limitations under the License.
26 
27 =head1 CONTACT
28 
29  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
30 
31 =cut
32 
33 
34 package Bio::EnsEMBL::Hive::Meadow;
35 
36 use strict;
37 use warnings;
38 use Sys::Hostname ('hostname');
39 
40 use base ('Bio::EnsEMBL::Hive::Configurable');
41 
42 
43 # -------------------------------------- <versioning of the Meadow interface> -------------------------------------------------------
44 
our $MEADOW_MAJOR_VERSION = '5';    # Make sure you change this number whenever an incompatible change is introduced


# Returns the major version of the Meadow interface, as declared in
# $MEADOW_MAJOR_VERSION. Any arguments (including an invocant) are ignored.
sub get_meadow_major_version {
    my $interface_major_version = $MEADOW_MAJOR_VERSION;

    return $interface_major_version;
}
52 
53 
# Returns the $VERSION declared by the invocant's class (a specific Meadow
# implementation), or the string 'unversioned' when none is defined.
sub get_meadow_version {
    my ($self) = @_;

    my $declared_version = $self->VERSION;

    return defined($declared_version) ? $declared_version : 'unversioned';
}
59 
60 
# Tells whether this Meadow implementation is compatible with the Meadow
# interface: returns 1 when the implementation's version (get_meadow_version)
# starts with the interface's major version (get_meadow_major_version)
# followed by a literal dot, and 0 otherwise.
sub check_version_compatibility {
    my $self = shift @_;

    my $mmv = $self->get_meadow_major_version();
    my $mv  = $self->get_meadow_version();
#    warn "$self : MVC='$mmv', MV='$mv'\n";

    # \Q...\E makes the major version match literally, so that any regex
    # metacharacter it might contain (e.g. a dot) is not treated as a wildcard
    return ($mv =~ /^\Q$mmv\E\./) ? 1 : 0;
}
70 
71 # -------------------------------------- </versioning of the Meadow interface> ------------------------------------------------------
72 
73 
74 =head2 new
75 
76  Title : new (constructor)
77  Function: Instantiates a new Meadow object
78 
79 =cut
80 
# Constructor: blesses a new hash into $class, remembers the given $name in
# the '_name' slot (so the meadow does not have to be queried for it again)
# and hands $config over to _init_meadow(). Returns the new object.
sub new {
    my $class  = shift;
    my $config = shift;
    my $name   = shift;

    my $self = bless { '_name' => $name }, $class;

    $self->_init_meadow($config);

    return $self;
}
91 
92 
# Initialises the object: registers $config through config() and then sets
# the context to the path [ 'Meadow', <type>, <cached name> ] through context().
sub _init_meadow {
    my $self   = shift;
    my $config = shift;

    $self->config( $config );

    my @context_path = ( 'Meadow', $self->type, $self->cached_name );
    $self->context( \@context_path );
}
99 
100 
101 =head2 cached_name
102 
103  Title : cached_name
104  Function: Wrapper around L<name()> that caches its return value.
105  This is because (1) it can be expensive to get the name
106  (e.g. calling an external command), and (2) the name of a
107  Meadow is not expected to change through the life of the
108  agent.
109 
110 =cut
111 
# Returns the Meadow's name, caching it in the object's '_name' slot.
# When called on an object that already holds a true '_name', that value is
# returned without calling name(). Otherwise name() is called and, if the
# invocant is an object and the name is true, the result is stored for later.
# When called on a class name nothing can be stored, so name() is always called.
sub cached_name {
    my ($self) = @_;

    my $can_store = ref($self);     # only a blessed/reference invocant has somewhere to keep the name

    if($can_store) {
        my $stored_name = $self->{'_name'};
        return $stored_name if($stored_name);
    }

    my $name = $self->name();

    if($name and $can_store) {
        $self->{'_name'} = $name;
    }

    return $name;
}
126 
127 
128 =head2 type
129 
130  Title : type
131  Function: The "type" of a Meadow is basically its job management
132  system. eHive comes with two Meadows: Platform LSF (type
133  "LSF"), and a default fork()-based (type "LOCAL"). Other
134  meadows can be implemented provided that they follow the
135  right interface.
136 
137 =cut
138 
# Returns the Meadow type, i.e. the last '::'-separated component of the
# invocant's class name. Works both as a class method and an object method.
sub type {
    my ($invocant) = @_;

    my $class_name = ref($invocant) || $invocant;

    return (split(/::/, $class_name))[-1];
}
146 
147 
148 =head2 get_current_hostname
149 
150  Title : get_current_hostname
151  Function: Returns the "current" hostname (most UNIX-based Meadows will simply use this base method)
152 
153 =cut
154 
# Returns the name of the current host as reported by Sys::Hostname::hostname().
sub get_current_hostname {
    my $current_host = hostname();

    return $current_host;
}
158 
159 
160 =head2 signature
161 
162  Title : signature
163  Function: The "signature" of a Meadow is its unique identifier across
164  the Valley.
165 
166 =cut
167 
# Returns the Meadow's signature: its type and its (cached) name joined by '/'.
sub signature {
    my ($self) = @_;

    my $meadow_type = $self->type;
    my $meadow_name = $self->cached_name;

    return $meadow_type . '/' . $meadow_name;
}
173 
174 
175 =head2 pipeline_name
176 
177  Title : pipeline_name
178  Function: Getter/setter for the name of the current pipeline.
179  This method is used by other Meadow methods such as
180  L<job_name_prefix()>.
181 
182 =cut
183 
# Getter/setter for the pipeline name kept in the '_pipeline_name' slot.
# Passing any argument (even undef) stores it; the current value is returned.
sub pipeline_name {
    my ($self, @new_value) = @_;

    # the presence of an argument, not its definedness, decides whether we are setting
    $self->{'_pipeline_name'} = $new_value[0] if(@new_value);

    return $self->{'_pipeline_name'};
}
192 
193 
194 =head2 runWorker_path
195 
196  Title : runWorker_path
197  Function: Getter for the path to runWorker.pl
198  This is now set in the JSON config file. When missing or set to null,
199  defaults to $EHIVE_ROOT_DIR/scripts
200 
201 =cut
202 
# Returns the directory prefix under which runWorker.pl is to be found.
# The 'RunWorkerPath' config value is used when it is defined; otherwise the
# path defaults to $ENV{'EHIVE_ROOT_DIR'}.'/scripts/'. A non-empty path is
# guaranteed to end with '/', whereas an empty path is returned unchanged.
sub runWorker_path {
    my ($self) = @_;

    my $path = $self->config_get('RunWorkerPath');
    $path = $ENV{'EHIVE_ROOT_DIR'}.'/scripts/' unless(defined($path));

    # nothing to append to an empty path or to one already ending with a slash
    return $path if(!length($path) or $path =~ /\/$/);

    return $path . '/';
}
212 
213 
214 =head2 job_name_prefix
215 
216  Title : job_name_prefix
217  Function: Tells how the agents (workers) should be generally named. It
218  is used to name new agents, and to find our own agents.
219 
220 =cut
221 
# Returns the common prefix of agent names: '<pipeline_name>-Hive-' when a
# true pipeline name is set, or just 'Hive-' otherwise.
sub job_name_prefix {
    my ($self) = @_;

    my $pipeline_part = '';
    if($self->pipeline_name()) {
        $pipeline_part = $self->pipeline_name().'-';
    }

    return $pipeline_part . 'Hive-';
}
227 
228 
229 =head2 job_array_common_name
230 
231  Title : job_array_common_name
232  Function: More specific version of L<job_name_prefix()> that returns
233  the actual name that agents should have at a specific
234  beekeeper loop.
235 
236 =cut
237 
# Returns the name shared by the agents submitted for resource class $rc_name
# at beekeeper iteration $iteration: job_name_prefix() followed by
# '<rc_name>-<iteration>'.
sub job_array_common_name {
    my $self = shift;
    my ($rc_name, $iteration) = @_;

    my $prefix = $self->job_name_prefix();
    my $suffix = "${rc_name}-${iteration}";

    return $prefix . $suffix;
}
243 
244 
245 ##
246 ## The methods below must be reimplemented in a sub-class. See Meadow/LOCAL and Meadow/LSF
247 ##
248 
249 =head2 name
250 
251  Title : name
252  Function: Returns the name of the Meadow (which excludes the Meadow type)
253 
254 =cut
255 
# Placeholder: this base implementation always dies, asking for a derived
# (sub-class) method to be used instead.
sub name {
    my $self = shift;   # accepted but not used by the base implementation

    die "Please use a derived method";
}
261 
262 
263 =head2 get_current_worker_process_id
264 
265  Title : get_current_worker_process_id
266  Function: Called by a worker to find its process_id. At any point in
267  time, the triple (meadow_type, meadow_name, process_id)
268  should be unique
269 
270 =cut
271 
# Placeholder: this base implementation always dies, asking for a derived
# (sub-class) method to be used instead.
sub get_current_worker_process_id {
    my $self = shift;   # accepted but not used by the base implementation

    die "Please use a derived method";
}
277 
278 
279 =head2 status_of_all_our_workers
280 
281  Title : status_of_all_our_workers
282  Function: Returns an arrayref of arrayrefs [worker_pid, meadow_user, status, rc_name]
283  listing the workers that this Meadow can see.
284  Allowed statuses are "RUN", "PEND", "SSUSP", "UNKWN"
285 
286 =cut
287 
# Placeholder: this base implementation always dies, asking for a derived
# (sub-class) method to be used instead.
# Arguments: the invocant and $meadow_users_of_interest (both unused here).
sub status_of_all_our_workers { # returns an arrayref
    my $self                     = shift;
    my $meadow_users_of_interest = shift;

    die "Please use a derived method";
}
293 
294 
295 =head2 check_worker_is_alive_and_mine
296 
297  Title : check_worker_is_alive_and_mine
298  Function: Tells whether the given worker lives in the current Meadow
299  and belongs to the current user.
300 
301 =cut
302 
# Placeholder: this base implementation always dies, asking for a derived
# (sub-class) method to be used instead.
# Arguments: the invocant and $worker (both unused here).
sub check_worker_is_alive_and_mine {
    my $self   = shift;
    my $worker = shift;

    die "Please use a derived method";
}
308 
309 
310 =head2 kill_worker
311 
312  Title : kill_worker
313  Function: Kill a worker.
314 
315 =cut
316 
# Placeholder: this base implementation always dies, asking for a derived
# (sub-class) method to be used instead.
# Arguments: the invocant, $worker and $fast (all unused here).
sub kill_worker {
    my $self = shift;
    my ($worker, $fast) = @_;

    die "Please use a derived method";
}
322 
323 
324 =head2 parse_report_source_line
325 
326  Title : parse_report_source_line
327  Function: Opens and parses a file / command-line to return the
328  resource-usage of some workers. Should return a hashref
329  where process_id is the key to a hashref composed of:
330  when_died
331  pending_sec
332  exception_status
333  cause_of_death
334  lifespan_sec
335  mem_megs
336  cpu_sec
337  exit_status
338  swap_megs
339 
340 =cut
341 
# Base implementation: emits a warning saying that the invocant's class does
# not support resource usage logs, and returns nothing (undef / empty list).
# $bacct_source_line is accepted but not used here.
sub parse_report_source_line {
    my $self              = shift;
    my $bacct_source_line = shift;

    my $meadow_class = ref($self);
    warn "\t" . $meadow_class . " does not support resource usage logs\n";

    return;
}
349 
350 
351 =head2 get_report_entries_for_process_ids
352 
353  Title : get_report_entries_for_process_ids
354  Function: A higher-level method that gets process_ids as input and
355  returns a structure like parse_report_source_line.
356 
357 =cut
358 
# Base implementation: emits a warning saying that the invocant's class does
# not support resource usage logs, and returns nothing (undef / empty list).
# The list of process ids is accepted but not used here.
sub get_report_entries_for_process_ids {
    my $self        = shift;
    my @process_ids = @_;

    my $meadow_class = ref($self);
    warn "\t" . $meadow_class . " does not support resource usage logs\n";

    return;
}
366 
367 =head2 get_report_entries_for_time_interval
368 
369  Title : get_report_entries_for_time_interval
370  Function: A higher-level method that gets a time interval as input and
371  returns a structure like parse_report_source_line.
372 
373 =cut
374 
375 
# Base implementation: emits a warning saying that the invocant's class does
# not support resource usage logs, and returns nothing (undef / empty list).
# $from_time, $to_time and $username are accepted but not used here.
sub get_report_entries_for_time_interval {
    my $self = shift;
    my ($from_time, $to_time, $username) = @_;

    my $meadow_class = ref($self);
    warn "\t" . $meadow_class . " does not support resource usage logs\n";

    return;
}
383 
384 
385 =head2 submit_workers_return_meadow_pids
386 
387  Title : submit_workers_return_meadow_pids
388  Function: Submit $required_worker_count workers with the command $worker_cmd and return the meadow-specific worker_pids
389 
390 =cut
391 
# Placeholder: this base implementation always dies, asking for a derived
# (sub-class) method to be used instead.
# Arguments (all unused here): the invocant, $worker_cmd, $required_worker_count,
# $iteration, $rc_name, $rc_specific_submission_cmd_args and $submit_log_subdir.
sub submit_workers_return_meadow_pids {
    my $self = shift;
    my ($worker_cmd, $required_worker_count, $iteration, $rc_name, $rc_specific_submission_cmd_args, $submit_log_subdir) = @_;

    die "Please use a derived method";
}
397 
398 
399 =head2 run_on_host
400 
401  Title : run_on_host
402  Function: Runs an arbitrary command on the given host. The host is expected to belong to the meadow and be reachable
403 
404 =cut
405 
# Runs $command (an arrayref holding the command and its arguments) on
# $meadow_host as $meadow_user through ssh, and returns the value of system().
sub run_on_host {
    my ($self, $meadow_host, $meadow_user, $command) = @_;

    # Hard-coded ssh options:
    #  - BatchMode=yes disables human interaction (no password asked)
    #  - ServerAliveInterval=30 has ssh probe the server after 30 seconds without any data from it
    my @ssh_options = ('-o', 'BatchMode=yes', '-o', 'ServerAliveInterval=30');

    # By default we trust the network (host keys are neither checked nor recorded),
    # but this can be switched off in the config file via 'StrictHostKeyChecking'
    unless($self->config_get('StrictHostKeyChecking')) {
        unshift @ssh_options, qw(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null);
    }

    my $destination = sprintf('%s@%s', $meadow_user, $meadow_host);

    # "timeout 3m" means that the whole command must complete within 3 minutes
    return system('timeout', '3m', 'ssh', @ssh_options, $destination, @$command);
}
416 
417 1;
public usage()