=pod

=head1 NAME

    Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf

=head1 SYNOPSIS

    # Example 1: specifying only the mandatory option:
    init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf -password <mypass>

    # Example 2: specifying the mandatory options as well as overriding some defaults:
    init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf -host <myhost> -dbname <mydbname> -password <mypass>

=head1 DESCRIPTION

    Generic configuration module for all Hive pipelines with loader functionality.
    All other Hive PipeConfig modules should inherit from this module and will probably need to redefine some or all of the following interface methods:

        * default_options: returns a hash of (possibly multilevel) defaults for the options on which the rest of the configuration depends

        * pipeline_create_commands: returns a list of strings that will be executed as system commands needed to create and set up the pipeline database

        * pipeline_wide_parameters: returns a hash of pipeline-wide parameter names and their values

        * resource_classes: returns a hash of resource class definitions

        * pipeline_analyses: returns a list of hash structures that define analysis objects bundled with definitions of corresponding jobs, rules and resources

        * beekeeper_extra_cmdline_options: returns a string with command-line options that you want to be passed to beekeeper.pl

    Anywhere except in the keys of default_options(), a call to $self->o('myoption') can be used.
    This call means "substitute this call for the value of 'myoption' at the time of configuring the pipeline".
    All option names mentioned in $self->o() calls within the interface methods above can be given non-default values from the command line.

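    For example, a minimal subclass might look like this (an illustrative sketch, not a real module; 'MyTiny_conf' and the 'take_time' option are made-up names):

        package Bio::EnsEMBL::Hive::PipeConfig::MyTiny_conf;
        use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf');

        sub default_options {
            my ($self) = @_;
            return {
                %{ $self->SUPER::default_options() },   # inherit the generic defaults
                'take_time' => 0,                       # overridable from the command line as -take_time <value>
            };
        }

        sub pipeline_wide_parameters {
            my ($self) = @_;
            return {
                %{ $self->SUPER::pipeline_wide_parameters() },
                'take_time' => $self->o('take_time'),   # substituted when the pipeline is configured
            };
        }

        1;
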
    Please make sure you have studied the pipeline configuration examples in Bio::EnsEMBL::Hive::PipeConfig before creating your own PipeConfig modules.

=head1 LICENSE

    Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
    Copyright [2016-2024] EMBL-European Bioinformatics Institute

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License
    is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and limitations under the License.

=head1 CONTACT

    Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates

=cut


package Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf;

use strict;
use warnings;

use Exporter 'import';
our @EXPORT = qw(WHEN ELSE INPUT_PLUS);

use Scalar::Util qw(looks_like_number);

use Bio::EnsEMBL::Hive::Utils ('stringify', 'join_command_args', 'whoami');
use Bio::EnsEMBL::Hive::Utils::PCL;
use Bio::EnsEMBL::Hive::Utils::URL;
use Bio::EnsEMBL::Hive::DBSQL::SqlSchemaAdaptor;
use Bio::EnsEMBL::Hive::AnalysisJob;
use Bio::EnsEMBL::Hive::Valley;

use base ('Bio::EnsEMBL::Hive::DependentOptions');

# ---------------------------[the following methods will be overridden by specific pipelines]-------------------------


=head2 default_options

    Description : Interface method that should return a hash of option_name->default_option_value pairs.
                  Please see existing PipeConfig modules for examples.

=cut

sub default_options {
    my ($self) = @_;
    return {
        'hive_root_dir'     => $ENV{'EHIVE_ROOT_DIR'},                  # this value is set up automatically if this code is run by init_pipeline.pl

        'hive_driver'       => 'mysql',
        'host'              => $ENV{'EHIVE_HOST'} || 'localhost',       # BEWARE that 'localhost' for the mysql driver usually means a UNIX socket, not a TCP/IP socket!
                                                                        # If you need to connect to a TCP/IP socket, set -host => '127.0.0.1' instead.

        'port'              => $ENV{'EHIVE_PORT'},                      # or remain undef, which means the default for the driver
        'user'              => $ENV{'EHIVE_USER'} // $self->o('user'),
        'password'          => $ENV{'EHIVE_PASS'} // $self->o('password'),  # people will have to make an effort NOT to insert it into config files like .bashrc etc
        'dbowner'           => $ENV{'EHIVE_USER'} || whoami() || $self->o('dbowner'),   # although it is very unlikely that the current user has no name

        'hive_use_triggers'                 => 0,   # there have been a few cases of big pipelines misbehaving with triggers on, so let's keep the default off
        'hive_use_param_stack'              => 0,   # do not reconstruct the calling stack of parameters by default (yet)
        'hive_auto_rebalance_semaphores'    => 0,   # do not attempt to rebalance semaphores periodically by default
        'hive_default_max_retry_count'      => 3,   # default value for the max_retry_count parameter of each analysis
        'hive_force_init'                   => 0,   # setting it to 1 will drop the database prior to creation (use with care!)
        'hive_no_init'                      => 0,   # setting it to 1 will skip pipeline_create_commands (useful for topping up)
        'hive_debug_init'                   => 0,   # setting it to 1 will make init_pipeline.pl tell everything it's doing

        'pipeline_name'     => $self->default_pipeline_name(),

        'pipeline_db'       => {
            -driver => $self->o('hive_driver'),
            -host   => $self->o('host'),
            -port   => $self->o('port'),
            -user   => $self->o('user'),
            -pass   => $self->o('password'),
            -dbname => $self->o('dbowner').'_'.$self->o('pipeline_name'),
        },
    };
}
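
# Any option named in the $self->o() calls above can be given a non-default
# value from the command line at init_pipeline time, e.g. (an illustrative
# invocation, with placeholder values):
#
#   init_pipeline.pl Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf \
#       -password <mypass> -pipeline_name my_test -hive_force_init 1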


=head2 pipeline_create_commands

    Description : Interface method that should return a list of command lines to be run in order to create and set up the pipeline database.
                  Please see existing PipeConfig modules for examples.

=cut

sub pipeline_create_commands {
    my $self = shift @_;

    my $pipeline_url    = $self->pipeline_url();
    my $second_pass     = $pipeline_url !~ /^#:subst/;

    my $parsed_url      = $second_pass && (Bio::EnsEMBL::Hive::Utils::URL::parse( $pipeline_url ) || die "Could not parse '$pipeline_url' as a database URL");
    my $driver          = $second_pass ? $parsed_url->{'driver'} : '';
    my $hive_force_init = $self->o('hive_force_init');

    # Will insert two keys: "hive_all_base_tables" and "hive_all_views"
    my $hive_tables_sql = 'INSERT INTO hive_meta SELECT CONCAT("hive_all_", REPLACE(LOWER(TABLE_TYPE), " ", "_"), "s"), GROUP_CONCAT(TABLE_NAME) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = "%s" GROUP BY TABLE_TYPE';

    return [
        $hive_force_init ? $self->db_cmd('DROP DATABASE IF EXISTS') : (),
        $self->db_cmd('CREATE DATABASE'),

        # we have table definitions for all drivers:
        $self->db_cmd().' <'.$self->o('hive_root_dir').'/sql/tables.'.$driver,

        # auto-sync'ing triggers are off by default:
        $self->o('hive_use_triggers') ? ( $self->db_cmd().' <'.$self->o('hive_root_dir').'/sql/triggers.'.$driver ) : (),

        # FOREIGN KEY constraints cannot be defined in sqlite separately from table definitions, so they are off there:
        ($driver ne 'sqlite') ? ( $self->db_cmd().' <'.$self->o('hive_root_dir').'/sql/foreign_keys.sql' ) : (),

        # we have procedure definitions for all drivers:
        $self->db_cmd().' <'.$self->o('hive_root_dir').'/sql/procedures.'.$driver,

        # list of all tables and views (MySQL only)
        ($driver eq 'mysql' ? ($self->db_cmd(sprintf($hive_tables_sql, $parsed_url->{'dbname'}))) : ()),

        # when the database was created
        $self->db_cmd(q{INSERT INTO hive_meta (meta_key, meta_value) VALUES ('creation_timestamp', CURRENT_TIMESTAMP)}),
    ];
}
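
# A subclass that needs extra setup steps would typically extend this list
# rather than replace it (a minimal sketch; the 'work_dir' option is a
# hypothetical name):
#
#   sub pipeline_create_commands {
#       my ($self) = @_;
#       return [
#           @{ $self->SUPER::pipeline_create_commands },    # keep the database-creation commands defined above
#           'mkdir -p '.$self->o('work_dir'),               # plus a pipeline-specific shell command
#       ];
#   }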


=head2 pipeline_wide_parameters

    Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs.
                  The value doesn't have to be a scalar; it can be any Perl structure (it will be stringified and de-stringified automagically).
                  Please see existing PipeConfig modules for examples.

=cut

sub pipeline_wide_parameters {
    my ($self) = @_;
    return {
        # 'variable1' => 'value1',
        # 'variable2' => 'value2',
    };
}
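
# In a subclass these parameters are usually derived from options (a minimal
# sketch; 'genome_db_id' is a hypothetical parameter name):
#
#   sub pipeline_wide_parameters {
#       my ($self) = @_;
#       return {
#           %{ $self->SUPER::pipeline_wide_parameters },    # keep any inherited parameters
#           'genome_db_id' => $self->o('genome_db_id'),     # visible to every analysis of the pipeline
#       };
#   }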


=head2 resource_classes

    Description : Interface method that should return a hash of resource_description_id->resource_description_hash pairs.
                  Please see existing PipeConfig modules for examples.

=cut

sub resource_classes {
    my ($self) = @_;
    return {
## No longer supported resource declaration syntax:
#       1 => { -desc => 'default',  'LSF' => '' },
#       2 => { -desc => 'urgent',   'LSF' => '-q production' },
## Currently supported resource declaration syntax:
        'default' => { 'LSF' => '' },
        'urgent'  => { 'LSF' => '-q production' },
    };
}
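
# Resource classes map a name to per-meadow submission arguments; a subclass
# would typically add entries like the following (a sketch; the exact LSF
# arguments depend on your cluster's configuration):
#
#   sub resource_classes {
#       my ($self) = @_;
#       return {
#           %{ $self->SUPER::resource_classes },    # keep 'default' and 'urgent'
#           '4Gb_job' => { 'LSF' => '-M4000 -R"rusage[mem=4000]"' },
#       };
#   }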


=head2 pipeline_analyses

    Description : Interface method that should return a list of hashes that define analyses bundled with corresponding jobs, dataflow and analysis_ctrl rules and resource_ids.
                  Please see existing PipeConfig modules for examples.

=cut

sub pipeline_analyses {
    my ($self) = @_;
    return [
    ];
}
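
# A subclass returns its analyses here; each hash supports keys such as
# -logic_name, -module, -parameters, -input_ids, -rc_name, -wait_for and
# -flow_into (a minimal two-analysis sketch; the logic_names and the command
# are made up, the RunnableDB modules are real):
#
#   sub pipeline_analyses {
#       my ($self) = @_;
#       return [
#           {   -logic_name => 'first_step',
#               -module     => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
#               -parameters => { 'cmd' => 'echo hello' },
#               -input_ids  => [ {} ],                      # seed one job at init_pipeline time
#               -flow_into  => { 1 => [ 'second_step' ] },  # autoflow into the next analysis
#           },
#           {   -logic_name => 'second_step',
#               -module     => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
#               -rc_name    => 'urgent',
#           },
#       ];
#   }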


=head2 beekeeper_extra_cmdline_options

    Description : Interface method that should return a string with extra parameters that you want to be passed to beekeeper.pl.

=cut

sub beekeeper_extra_cmdline_options {
    my ($self) = @_;

    return '';
}
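
# A subclass might return e.g. a job limit or a shorter sleep interval
# (a sketch; whether these beekeeper.pl flags suit your pipeline is up to you):
#
#   sub beekeeper_extra_cmdline_options {
#       my ($self) = @_;
#       return '-sleep 0.5 -job_limit 100';
#   }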


# ---------------------------------[now comes the interfacing stuff - feel free to call but not to modify]--------------------


sub hive_meta_table {
    my ($self) = @_;

    return {
        'hive_sql_schema_version'           => Bio::EnsEMBL::Hive::DBSQL::SqlSchemaAdaptor->get_code_sql_schema_version(),
        'hive_pipeline_name'                => $self->o('pipeline_name'),
        'hive_use_param_stack'              => $self->o('hive_use_param_stack'),
        'hive_auto_rebalance_semaphores'    => $self->o('hive_auto_rebalance_semaphores'),
        'hive_default_max_retry_count'      => $self->o('hive_default_max_retry_count'),
    };
}


sub pre_options {
    my $self = shift @_;

    return {
        'help!'         => '',
        'pipeline_url'  => '',
        'pipeline_name' => '',
    };
}


=head2 dbconn_2_url

    Description : A convenience method used to stringify a connection-parameters hash into a 'pipeline_url' that beekeeper.pl will understand.

=cut

sub dbconn_2_url {
    my ($self, $db_conn, $with_db) = @_;

    $with_db = 1 unless(defined($with_db));

    my $driver = $self->o($db_conn, '-driver');
    my $port   = $self->o($db_conn, '-port');

    return (    ($driver eq 'sqlite')
        ? $driver.':///'
        : $driver.'://'.$self->o($db_conn,'-user').':'.$self->o($db_conn,'-pass').'@'.$self->o($db_conn,'-host').($port ? ':'.$port : '').'/'
    ) . ($with_db ? $self->o($db_conn,'-dbname') : '');
}
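
# The resulting URLs look like this (illustrative values):
#
#   mysql://user:pass@host:3306/dbname      # server-based drivers
#   sqlite:///dbname                        # sqlite (the "database" is a local file)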


sub pipeline_url {
    my $self = shift @_;

    return $self->root()->{'pipeline_url'} || $self->dbconn_2_url('pipeline_db', 1);    # used to force vivification of the whole 'pipeline_db' structure (used in run() )
}


=head2 db_cmd

    Description : Returns a db_cmd.pl-based command line that can be executed by any supported driver (mysql/pgsql/sqlite).

=cut

sub db_cmd {
    my ($self, $sql_command, $db_url) = @_;

    $db_url //= $self->pipeline_url();
    my $db_cmd_path = $self->o('hive_root_dir').'/scripts/db_cmd.pl';
    $sql_command =~ s/'/'\\''/g if $sql_command;
    return "$db_cmd_path -url '$db_url'".($sql_command ? " -sql '$sql_command'" : '');
}
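
# The generated command lines look like this (illustrative values, script path
# shortened; the real path is taken from hive_root_dir):
#
#   db_cmd.pl -url 'mysql://user:pass@host:3306/my_db' -sql 'CREATE DATABASE'
#   db_cmd.pl -url 'mysql://user:pass@host:3306/my_db'      # without -sql: just open a client session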


sub print_debug {
    my $self = shift;
    print @_ if $self->o('hive_debug_init');
}


sub process_pipeline_name {
    my ($self, $ppn) = @_;

    $ppn =~ s/([[:lower:]])([[:upper:]])/${1}_${2}/g;   # turn CamelCase into Camel_Case
    $ppn =~ s/[\s\/]/_/g;                               # replace spaces and slashes with underscores
    $ppn = lc($ppn);

    return $ppn;
}
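
# For example (illustrative):  process_pipeline_name('LongMult/Test')  returns  'long_mult_test'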


sub default_pipeline_name {
    my $self = shift @_;

    my $dpn = ref($self);       # get the original class name
    $dpn =~ s/^.*:://;          # trim the leading classpath prefix
    $dpn =~ s/_conf$//;         # trim the optional _conf from the end

    return $dpn;
}
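
# For example (illustrative):  Bio::EnsEMBL::Hive::PipeConfig::LongMult_conf  becomes  'LongMult'
# (which process_pipeline_name() above would then turn into 'long_mult')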


=head2 process_options

    Description : The method that does all the parameter parsing magic.
                  It makes two passes through the interface methods: the first pass collects the options, the second performs the intelligent substitution.

    Caller      : init_pipeline.pl or any other script that will drive this module.

    Note        : You can override the command-line parsing by providing a hash as the argument to this method.
                  This hash should contain definitions of all the parameters you would otherwise be providing from the command line.
                  Useful if you are creating batches of hive pipelines using a script.

=cut

sub process_options {
    my ($self, $include_pcc_use_case) = @_;

    # pre-patch definitely_used_options:
    $self->{'_extra_options'} = $self->load_cmdline_options( $self->pre_options() );
    $self->root()->{'pipeline_url'} = $self->{'_extra_options'}{'pipeline_url'};

    my @use_cases = ( 'pipeline_wide_parameters', 'resource_classes', 'pipeline_analyses', 'beekeeper_extra_cmdline_options', 'hive_meta_table', 'print_debug' );
    if($include_pcc_use_case) {
        unshift @use_cases, 'overridable_pipeline_create_commands';
        push @use_cases, 'useful_commands_legend';
    }
    $self->use_cases( \@use_cases );

    $self->SUPER::process_options();

    # post-processing:
    $self->root()->{'pipeline_name'} = $self->process_pipeline_name( $self->root()->{'pipeline_name'} );
    $self->root()->{'pipeline_db'}{'-dbname'} &&= $self->process_pipeline_name( $self->root()->{'pipeline_db'}{'-dbname'} );    # may be used to construct $self->pipeline_url()
}


sub overridable_pipeline_create_commands {
    my $self                     = shift @_;
    my $pipeline_create_commands = $self->pipeline_create_commands();

    return $self->o('hive_no_init') ? [] : $pipeline_create_commands;
}


sub is_analysis_topup {
    my $self = shift @_;

    return $self->o('hive_no_init');
}


sub run_pipeline_create_commands {
    my $self = shift @_;

    foreach my $cmd (@{$self->overridable_pipeline_create_commands}) {
        # We allow commands to be given as an arrayref, but we join the array elements anyway:
        (my $dummy, $cmd) = join_command_args($cmd);
        $self->print_debug( "$cmd\n" );
        if(my $retval = system($cmd)) {
            die "Return value = $retval, possibly an error running $cmd\n";
        }
    }
    $self->print_debug( "\n" );
}


=head2 add_objects_from_config

    Description : The method that uses the Hive/EnsEMBL API to actually create all the analyses, jobs, dataflow and control rules and resource descriptions.

    Caller      : init_pipeline.pl or any other script that will drive this module.

=cut

sub add_objects_from_config {
    my $self     = shift @_;
    my $pipeline = shift @_;

    $self->print_debug( "Adding hive_meta table entries ...\n" );
    my $new_meta_entries = $self->hive_meta_table();
    while( my ($meta_key, $meta_value) = each %$new_meta_entries ) {
        $pipeline->add_new_or_update( 'MetaParameters', $self->o('hive_debug_init'),
            'meta_key'      => $meta_key,
            'meta_value'    => $meta_value,
        );
    }
    $self->print_debug( "Done.\n\n" );

    $self->print_debug( "Adding pipeline-wide parameters ...\n" );
    my $new_pwp_entries = $self->pipeline_wide_parameters();
    while( my ($param_name, $param_value) = each %$new_pwp_entries ) {
        $pipeline->add_new_or_update( 'PipelineWideParameters', $self->o('hive_debug_init'),
            'param_name'    => $param_name,
            'param_value'   => stringify($param_value),
        );
    }
    $self->print_debug( "Done.\n\n" );

    $self->print_debug( "Adding Resources ...\n" );
    my $resource_classes_hash = $self->resource_classes;
    unless( exists $resource_classes_hash->{'default'} ) {
        warn "\tNB: the 'default' resource class is not in the database (did you forget to inherit from SUPER::resource_classes ?) - creating it for you\n";
        $resource_classes_hash->{'default'} = {};
    }
    my @resource_classes_order = sort { ($b eq 'default') or -($a eq 'default') or ($a cmp $b) } keys %$resource_classes_hash;   # put 'default' at the front
    my %cached_resource_classes = map {$_->name => $_} $pipeline->collection_of('ResourceClass')->list();
    foreach my $rc_name (@resource_classes_order) {
        if($rc_name =~ /^\d+$/) {
            die "-rc_id syntax is no longer supported, please use the new resource notation (-rc_name)";
        }

        my ($resource_class) = $pipeline->add_new_or_update( 'ResourceClass',   # NB: add_new_or_update returns a list
            'name'  => $rc_name,
        );
        $cached_resource_classes{$rc_name} = $resource_class;

        while( my($meadow_type, $resource_param_list) = each %{ $resource_classes_hash->{$rc_name} } ) {
            $resource_param_list = [ $resource_param_list ] unless(ref($resource_param_list));  # expecting either a scalar or a 2-element array

            my ($resource_description) = $pipeline->add_new_or_update( 'ResourceDescription', $self->o('hive_debug_init'),  # NB: add_new_or_update returns a list
                'resource_class'        => $resource_class,
                'meadow_type'           => $meadow_type,
                'submission_cmd_args'   => $resource_param_list->[0],
                'worker_cmd_args'       => $resource_param_list->[1],
            );
        }
    }
    $self->print_debug( "Done.\n\n" );


    my $amh = Bio::EnsEMBL::Hive::Valley->new()->available_meadow_hash();

    my %seen_logic_name = ();
    my %analyses_by_logic_name = map {$_->logic_name => $_} $pipeline->collection_of('Analysis')->list();

    $self->print_debug( "Adding Analyses ...\n" );
    foreach my $aha (@{$self->pipeline_analyses}) {
        my %aha_copy = %$aha;
        my ($logic_name, $module, $parameters_hash, $comment, $tags, $input_ids, $blocked, $batch_size, $hive_capacity, $failed_job_tolerance,
            $max_retry_count, $can_be_empty, $rc_id, $rc_name, $priority, $meadow_type, $analysis_capacity, $language, $wait_for, $flow_into)
          = delete @aha_copy{qw(-logic_name -module -parameters -comment -tags -input_ids -blocked -batch_size -hive_capacity -failed_job_tolerance
            -max_retry_count -can_be_empty -rc_id -rc_name -priority -meadow_type -analysis_capacity -language -wait_for -flow_into)};   # slicing a hash reference

        my @unparsed_attribs = keys %aha_copy;
        if(@unparsed_attribs) {
            die "Could not parse the following analysis attributes: ".join(', ', @unparsed_attribs);
        }

        if( not $logic_name ) {
            die "'-logic_name' must be defined in every analysis";
        } elsif( $logic_name =~ /[+\-\%\.,]/ ) {
            die "Characters + - % . , are no longer allowed to be a part of an Analysis name. Please rename Analysis '$logic_name' and try again.\n";
        } elsif( looks_like_number($logic_name) ) {
            die "Numeric Analysis names are not allowed because they may clash with dbIDs. Please rename Analysis '$logic_name' and try again.\n";
        }

        if($seen_logic_name{$logic_name}++) {
            die "an entry with -logic_name '$logic_name' appears at least twice in the same configuration file, probably a typo";
        }

        if($rc_id) {
            die "(-rc_id => $rc_id) syntax is deprecated, please use (-rc_name => 'your_resource_class_name')";
        }

        my $analysis = $analyses_by_logic_name{$logic_name};    # the analysis with this logic_name may have already been stored in the db
        my $stats;
        if( $analysis ) {

            warn "Skipping creation of already existing analysis '$logic_name'.\n";
            next;

        } else {

            $rc_name ||= 'default';
            my $resource_class = $cached_resource_classes{$rc_name}
                or die "Could not find local resource with name '$rc_name', please check that the resource_classes() method of your PipeConfig either contains it or inherits it from the parent class";

            if ($meadow_type and not exists $amh->{$meadow_type}) {
                warn "The meadow '$meadow_type' is currently not registered (analysis '$logic_name')\n";
            }

            $parameters_hash ||= {};    # in case nothing was given
            die "'-parameters' has to be a hash" unless(ref($parameters_hash) eq 'HASH');

            ($analysis) = $pipeline->add_new_or_update( 'Analysis', $self->o('hive_debug_init'),    # NB: add_new_or_update returns a list
                'logic_name'            => $logic_name,
                'module'                => $module,
                'language'              => $language,
                'parameters'            => $parameters_hash,
                'comment'               => $comment,
                'tags'                  => ( (ref($tags) eq 'ARRAY') ? join(',', @$tags) : $tags ),
                'resource_class'        => $resource_class,
                'failed_job_tolerance'  => $failed_job_tolerance,
                'max_retry_count'       => $max_retry_count,
                'can_be_empty'          => $can_be_empty,
                'priority'              => $priority,
                'meadow_type'           => $meadow_type,
                'analysis_capacity'     => $analysis_capacity,
                'hive_capacity'         => $hive_capacity,
                'batch_size'            => $batch_size,
            );
            $analysis->get_compiled_module_name();  # check that the module compiles and is named correctly

            ($stats) = $pipeline->add_new_or_update( 'AnalysisStats', $self->o('hive_debug_init'),  # NB: add_new_or_update returns a list
                'analysis'              => $analysis,
                'status'                => $blocked ? 'BLOCKED' : 'EMPTY',  # be careful, as this "soft" way of blocking may be accidentally unblocked by deep sync
                'total_job_count'       => 0,
                'semaphored_job_count'  => 0,
                'ready_job_count'       => 0,
                'done_job_count'        => 0,
                'failed_job_count'      => 0,
                'num_running_workers'   => 0,
                'sync_lock'             => 0,
            );
        }

        # Keep a link to the analysis object to speed up the creation of control and dataflow rules:
        $analyses_by_logic_name{$logic_name} = $analysis;

        # now create the corresponding jobs (if there are any):
        if($input_ids) {
            push @{ $analysis->jobs_collection }, map { Bio::EnsEMBL::Hive::AnalysisJob->new(
                'prev_job'  => undef,   # these jobs are created by the initialization script, not by another job
                'analysis'  => $analysis,
                'input_id'  => $_,      # input_ids are now centrally stringified in the AnalysisJob itself
            ) } @$input_ids;

            unless( $pipeline->hive_use_triggers() ) {
                $stats->recalculate_from_job_counts( { 'READY' => scalar(@$input_ids) } );
            }
        }
    }
    $self->print_debug( "Done.\n\n" );

    $self->print_debug( "Adding Control and Dataflow Rules ...\n" );
    foreach my $aha (@{$self->pipeline_analyses}) {

        my ($logic_name, $wait_for, $flow_into)
            = @{$aha}{qw(-logic_name -wait_for -flow_into)};    # slicing a hash reference

        my $analysis = $analyses_by_logic_name{$logic_name};

        if($wait_for) {
            Bio::EnsEMBL::Hive::Utils::PCL::parse_wait_for($pipeline, $analysis, $wait_for, $self->o('hive_debug_init'));
        }

        if($flow_into) {
            Bio::EnsEMBL::Hive::Utils::PCL::parse_flow_into($pipeline, $analysis, $flow_into, $self->o('hive_debug_init'));
        }

    }
    $self->print_debug( "Done.\n\n" );

    # Block the analyses that should be blocked:
    $self->print_debug( "Blocking the analyses that should be ...\n" );
    foreach my $stats ($pipeline->collection_of('AnalysisStats')->list()) {
        $stats->check_blocking_control_rules('no_die');
        $stats->determine_status();
    }
    $self->print_debug( "Done.\n\n" );
}


sub useful_commands_legend {
    my $self = shift @_;

    my $pipeline_url = $self->pipeline_url();
    unless ($pipeline_url =~ /^[\'\"]/) {
        $pipeline_url = '"' . $pipeline_url . '"';
    }
    my $pipeline_name = $self->o('pipeline_name');
    my $extra_cmdline = $self->beekeeper_extra_cmdline_options();

    my @output_lines = (
        '','',
        '# ' . '-' x 22 . '[Useful commands]' . '-' x 22,
        '',
        " # It is convenient to store the pipeline url in a variable:",
        "\texport EHIVE_URL=$pipeline_url\t\t\t# bash version",
        "(OR)",
        "\tsetenv EHIVE_URL $pipeline_url\t\t\t# [t]csh version",
        '',
        " # Add a new job to the pipeline (usually done once before running, but the pipeline can be \"topped up\" at any time) :",
        "\tseed_pipeline.pl -url $pipeline_url -logic_name <analysis_name> -input_id <param_hash>",
        '',
        " # At any moment during or after execution you can request a pipeline diagram in an image file (the desired format is set via the extension) :",
        "\tgenerate_graph.pl -url $pipeline_url -out $pipeline_name.png",
        '',
        " # Synchronize the Hive (to display fresh statistics about all analyses) :",
        "\tbeekeeper.pl -url $pipeline_url -sync",
        '',
        " # Depending on the Meadow the pipeline is running on, you may be able to collect actual resource usage statistics :",
        "\tload_resource_usage.pl -url $pipeline_url",
        '',
        " # After having run load_resource_usage.pl, you can request a resource usage timeline in an image file (the desired format is set via the extension) :",
        "\tgenerate_timeline.pl -url $pipeline_url -out timeline_$pipeline_name.png",
        '',
        " # Peek into your pipeline database with a database client (useful to have open while the pipeline is running) :",
        "\tdb_cmd.pl -url $pipeline_url",
        '',
        " # Run the pipeline (can be interrupted and restarted) :",
        "\tbeekeeper.pl -url $pipeline_url $extra_cmdline -loop\t\t# run in looped automatic mode (a scheduling step is performed every minute)",
        "(OR)",
        "\tbeekeeper.pl -url $pipeline_url $extra_cmdline -run \t\t# run one scheduling step of the pipeline and exit (useful for debugging/learning)",
        "(OR)",
        "\trunWorker.pl -url $pipeline_url $extra_cmdline \t\t# run exactly one Worker locally (useful for debugging/learning)",
        '',
    );

    return join("\n", @output_lines);
}

1;