6 # Finding out own path in order to reference own components (including own modules):
10 $ENV{
'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11 unshift @INC, $ENV{
'EHIVE_ROOT_DIR'}.
'/modules';
14 use Getopt::Long qw(:config no_auto_abbrev);
28 my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc); # Connection parameters
29 my ($preregistered, $resource_class_id, $resource_class_name, $analyses_pattern, $analysis_id, $logic_name, $job_id, $force, $beekeeper_id); # Task specification parameters
30 my ($job_limit, $life_span, $no_cleanup, $no_write, $worker_cur_dir, $hive_log_dir, $worker_log_dir, $worker_base_temp_dir, $retry_throwing_jobs, $can_respecialize, # Worker control parameters
31 $worker_delay_startup_seconds, $worker_crash_on_startup_prob, $config_files);
32 my ($help, $report_versions, $debug);
37 $|=1; # make STDOUT unbuffered (STDERR is unbuffered anyway)
41 # Connection parameters:
43 'reg_conf|regfile|reg_file=s' => \$reg_conf,
44 'reg_type=s' => \$reg_type,
45 'reg_alias|regname|reg_name=s' => \$reg_alias,
46 'nosqlvc' => \$nosqlvc, #
using "nosqlvc" instead of
"sqlvc!" for consistency with scripts where it is a propagated option
49 'config_file=s@' => $config_files,
51 # Task specification parameters:
52 'preregistered!' => \$preregistered,
53 'rc_id=i' => \$resource_class_id,
54 'rc_name=s' => \$resource_class_name,
55 'analyses_pattern=s' => \$analyses_pattern,
56 'analysis_id=i' => \$analysis_id,
57 'logic_name=s' => \$logic_name,
58 'job_id=i' => \$job_id,
60 'beekeeper_id=i' => \$beekeeper_id,
62 # Worker control parameters:
63 'job_limit=i' => \$job_limit,
64 'life_span|lifespan=i' => \$life_span,
65 'no_cleanup' => \$no_cleanup,
66 'no_write' => \$no_write,
67 'worker_cur_dir|cwd=s' => \$worker_cur_dir,
68 'hive_log_dir|hive_output_dir=s' => \$hive_log_dir, # keep compatibility with the old name
69 'worker_log_dir|worker_output_dir=s' => \$worker_log_dir, # will take precedence over hive_log_dir
if set
70 'worker_base_temp_dir=s' => \$worker_base_temp_dir,
71 'retry_throwing_jobs!' => \$retry_throwing_jobs,
72 'can_respecialize|can_respecialise!' => \$can_respecialize,
73 'worker_delay_startup_seconds=i' => \$worker_delay_startup_seconds,
74 'worker_crash_on_startup_prob=f' => \$worker_crash_on_startup_prob,
78 'v|version|versions' => \$report_versions,
80 ) or die "Error in command line arguments\n";
83 die
"ERROR: There are invalid arguments on the command-line: ". join(
" ", @ARGV).
"\n";
87 pod2usage({-exitvalue => 0, -verbose => 2});
90 if($report_versions) {
95 if ($worker_cur_dir) {
    # Changing into the requested directory allows using relative paths for Sqlite URLs, registry files etc.
    # chdir() returns false on failure (and sets $!), so report the problem instead of silently
    # carrying on in the wrong directory, where relative paths would then fail with misleading errors.
    chdir $worker_cur_dir
        or warn "WARNING: could not change the current directory to '$worker_cur_dir': $!\n";
}
99 if($url or $reg_alias) {
103 -reg_conf => $reg_conf,
104 -reg_type => $reg_type,
105 -reg_alias => $reg_alias,
106 -no_sql_schema_version_check => $nosqlvc,
108 $pipeline->hive_dba()->dbc->requires_write_access();
111 die
"\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
114 unless($pipeline->hive_dba) {
115 die
"ERROR : no database connection, the pipeline could not be accessed\n\n";
119 warn
"-logic_name is now deprecated, please use -analyses_pattern that extends the functionality of -logic_name and -analysis_id .\n";
120 $analyses_pattern = $logic_name;
121 } elsif ( $analysis_id ) {
122 warn
"-analysis_id is now deprecated, please use -analyses_pattern that extends the functionality of -analysis_id and -logic_name .\n";
123 $analyses_pattern = $analysis_id;
126 my %specialisation_options = (
127 preregistered => $preregistered,
128 resource_class_id => $resource_class_id,
129 resource_class_name => $resource_class_name,
130 can_respecialize => $can_respecialize,
131 analyses_pattern => $analyses_pattern,
134 beekeeper_id => $beekeeper_id,
137 job_limit => $job_limit,
138 life_span => $life_span,
139 retry_throwing_jobs => $retry_throwing_jobs,
140 worker_delay_startup_seconds => $worker_delay_startup_seconds,
141 worker_crash_on_startup_prob => $worker_crash_on_startup_prob,
143 my %execution_options = (
144 config_files => $config_files,
145 no_cleanup => $no_cleanup,
146 no_write => $no_write,
147 worker_base_temp_dir=> $worker_base_temp_dir,
148 worker_log_dir => $worker_log_dir,
149 hive_log_dir => $hive_log_dir,
163 runWorker.pl [options]
167 runWorker.pl is an eHive component script that does the work of a single Worker.
168 It specialises in one of the analyses and starts executing Jobs of that Analysis one-by-one or batch-by-batch.
170 Most of the functionality of eHive is accessible via the beekeeper.pl script,
171 but feel free to run runWorker.pl directly if you think you need direct access to the running Jobs.
173 =head1 USAGE EXAMPLES
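175 # Run one local Worker process in ehive_dbname and let the system pick up the Analysis
176 runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname
178 # Run one local Worker process in ehive_dbname and let the system pick up the Analysis from the given resource_class
179 runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -rc_name low_mem
181 # Run one local Worker process in ehive_dbname and constrain its initial specialisation within a subset of analyses
182 runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -analyses_pattern '1..15,analysis_X,21'
184 # Run one local Worker process in ehive_dbname and allow it to respecialize within a subset of Analyses
185 runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -can_respecialize -analyses_pattern 'blast%-4..6'
187 # Run a specific Job in a local Worker process:
188 runWorker.pl -url mysql://username:secret@hostname:port/ehive_dbname -job_id 123456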
192 =head2 Connection parameters:
196 =item --reg_conf <path>
198 path to a Registry configuration file
200 =item --reg_alias <string>
202 species/alias name for the eHive DBAdaptor
204 =item --reg_type <string>
206 type of the registry entry ("hive", "core", "compara", etc - defaults to "hive")
208 =item --url <url string>
210 URL defining where database is located
214 "No SQL Version Check" - set this if you want to force working with a database created by a potentially schema-incompatible API
218 =head2 Configs overriding
222 =item --config_file <string>
224 JSON file (with absolute path) to override the default configurations (this option can be given multiple times)
228 =head2 Task specification parameters:
236 =item --rc_name <string>
240 =item --analyses_pattern <string>
242 restrict the specialisation of the Worker to the specified subset of Analyses
244 =item --analysis_id <id>
246 run a Worker and have it specialise to an Analysis with this analysis_id (deprecated, please use --analyses_pattern instead)
250 run a specific Job defined by its database id
254 set this if you want to force running a Worker over a BLOCKED Analysis or to run a specific DONE/SEMAPHORED job_id
258 =head2 Worker control parameters:
262 =item --job_limit <num>
264 number of Jobs to run before the Worker can die naturally
266 =item --life_span <num>
268 number of minutes this Worker is allowed to run
272 don't perform temp directory cleanup when the Worker exits
276 don't write_output or auto_dataflow input_job
278 =item --worker_base_temp_dir <path>
280 The base directory that this Worker will use for temporary operations. This overrides the default set
281 in the JSON config file and in the code (/tmp)
283 =item --hive_log_dir <path>
285 directory where stdout/stderr of the whole eHive of workers is redirected
287 =item --worker_log_dir <path>
289 directory where stdout/stderr of this particular Worker is redirected
291 =item --retry_throwing_jobs
293 By default, Jobs are allowed to fail a few times (up to the Analysis' max_retry_count parameter) until the system "gives up" and considers them as FAILED.
294 This option makes the Worker retry Jobs that die knowingly (e.g. due to encountering a die statement in the Runnable); it can be negated as --noretry_throwing_jobs
296 =item --can_respecialize
298 allow this Worker to re-specialise into another Analysis (within resource_class) after it has exhausted all Jobs of the current one
300 =item --worker_delay_startup_seconds <number>
302 number of seconds each Worker has to wait before first talking to the database (0 by default, useful for debugging)
304 =item --worker_crash_on_startup_prob <float>
306 probability of each Worker failing at startup (0 by default, useful for debugging)
310 =head2 Other options:
320 report both eHive code version and eHive database schema version
322 =item --debug <level>
324 turn on debug messages at <level>
330 See the NOTICE file distributed with this work for additional information
331 regarding copyright ownership.
333 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
334 You may obtain a copy of the License at
336 http://www.apache.org/licenses/LICENSE-2.0
338 Unless required by applicable law or agreed to in writing, software distributed under the License
339 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
340 See the License for the specific language governing permissions and limitations under the License.
344 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates