6 # Finding out own path in order to reference own components (including own modules):
10 $ENV{
'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11 unshift @INC, $ENV{
'EHIVE_ROOT_DIR'}.
'/modules';
14 use Getopt::Long qw(:config pass_through no_auto_abbrev);
40 # connection parameters
41 'reg_conf|regfile|reg_file=s' => \$reg_conf,
44 'input_id=s' => \$input_id,
46 'job_id=i' => \$job_id,
49 'flow_into|flow=s' => \$flow_into,
52 'no_write' => \$no_write,
53 'no_cleanup' => \$no_cleanup,
56 # other commands/options
57 'language=s' => \$language,
62 pod2usage({-exitvalue => 0, -verbose => 2});
72 if ($input_id && ($job_id || $url)) {
73 die
"Error: -input_id cannot be given at the same time as -job_id or -url\n";
75 } elsif ($job_id && $url) {
77 unless($pipeline->hive_dba) {
78 die
"ERROR : no database connection\n\n";
# Fetch the Job by its dbID; abort when the adaptor returns a false value.
80 my $job = $pipeline->hive_dba->get_AnalysisJobAdaptor->fetch_by_dbID($job_id)
81 || die
"ERROR: No Job with job_id=$job_id\n";
82 $job->load_parameters();
83 my ($param_hash, $param_list) = parse_cmdline_options();
85 die
"ERROR: There are invalid arguments on the command-line: ". join(
" ", @$param_list).
"\n";
87 $input_id = stringify( {%{$job->{
'_unsubstituted_param_hash'}}, %$param_hash} );
88 $module_or_file = $job->analysis->module;
89 my $status = $job->status;
90 warn
"\nTaken parameters from job_id $job_id (status $status) @ $url\n";
91 warn
"Will now disconnect from it. Be aware that the original Job will NOT be updated with the outcome of this standalone. Use runWorker.pl if you want to register your run.\n";
92 $pipeline->hive_dba->dbc->disconnect_if_idle;
94 } elsif (!$input_id) {
95 $module_or_file = shift @ARGV;
96 my ($param_hash, $param_list) = parse_cmdline_options();
98 die
"ERROR: There are invalid arguments on the command-line: ". join(
" ", @$param_list).
"\n";
100 $input_id = stringify($param_hash);
102 $module_or_file = shift @ARGV;
104 die
"ERROR: There are invalid arguments on the command-line: ". join(
" ", @ARGV).
"\n";
108 if (!$module_or_file) {
109 die
"ERROR: need to provide a module name to run\n";
112 warn
"\nRunning '$module_or_file' with input_id='$input_id' :\n";
115 no_write => $no_write,
116 no_cleanup => $no_cleanup,
120 exit(1) unless $job_successful;
134 standaloneJob.pl is an eHive component script that
140 takes in a Runnable module,
144 creates a standalone Job outside an eHive database by initialising parameters from command line arguments
148 and runs that Job outside of any eHive database.
150 I<WARNING> the Runnable code may still access databases provided
151 as arguments and even harm them!
155 can optionally dataflow into tables fully defined by URLs
159 Naturally, only certain Runnable modules can be
run using this script, and some database-related functionality will be lost.
161 There are several ways of initialising the Job parameters:
167 C<Module::Name -input_id>. The simplest one: just provide a stringified hash
171 C<Module::Name -param1 value1 -param2 value2 (...)>. Enumerate all the arguments on the command-line. ARRAY- and HASH-
172 arguments can be passed+parsed too!
176 C<-url $ehive_url -job_id XXX>. The reference to an existing Job from which the parameters will be pulled. It is
177 a convenient way of gathering all the parameters (the Job's input_id, the Job's accu, the Analysis parameters
178 and the pipeline-wide parameters). Further parameters can be added with C<-param1 value1 -param2 value2 (...)>
179 and they take priority over the existing Job's parameters. The Runnable is also found in the database.
181 I<NOTE> the standaloneJob will *not* interact any further with this eHive database. There won't be any updates
182 to the C<job>, C<worker>, C<log_message> etc tables.
186 =head1 USAGE EXAMPLES
188 # Run a Job with default parameters, specify module by its package name:
189 standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::FailureTest
191 # Run the same Job with default parameters, but specify module by its relative filename:
192 standaloneJob.pl RunnableDB/FailureTest.pm
194 # Run a Job and re-define some of the default parameters:
195 standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::FailureTest -time_RUN=2 -time_WRITE_OUTPUT=3 -state=WRITE_OUTPUT -value=2
199 # Run a Job and re-define its "db_conn" parameter to allow it to perform some database-related operations:
200 standaloneJob.pl RunnableDB/SqlCmd.pm -db_conn mysql:
202 # Run a Job initialised from the parameters of an existing Job topped-up with extra ones.
203 # In this particular example the Runnable needs a "compara_db" parameter which defaults to the eHive database.
204 # Since there is no eHive database here we need to define -compara_db on the command-line
205 standaloneJob.pl -url mysql:
207 # Run a Job with given parameters, but skip the write_output() step:
208 standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::FailureTest -no_write -time_RUN=2 -time_WRITE_OUTPUT=3 -state=WRITE_OUTPUT -value=2
210 # Run a Job and re-direct its dataflow into tables:
212 -flow_into
"{ 2 => ['mysql://ensadmin:xxxxxxx@127.0.0.1:2914/lg4_triggers/foo', 'mysql://ensadmin:xxxxxxx@127.0.0.1:2914/lg4_triggers/bar'] }"
214 # Run a Compara Job that needs a connection to Compara database:
215 standaloneJob.pl Bio::EnsEMBL::Compara::RunnableDB::ObjectFactory -compara_db
'mysql://ensadmin:xxxxxxx@127.0.0.1:2911/sf5_ensembl_compara_master' \
216 -adaptor_name MethodLinkSpeciesSetAdaptor -adaptor_method fetch_all_by_method_link_type -method_param_list
"[ 'ENSEMBL_ORTHOLOGUES' ]" \
217 -column_names2getters
"{ 'name' => 'name', 'mlss_id' => 'dbID' }" -flow_into
"{ 2 => 'mysql://ensadmin:xxxxxxx@127.0.0.1:2914/lg4_triggers/baz' }"
219 # Create a new Job in a database using automatic dataflow from a database-less Dummy Job:
221 -flow_into
"{ 1 => 'mysql://ensadmin:xxxxxxx@127.0.0.1/lg4_long_mult/analysis?logic_name=start' }"
223 # Produce a Semaphore group of Jobs from a database-less DigitFactory Job:
225 -flow_into
"{ '2->A' => 'mysql://ensadmin:${ENSADMIN_PSW}@127.0.0.1/lg4_long_mult/analysis?logic_name=part_multiply', 'A->1' => 'mysql://ensadmin:${ENSADMIN_PSW}@127.0.0.1/lg4_long_mult/analysis?logic_name=add_together' }"
228 =head1 SCRIPT-SPECIFIC OPTIONS
236 =item --debug <level>
238 turn on
debug messages at <level>
242 skip the execution of write_output() step this time
248 =item --reg_conf <path>
250 load registry entries from the given file (these entries may be needed by the Runnable itself)
252 =item --input_id <hash>
254 specify the whole input_id parameter in one stringified hash
256 =item --flow_into <hash>
258 defines the dataflow re-direction rules in a format similar to PipeConfig's - see the last example
260 =item --language <name>
262 language in which the Runnable is written
266 All other options will be passed to the Runnable (leading dashes removed) and will constitute the parameters for the Job.
270 See the NOTICE file distributed with this work for additional information
271 regarding copyright ownership.
273 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
274 You may obtain a copy of the License at
278 Unless required by applicable law or agreed to in writing, software distributed under the License
279 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
280 See the License for the specific language governing permissions and limitations under the License.
284 Please subscribe to the eHive mailing list: http: