6 # Finding out own path in order to reference own components (including own modules):
10 $ENV{
'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11 unshift @INC, $ENV{
'EHIVE_ROOT_DIR'}.
'/modules';
14 use Getopt::Long qw(:config no_auto_abbrev);
# NOTE(review): these statements are presumably the body of show_seedable_analyses($pipeline),
# which is called further down; the enclosing sub header is not part of this excerpt.
# Adaptor used below to look up an example Job for each listed Analysis.
28 my $job_adaptor = $pipeline->hive_dba->get_AnalysisJobAdaptor;
# Print one line per Analysis returned by $pipeline->get_source_analyses.
30 foreach my $source_analysis ( @{ $pipeline->get_source_analyses } ) {
31 my $logic_name = $source_analysis->logic_name;
32 my $analysis_id = $source_analysis->dbID;
# Only the first element of the returned list is kept; $example_job stays undef when the list is empty.
# NOTE(review): the second argument (1) is presumably a row limit - confirm in AnalysisJobAdaptor.
33 my ($example_job) = @{ $job_adaptor->fetch_some_by_analysis_id_limit( $analysis_id, 1 ) };
# Output: logic_name and analysis_id, followed by the example Job's input_id or a placeholder when there is no Job.
34 print
"\t$logic_name ($analysis_id)\t\t".($example_job ?
"Example input_id: '".$example_job->input_id.
"'" :
"[not populated yet]").
"\n";
53 # connect to the database:
55 'reg_conf|regfile|reg_file=s' => \$reg_conf,
56 'reg_type=s' => \$reg_type,
57 'reg_alias|regname|reg_name=s' => \$reg_alias,
58 'nosqlvc' => \$nosqlvc, # using "nosqlvc" instead of "sqlvc!" for consistency with scripts where it is a propagated option
60 # identify the analysis:
61 'analyses_pattern=s' => \$analyses_pattern,
62 'analysis_id=i' => \$analysis_id,
63 'logic_name=s' => \$logic_name,
65 'input_id=s' => \$input_id, # specify the Job's input parameters (as a stringified hash)
66 'wrap|semaphored!
' => \$wrap_in_semaphore, # wrap the job into a funnel semaphore (provide a stable_id for the whole execution stream)
68 # other commands/options
70 ) or die "Error in command line arguments\n";
73 die "ERROR: There are invalid arguments on the command-line: ". join(" ", @ARGV). "\n";
77 pod2usage({-exitvalue => 0, -verbose => 2});
# A pipeline object is only built when a URL or a Registry alias was supplied.
81 if($url or $reg_alias) {
82 $pipeline = Bio::EnsEMBL::Hive::HivePipeline->new(
84 -reg_conf => $reg_conf,
85 -reg_type => $reg_type,
86 -reg_alias => $reg_alias,
# The -nosqlvc command-line flag is handed to the constructor as -no_sql_schema_version_check.
87 -no_sql_schema_version_check => $nosqlvc,
# NOTE(review): presumably aborts early when the connection is read-only - confirm in the DBConnection class.
89 $pipeline->hive_dba()->dbc->requires_write_access();
# NOTE(review): presumably the fallback branch for missing connection parameters; the separating "else" line is not part of this excerpt.
91 die "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
# When -analyses_pattern is empty, -analysis_id or -logic_name (in that order) is used as the pattern instead;
# the block is entered only if one of the three was given.
95 if($analyses_pattern ||= $analysis_id || $logic_name) {
97 my $candidate_analyses = $pipeline->collection_of( 'Analysis
' )->find_all_by_pattern( $analyses_pattern );
# Exactly one matching Analysis is required: die on several matches and on none.
99 if( scalar(@$candidate_analyses) > 1 ) {
100 die "Too many analyses matching pattern '$analyses_pattern
', please specify\n";
101 } elsif( !scalar(@$candidate_analyses) ) {
102 die "Analysis matching the pattern '$analyses_pattern
' could not be found\n";
# Unpack the first element of the candidate list into $analysis.
105 ($analysis) = @$candidate_analyses;
# NOTE(review): presumably the branch taken when no Analysis was named (the "else" line is not part of this excerpt):
# explain the problem and list the candidates via show_seedable_analyses().
109 print "\nYou haven't specified -logic_name nor -analysis_id of the Analysis being seeded.\n
";
110 print "\nSeedable analyses without incoming dataflow:\n
";
111 show_seedable_analyses($pipeline);
# Tell the user which input_id value is being assumed; the code that picks that default is not part of this excerpt.
117 warn "Since -input_id has not been set, assuming input_id=
'$input_id'\n
";
# Convert the input_id string with destringify() and insist that the result is a hash reference:
# a non-reference result is reported as a syntax problem, any other reference type as "not a hash".
119 my $dinput_id = destringify($input_id);
120 if (!ref($dinput_id)) {
121 die "'$input_id' cannot be eval
'ed, likely because of a syntax error\n";
123 if (ref($dinput_id) ne 'HASH
') {
124 die "'$input_id
' is not a hash\n";
# Build the Job object; it is written to the database by one of the store_* calls below.
127 my $job = Bio::EnsEMBL::Hive::AnalysisJob->new(
128 'hive_pipeline
' => $pipeline,
129 'prev_job
' => undef, # This job has been created by the seed_pipeline.pl script, not by another job
130 'analysis
' => $analysis,
131 'input_id
' => $dinput_id, # Make sure all job creations undergo re-stringification to avoid alternative "spellings" of the same input_id hash
134 my $job_adaptor = $pipeline->hive_dba->get_AnalysisJobAdaptor;
135 my ($semaphore_id, $job_id);
# With -wrap the Job is stored through store_a_semaphored_group_of_jobs(), which also returns a semaphore id.
137 if( $wrap_in_semaphore ) {
139 ($semaphore_id, $dummy, $job_id) = $job_adaptor->store_a_semaphored_group_of_jobs( undef, [ $job ], undef );
# NOTE(review): presumably the non-wrapped branch (the "else" line is not part of this excerpt): store the single Job directly.
141 ($job_id) = @{ $job_adaptor->store_jobs_and_adjust_counters( [ $job ] ) };
# Report the Job id, the Analysis it belongs to, the input_id and, when set, the Semaphore id.
145 print "Job $job_id [ ".$analysis->logic_name.'(
'.$analysis->dbID.")] : '$input_id
'".($semaphore_id ? ", wrapped in Semaphore $semaphore_id" : '')."\n";
# NOTE(review): presumably emitted when no job id came back; the guarding condition is not part of this excerpt.
148 warn "Could not create Job '$input_id
' (it may have been created already)\n";
164 seed_pipeline.pl {-url <url> | -reg_conf <reg_conf> [-reg_type <reg_type>] -reg_alias <reg_alias>} [ {-analyses_pattern <pattern> | -analysis_id <analysis_id> | -logic_name <logic_name>} [ -input_id <input_id> ] ]
168 seed_pipeline.pl is a generic script that is used to create {initial or top-up} Jobs for eHive pipelines
170 =head1 USAGE EXAMPLES
172 # find out which analyses may need seeding (with an example input_id):
174 seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult"
177 # seed one Job into the "start" Analysis:
179 seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" \
180 -logic_name start -input_id '{"a_multiplier" => 2222222222, "b_multiplier" => 3434343434}'
184 =head2 Connection parameters
188 =item --reg_conf <path>
190 path to a Registry configuration file
192 =item --reg_type <string>
194 type of the registry entry ("hive", "core", "compara", etc - defaults to "hive")
196 =item --reg_alias <string>
198 species/alias name for the eHive DBAdaptor
200 =item --url <url string>
202 URL defining where eHive database is located
206 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
210 =head2 Analysis parameters
214 =item --analyses_pattern <string>
216 seed Job(s) for analyses whose logic_name matches the supplied pattern
218 =item --analysis_id <num>
220 seed Job for Analysis with the given analysis_id
228 =item --input_id <string>
230 specify the Job's input parameters as a stringified hash
234 wrap the Job into a funnel Semaphore (provide a stable_id for the whole execution stream)
238 =head2 Other commands/options
244 show this help message
250 See the NOTICE file distributed with this work for additional information
251 regarding copyright ownership.
253 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
254 You may obtain a copy of the License at
258 Unless required by applicable law or agreed to in writing, software distributed under the License
259 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
260 See the License for the specific language governing permissions and limitations under the License.
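264 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates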