ensembl-hive  2.7.0
seed_pipeline.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 
3 use strict;
4 use warnings;
5 
6  # Finding out own path in order to reference own components (including own modules):
7 use Cwd ();
8 use File::Basename ();
BEGIN {
    # Work out where this eHive checkout lives, unless the environment has
    # already told us: the root is two directory levels above the resolved
    # path of this script.
    unless ( $ENV{'EHIVE_ROOT_DIR'} ) {
        my $script_dir = File::Basename::dirname( Cwd::realpath($0) );
        $ENV{'EHIVE_ROOT_DIR'} = File::Basename::dirname($script_dir);
    }

    # Put the checkout's own modules first on the search path so they are
    # found ahead of anything else on @INC.
    unshift @INC, $ENV{'EHIVE_ROOT_DIR'} . '/modules';
}
13 
14 use Getopt::Long qw(:config no_auto_abbrev);
15 use Pod::Usage;
16 
20 use Bio::EnsEMBL::Hive::Utils ('destringify', 'stringify');
22 
24 
26  my ($pipeline) = @_;
27 
28  my $job_adaptor = $pipeline->hive_dba->get_AnalysisJobAdaptor;
29 
30  foreach my $source_analysis ( @{ $pipeline->get_source_analyses } ) {
31  my $logic_name = $source_analysis->logic_name;
32  my $analysis_id = $source_analysis->dbID;
33  my ($example_job) = @{ $job_adaptor->fetch_some_by_analysis_id_limit( $analysis_id, 1 ) };
34  print "\t$logic_name ($analysis_id)\t\t".($example_job ? "Example input_id: '".$example_job->input_id."'" : "[not populated yet]")."\n";
35  }
36 }
37 
38 
# Script entry point: seed one Job into an Analysis of an eHive pipeline.
#
# Takes no arguments; all input comes from @ARGV via GetOptions().
#
# Steps:
#   1. parse the command line (help wins over any other complaint);
#   2. connect to the pipeline database (needs -url or -reg_alias);
#   3. resolve exactly one target Analysis from -analyses_pattern,
#      -analysis_id or -logic_name; with none of them given, list the
#      seedable analyses and exit(0);
#   4. turn -input_id (default '{}') into a hash and store a Job for it,
#      optionally wrapped in a semaphore.
#
# Dies on bad command-line arguments, missing connection parameters, a pattern
# matching zero or several analyses, or an -input_id that is not a hash.
# Only warns (does not die) when the Job could not be stored.
sub main {
    my ($url,
        $reg_conf,
        $reg_type,
        $reg_alias,
        $nosqlvc,
        $analyses_pattern,
        $analysis_id,
        $logic_name,
        $input_id,
        $wrap_in_semaphore,
        $help);

    GetOptions(
            # connect to the database:
        'url=s'                         => \$url,
        'reg_conf|regfile|reg_file=s'   => \$reg_conf,
        'reg_type=s'                    => \$reg_type,
        'reg_alias|regname|reg_name=s'  => \$reg_alias,
        'nosqlvc'                       => \$nosqlvc,       # using "nosqlvc" instead of "sqlvc!" for consistency with scripts where it is a propagated option

            # identify the analysis:
        'analyses_pattern=s'            => \$analyses_pattern,
        'analysis_id=i'                 => \$analysis_id,
        'logic_name=s'                  => \$logic_name,

        'input_id=s'                    => \$input_id,      # specify the Job's input parameters (as a stringified hash)
        'wrap|semaphored!'              => \$wrap_in_semaphore, # wrap the job into a funnel semaphore (provide a stable_id for the whole execution stream)

            # other commands/options
        'h|help!'                       => \$help,
    ) or die "Error in command line arguments\n";

    # An explicit request for help is honoured first, so that a user who typed
    # a malformed command line together with -h still gets the manual.
    if ($help) {
        pod2usage({-exitvalue => 0, -verbose => 2});
    }

    # Anything GetOptions() left behind in @ARGV is not a recognised option.
    if (@ARGV) {
        die "ERROR: There are invalid arguments on the command-line: ". join(" ", @ARGV). "\n";
    }

    my $pipeline;
    if($url or $reg_alias) {
        $pipeline = Bio::EnsEMBL::Hive::HivePipeline->new(
            -url                            => $url,
            -reg_conf                       => $reg_conf,
            -reg_type                       => $reg_type,
            -reg_alias                      => $reg_alias,
            -no_sql_schema_version_check    => $nosqlvc,
        );
        # Seeding writes to the database.
        # NOTE(review): presumably this call fails early on a read-only connection - confirm.
        $pipeline->hive_dba()->dbc->requires_write_access();
    } else {
        die "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
    }

    my $analysis;
    # -analyses_pattern takes precedence; otherwise fall back to -analysis_id,
    # then -logic_name, and use that value as the pattern.
    if($analyses_pattern ||= $analysis_id || $logic_name) {

        my $candidate_analyses = $pipeline->collection_of( 'Analysis' )->find_all_by_pattern( $analyses_pattern );

        # Exactly one Analysis must match: a Job belongs to a single Analysis.
        if( scalar(@$candidate_analyses) > 1 ) {
            die "Too many analyses matching pattern '$analyses_pattern', please specify\n";
        } elsif( !scalar(@$candidate_analyses) ) {
            die "Analysis matching the pattern '$analyses_pattern' could not be found\n";
        }

        ($analysis) = @$candidate_analyses;

    } else {

        # Nothing to seed: help the user pick a target instead.
        print "\nYou haven't specified -logic_name nor -analysis_id of the Analysis being seeded.\n";
        print "\nSeedable analyses without incoming dataflow:\n";
        show_seedable_analyses($pipeline);
        exit(0);
    }

    unless($input_id) {
        $input_id = '{}';
        warn "Since -input_id has not been set, assuming input_id='$input_id'\n";
    }

    # NOTE(review): judging by the error message below, destringify() eval's the
    # string it is given, i.e. the -input_id value is executed as Perl code.
    # Only pass trusted input on the command line.
    my $dinput_id = destringify($input_id);
    if (!ref($dinput_id)) {
        die "'$input_id' cannot be eval'ed, likely because of a syntax error\n";
    }
    if (ref($dinput_id) ne 'HASH') {
        die "'$input_id' is not a hash\n";
    }

    my $job = Bio::EnsEMBL::Hive::AnalysisJob->new(
        'hive_pipeline' => $pipeline,
        'prev_job'      => undef,       # This job has been created by the seed_pipeline.pl script, not by another job
        'analysis'      => $analysis,
        'input_id'      => $dinput_id,  # Make sure all job creations undergo re-stringification to avoid alternative "spellings" of the same input_id hash
    );

    my $job_adaptor = $pipeline->hive_dba->get_AnalysisJobAdaptor;
    my ($semaphore_id, $job_id);

    if( $wrap_in_semaphore ) {
        # Only the first and third returned values are needed here; the second
        # one is skipped with an undef placeholder.
        ($semaphore_id, undef, $job_id) = $job_adaptor->store_a_semaphored_group_of_jobs( undef, [ $job ], undef );
    } else {
        ($job_id) = @{ $job_adaptor->store_jobs_and_adjust_counters( [ $job ] ) };
    }

    if($job_id) {
        print "Job $job_id [ ".$analysis->logic_name.'('.$analysis->dbID.")] : '$input_id'".($semaphore_id ? ", wrapped in Semaphore $semaphore_id" : '')."\n";

    } else {
        # No job id came back from the adaptor; report it without aborting.
        warn "Could not create Job '$input_id' (it may have been created already)\n";
    }
}
151 
152 main();
153 
154 __DATA__
155 
156 =pod
157 
158 =head1 NAME
159 
160 seed_pipeline.pl
161 
162 =head1 SYNOPSIS
163 
164  seed_pipeline.pl {-url <url> | -reg_conf <reg_conf> [-reg_type <reg_type>] -reg_alias <reg_alias>} [ {-analyses_pattern <pattern> | -analysis_id <analysis_id> | -logic_name <logic_name>} [ -input_id <input_id> ] ]
165 
166 =head1 DESCRIPTION
167 
168 seed_pipeline.pl is a generic script that is used to create {initial or top-up} Jobs for eHive pipelines
169 
170 =head1 USAGE EXAMPLES
171 
172  # find out which analyses may need seeding (with an example input_id):
173 
174  seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult"
175 
176 
177  # seed one Job into the "start" Analysis:
178 
179  seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" \
180  -logic_name start -input_id '{"a_multiplier" => 2222222222, "b_multiplier" => 3434343434}'
181 
182 =head1 OPTIONS
183 
184 =head2 Connection parameters
185 
186 =over
187 
188 =item --reg_conf <path>
189 
190 path to a Registry configuration file
191 
192 =item --reg_type <string>
193 
194 type of the registry entry ("hive", "core", "compara", etc - defaults to "hive")
195 
196 =item --reg_alias <string>
197 
198 species/alias name for the eHive DBAdaptor
199 
200 =item --url <url string>
201 
202 URL defining where eHive database is located
203 
204 =item --nosqlvc
205 
206 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
207 
208 =back
209 
210 =head2 Analysis parameters
211 
212 =over
213 
214 =item --analyses_pattern <string>
215 
seed a Job for the single Analysis that matches the supplied pattern (the script stops with an error if the pattern matches no Analysis or more than one)
217 
218 =item --analysis_id <num>
219 
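seed Job for Analysis with the given analysis_id

=item --logic_name <string>

seed Job for Analysis with the given logic_name (ignored if --analyses_pattern or --analysis_id is also given)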
221 
222 =back
223 
224 =head2 Input
225 
226 =over
227 
228 =item --input_id <string>
229 
230 specify the Job's input parameters as a stringified hash
231 
232 =item --semaphored
233 
234 wrap the Job into a funnel Semaphore (provide a stable_id for the whole execution stream)
235 
236 =back
237 
238 =head2 Other commands/options
239 
240 =over
241 
242 =item -h, --help
243 
244 show this help message
245 
246 =back
247 
248 =head1 LICENSE
249 
250  See the NOTICE file distributed with this work for additional information
251  regarding copyright ownership.
252 
253  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
254  You may obtain a copy of the License at
255 
256  http://www.apache.org/licenses/LICENSE-2.0
257 
258  Unless required by applicable law or agreed to in writing, software distributed under the License
259  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
260  See the License for the specific language governing permissions and limitations under the License.
261 
262 =head1 CONTACT
263 
264 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates
265 
266 =cut
267 
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
Bio::EnsEMBL::Hive::Utils::URL::hide_url_password
public Void hide_url_password()
Bio::EnsEMBL::Hive::Utils::URL
Definition: URL.pm:11
show_seedable_analyses
public show_seedable_analyses()
BEGIN
public BEGIN()
main
public main()
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:31
Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor
Definition: AnalysisJobAdaptor.pm:22
Bio::EnsEMBL::Hive::AnalysisJob
Definition: AnalysisJob.pm:13