ensembl-hive  2.8.1
run.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # Don't change the above line.
18 # Change the PATH in the myRun.ksh script if you want to use another perl.
19 
20 =head1 NAME
21 
22 run_all.pl - wrapper script to run the stable ID mapping
23 
24 =head1 SYNOPSIS
25 
26 run_all.pl [arguments]
27 
28 Required arguments:
29 
30  --dbname, --db_name=NAME database name NAME
31  --host, --dbhost, --db_host=HOST database host HOST
32  --port, --dbport, --db_port=PORT database port PORT
33  --user, --dbuser, --db_user=USER database username USER
34  --pass, --dbpass, --db_pass=PASS database password PASS
35 
36 Optional arguments:
37 
38  --conffile, --conf=FILE read parameters from FILE
39  (default: conf/Conversion.ini)
40 
41  --logfile, --log=FILE log to FILE (default: *STDOUT)
42  --logpath=PATH write logfile to PATH (default: .)
43  --logappend, --log_append append to logfile (default: truncate)
44  --loglevel=LEVEL define log level (default: INFO)
45 
46  -i, --interactive=0|1 run script interactively (default: true)
47  -n, --dry_run, --dry=0|1 don't write results to database
48  -h, --help, -? print help (this message)
49 
50 =head1 DESCRIPTION
51 
52 
53 
54 =head1 AUTHOR
55 
56 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
57 
58 =head1 CONTACT
59 
60 Please post comments/questions to the Ensembl development list
61 <http://lists.ensembl.org/mailman/listinfo/dev>
62 
63 =cut
64 
65 use strict;
66 use warnings;
67 no warnings 'uninitialized';
68 
69 use FindBin qw($Bin);
70 use Bio::EnsEMBL::Utils::ConfParser;
71 use Bio::EnsEMBL::Utils::Logger;
72 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
73 use Bio::EnsEMBL::IdMapping::Cache;
74 
# Set of run modes this wrapper accepts; init_check() looks the requested
# mode up in this hash.
my %valid_modes = map { $_ => 1 } qw(check_only normal upload mapping);
79 
# Parse configuration and commandline arguments.
#
# The parser is constructed with direct method-call syntax
# (Class->new) rather than indirect object syntax (new Class), which Perl
# can mis-parse and which is disabled by newer feature bundles.
my $conf = Bio::EnsEMBL::Utils::ConfParser->new(
  -SERVERROOT   => "$Bin/../../..",
  -DEFAULT_CONF => "$Bin/default.conf",
);

# Each key is an option specification (name, optional aliases, type); each
# value is a 0/1 flag.
# NOTE(review): the flag presumably marks the option as required (1) or
# optional (0) -- confirm against ConfParser::parse_options.
$conf->parse_options(
  'sourcehost|source_host=s' => 1,
  'sourceport|source_port=n' => 1,
  'sourceuser|source_user=s' => 1,
  'sourcepass|source_pass=s' => 0,
  'sourcedbname|source_dbname=s' => 1,
  'targethost|target_host=s' => 1,
  'targetport|target_port=n' => 1,
  'targetuser|target_user=s' => 1,
  'targetpass|target_pass=s' => 0,
  'targetdbname|target_dbname=s' => 1,
  'mode=s' => 0,
  'basedir|basedir=s' => 1,
  'chromosomes|chr=s@' => 0,
  'region=s' => 0,
  'biotypes=s@' => 0,
  'biotypes_include=s@' => 0,
  'biotypes_exclude=s@' => 0,
  'cache_method=s' => 0,
  'build_cache_auto_threshold=n' => 0,
  'build_cache_concurrent_jobs=n' => 0,
  'min_exon_length|minexonlength=i' => 0,
  'exonerate_path|exoneratepath=s' => 1,
  'exonerate_threshold|exoneratethreshold=f' => 0,
  'exonerate_jobs|exoneratejobs=i' => 0,
  'exonerate_bytes_per_job|exoneratebytesperjob=f' => 0,
  'exonerate_extra_params|exonerateextraparams=s' => 0,
  'plugin_internal_id_mappers_gene=s@' => 0,
  'plugin_internal_id_mappers_transcript=s@' => 0,
  'plugin_internal_id_mappers_exon=s@' => 0,
  'mapping_types=s@' => 1,
  'plugin_stable_id_generator=s' => 0,
  'upload_events|uploadevents=s' => 0,
  'upload_stable_ids|uploadstableids=s' => 0,
  'upload_archive|uploadarchive=s' => 0,
  'lsf!' => 0,
  'lsf_opt_run|lsfoptrun=s' => 0,
  'lsf_opt_dump_cache|lsfoptdumpcache=s' => 0,
  'no_check!' => 0,
  'no_check_empty_tables' => 0,
);
127 
# Default the log directory to <basedir>/log when none was configured.
unless ($conf->param('logpath')) {
  $conf->param('logpath', path_append($conf->param('basedir'), 'log'));
}

# Create the logger from the configured log settings. Direct method-call
# syntax (Class->new) is used instead of indirect object syntax (new Class),
# which Perl can mis-parse.
my $logger = Bio::EnsEMBL::Utils::Logger->new(
  -LOGFILE     => $conf->param('logfile'),
  -LOGAUTO     => $conf->param('logauto'),
  -LOGAUTOBASE => 'run_all',
  -LOGPATH     => $conf->param('logpath'),
  -LOGAPPEND   => $conf->param('logappend'),
  -LOGLEVEL    => $conf->param('loglevel'),
);

# Initialise the log, handing it the parameter values from the configuration.
$logger->init_log($conf->list_param_values);
145 
# Fall back to 'normal' when no mode was configured.
my $mode = $conf->param('mode') || 'normal';

# Check configuration and resources.
# This is deliberately done before submitting to lsf: it needs few resources,
# and config errors show up before waiting for the job to run. The 'no_check'
# option prevents the checks from being re-run after automatic lsf submission.
if (!$conf->param('no_check')) {
  my $failed_checks = init_check($mode);
  $logger->error("Configuration check failed. See above for details.\n")
    if $failed_checks > 0;

  if ($mode eq 'check_only') {
    $logger->info("Nothing else to do for 'check_only' mode. Exiting.\n");
    exit;
  }
}

# If the user wants to run via lsf, submit the script with bsub (bsubmit
# replaces this instance of the script via exec).
bsubmit() if $conf->param('lsf');
167 
# This script is only a wrapper and will run one or more components.
# Define the commandline options handed to the components here.
my %options;
my $logautoid = $logger->log_auto_id;

# Options for dump_cache.pl: share this run's log id, append to the log,
# run non-interactively and flag the script as a component.
$options{'dump_cache'} = $conf->create_commandline_options(
  logautoid    => $logautoid,
  logappend    => 1,
  interactive  => 0,
  is_component => 1,
);

# Options for id_mapping.pl (same overrides as for dump_cache.pl).
$options{'id_mapping'} = $conf->create_commandline_options(
  logautoid    => $logautoid,
  logappend    => 1,
  interactive  => 0,
  is_component => 1,
);

# Run components, depending on mode.
# A dispatch table is used instead of a symbolic call (&{"run_$mode"}):
# strict refs stay enabled for the rest of the file, and a mode without a
# runner (e.g. an unchecked mode when 'no_check' is set) gives a clear error
# instead of "Undefined subroutine".
my %runner_for = (
  'normal'  => \&run_normal,
  'mapping' => \&run_mapping,
  'upload'  => \&run_upload,
);

if (my $runner = $runner_for{$mode}) {
  $runner->();
} else {
  $logger->error("No components to run for mode '$mode'.\n");
}

# finish logfile
$logger->finish_log;
194 
195 ### END main ###
196  # add one more job to
197 
198 
# Run the pre-flight configuration and resource checks.
#
# Arg:     $mode - name of the run mode, validated against %valid_modes
# Returns: accumulated error count: one for each failure detected here, plus
#          whatever the Bio::EnsEMBL::IdMapping::Cache check methods return
#          (0 means every check passed)
#
# Uses the file-level $logger, $conf and %valid_modes.
sub init_check {
  my $mode = shift;

  my $err = 0;

  $logger->info("Checking configuration...\n", 0, 'stamped');

  #
  # check for valid mode
  #
  unless ($valid_modes{$mode}) {
    $logger->warning("Invalid mode: $mode.\n");
    $err++;
  } else {
    $logger->debug("Run mode ok.\n");
  }

  #
  # create the base directory; a failure is counted as an error
  #
  my $basedir = $conf->param('basedir');
  unless (-d $basedir) {
    # List-form system() bypasses the shell, so a directory name containing
    # spaces or shell metacharacters reaches mkdir verbatim; '--' stops
    # option parsing for names that start with a dash.
    if (system('mkdir', '-p', '--', $basedir) == 0) {
      $logger->debug("Base directory created successfully.\n");
    } else {
      # $! is only meaningful when mkdir could not be started ($? == -1);
      # otherwise report the wait status of the mkdir process.
      my $reason = ($? == -1) ? "$!" : "mkdir failed with wait status $?";
      $logger->warning("Unable to create base directory $basedir: $reason\n");
      $err++;
    }
  }

  #
  # check db connection and permissions (SELECT for source, INSERT for target)
  #
  my $cache = Bio::EnsEMBL::IdMapping::Cache->new(
    -LOGGER => $logger,
    -CONF   => $conf,
  );

  # source db
  $err += $cache->check_db_connection('source');
  $err += $cache->check_db_read_permissions('source');

  # target db
  $err += $cache->check_db_connection('target');
  $err += $cache->check_db_read_permissions('target');
  $err += $cache->check_db_write_permissions('target');

  #
  # check stable ID and archive tables in target db are empty
  #
  $err += $cache->check_empty_tables('target');

  #
  # check both dbs have sequence
  #
  $err += $cache->check_sequence('source');
  $err += $cache->check_sequence('target');

  #
  # check for required meta table entries
  #
  $err += $cache->check_meta_entries('source');
  $err += $cache->check_meta_entries('target');

  $logger->info("Done.\n\n", 0, 'stamped');

  return $err;
}
267 
268 
# 'normal' mode: build the cache files, then run the ID mapping.
sub run_normal {
  # Components in execution order, each paired with its log heading.
  my @steps = (
    [ 'dump_cache', 'building cache' ],
    [ 'id_mapping', 'ID mapping' ],
    # QC is currently disabled:
    # [ 'qc', 'QC' ],
  );

  for my $step (@steps) {
    my ($component, $heading) = @{$step};
    run_component($component, $options{$component}, $heading);
  }
}
281 
# 'mapping' mode: no cache dump, go straight to the ID mapping component.
sub run_mapping {
  my $component = 'id_mapping';
  run_component($component, $options{$component},
                'ID mapping (skipping the dumping step)');
}
287 
# 'upload' mode: upload table data files into the db. The work is delegated
# to id_mapping.pl, which is expected to act on the --mode it is given.
sub run_upload {
  my $component = 'id_mapping';
  run_component($component, $options{$component}, 'uploading tables');
}
293 
294 
# Run one component script as a child process and log its start and end.
#
# Args:    $basename - component name; the script run is "<basename>.pl"
#          $options  - commandline option string appended to the command
#          $logtext  - heading used in the log (defaults to the script name)
# Returns: nothing meaningful
#
# A non-zero exit of the component is reported through $logger->error.
# Uses the file-level $logger and FindBin's $Bin.
sub run_component {
  my $basename = shift;
  my $options = shift;
  my $logtext = shift;

  my $cmd = "$basename.pl";
  $logtext ||= $cmd;

  $logger->info("----- $logtext -----\n", 0, 'stamped');

  if ($logger->logauto) {
    $logger->info("See ${basename}_".$logger->log_auto_id.".log for logs.\n", 1);
  } elsif ($logger->logfile) {
    $logger->info("See below for logs.\n", 1);
  }

  # Locate the component next to this wrapper instead of relative to the
  # current working directory, so the wrapper also works when it is started
  # from another directory. The path is single-quoted for the shell (embedded
  # single quotes escaped) in case the install directory contains spaces;
  # $options stays unquoted because it is a pre-built option string.
  (my $script = "$Bin/$cmd") =~ s/'/'\\''/g;

  system("'$script' $options") == 0
    or $logger->error("Error running $cmd. Please see the respective logfile for more information.\n");

  $logger->info("----- done with $logtext -----\n\n", 0, 'stamped');
}
316 
317 
# Re-execute this script through LSF's bsub. On success exec() replaces the
# current process, so this sub does not return; on failure it dies.
# Uses the file-level $conf and $logger.
sub bsubmit {
  # LSF output goes to an automatically named file in the log directory.
  my $lsf_output = $conf->param('logpath').'/lsf_'.$logger->log_auto_id.'.out';

  # Extra lsf options as configured by the user (may be empty).
  my $lsf_extra = $conf->param('lsf_opt_run');

  # Options for the re-executed script: keep the same log id, run
  # non-interactively, and switch off lsf submission and the config checks
  # so the job neither resubmits itself nor repeats the checks.
  my $own_options = $conf->create_commandline_options(
    logautoid   => $logger->log_auto_id,
    interactive => 0,
    lsf         => 0,
    no_check    => 1,
  );

  # bsub -o <output file> <extra lsf options> <this script> <its options>
  my $cmd = join(' ', 'bsub -o', $lsf_output, $lsf_extra, $0, $own_options);

  print "\nRe-executing via lsf:\n";
  print "$cmd\n\n";

  exec($cmd) or die "Could not exec $0 via lsf: $!\n";
}
351 
run_mapping
public run_mapping()
upload_stable_ids
public upload_stable_ids()
run_normal
public run_normal()
run_component
public run_component()
bsubmit
public bsubmit()
Bio::EnsEMBL::IdMapping::Cache::new
public Bio::EnsEMBL::IdMapping::Cache new()
init_check
public init_check()
Bio::EnsEMBL::IdMapping::Cache
Definition: Cache.pm:18
run
public run()
Bio::EnsEMBL::IdMapping::Cache::check_db_connection
public check_db_connection()
upload_archive
public upload_archive()
run_upload
public run_upload()