2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # Don't change the above line.
18 # Change the PATH in the myRun.ksh script if you want to use another perl.
22 run_all.pl - wrapper script to run the stable ID mapping
26 run_all.pl [arguments]
30 --dbname, --db_name=NAME database name NAME
31 --host, --dbhost, --db_host=HOST database host HOST
32 --port, --dbport, --db_port=PORT database port PORT
33 --user, --dbuser, --db_user=USER database username USER
34 --pass, --dbpass, --db_pass=PASS database password PASS
38 --conffile, --conf=FILE read parameters from FILE
39 (default: conf/Conversion.ini)
41 --logfile, --log=FILE log to FILE (default: *STDOUT)
42 --logpath=PATH write logfile to PATH (default: .)
43 --logappend, --log_append append to logfile (default: truncate)
44 --loglevel=LEVEL define log level (default: INFO)
46 -i, --interactive=0|1 run script interactively (default: true)
47 -n, --dry_run, --dry=0|1 don't write results to database
48 -h, --help, -? print help (this message)
56 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
60 Please post comments/questions to the Ensembl development list
61 <http://lists.ensembl.org/mailman/listinfo/dev>
67 no warnings 'uninitialized
';
70 use Bio::EnsEMBL::Utils::ConfParser;
71 use Bio::EnsEMBL::Utils::Logger;
72 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
73 use Bio::EnsEMBL::IdMapping::Cache;
75 my %valid_modes = ( 'check_only
' => 1,
80 # parse configuration and commandline arguments
81 my $conf = new Bio::EnsEMBL::Utils::ConfParser(
82 -SERVERROOT => "$Bin/../../..",
83 -DEFAULT_CONF => "$Bin/default.conf"
87 'sourcehost|source_host=s
' => 1,
88 'sourceport|source_port=n
' => 1,
89 'sourceuser|source_user=s
' => 1,
90 'sourcepass|source_pass=s
' => 0,
91 'sourcedbname|source_dbname=s
' => 1,
92 'targethost|target_host=s
' => 1,
93 'targetport|target_port=n
' => 1,
94 'targetuser|target_user=s
' => 1,
95 'targetpass|target_pass=s
' => 0,
96 'targetdbname|target_dbname=s
' => 1,
98 'basedir|basedir=s
' => 1,
99 'chromosomes|chr=s@
' => 0,
102 'biotypes_include=s@
' => 0,
103 'biotypes_exclude=s@
' => 0,
104 'cache_method=s
' => 0,
105 'build_cache_auto_threshold=n
' => 0,
106 'build_cache_concurrent_jobs=n
' => 0,
107 'min_exon_length|minexonlength=i
' => 0,
108 'exonerate_path|exoneratepath=s
' => 1,
109 'exonerate_threshold|exoneratethreshold=f
' => 0,
110 'exonerate_jobs|exoneratejobs=i
' => 0,
111 'exonerate_bytes_per_job|exoneratebytesperjob=f
' => 0,
112 'exonerate_extra_params|exonerateextraparams=s
' => 0,
113 'plugin_internal_id_mappers_gene=s@
' => 0,
114 'plugin_internal_id_mappers_transcript=s@
' => 0,
115 'plugin_internal_id_mappers_exon=s@
' => 0,
116 'mapping_types=s@
' => 1,
117 'plugin_stable_id_generator=s
' => 0,
118 'upload_events|uploadevents=s
' => 0,
122 'lsf_opt_run|lsfoptrun=s
' => 0,
123 'lsf_opt_dump_cache|lsfoptdumpcache=s
' => 0,
125 'no_check_empty_tables
' => 0,
128 # set default logpath
129 unless ($conf->param('logpath
')) {
130 $conf->param('logpath
', path_append($conf->param('basedir
'), 'log
'));
133 # get log filehandle and print heading and parameters to logfile
134 my $logger = new Bio::EnsEMBL::Utils::Logger(
135 -LOGFILE => $conf->param('logfile
'),
136 -LOGAUTO => $conf->param('logauto
'),
137 -LOGAUTOBASE => 'run_all
',
138 -LOGPATH => $conf->param('logpath
'),
139 -LOGAPPEND => $conf->param('logappend
'),
140 -LOGLEVEL => $conf->param('loglevel
'),
144 $logger->init_log($conf->list_param_values);
146 my $mode = $conf->param('mode
') || 'normal
';
148 # check configuration and resources.
149 # this is deliberately done before submitting to lsf (it doesn't need many
150 # resources and you will know about config errors before waiting for the job to
151 # run). the 'no_check' option prevents the checks from being re-run after automatic
153 unless ($conf->param(
'no_check')) {
155 $logger->error(
"Configuration check failed. See above for details.\n");
158 if ($mode eq
'check_only') {
159 $logger->info(
"Nothing else to do for 'check_only' mode. Exiting.\n");
164 # if user wants to run via lsf, submit script with bsub (this will exit this
165 # instance of the script)
166 &
bsubmit if ($conf->param(
'lsf'));
168 # this script is only a wrapper and will run one or more components.
169 # define options for the components here.
171 my $logautoid = $logger->log_auto_id;
173 $options{
'dump_cache'} = $conf->create_commandline_options(
174 logautoid => $logautoid,
180 $options{
'id_mapping'} = $conf->create_commandline_options(
181 logautoid => $logautoid,
187 # run components, depending on mode
188 my $sub =
"run_$mode";
196 # add one more job to
204 $logger->info(
"Checking configuration...\n", 0,
'stamped');
207 # check for valid mode
209 unless ($valid_modes{$mode}) {
210 $logger->warning(
"Invalid mode: $mode.\n");
213 $logger->debug(
"Run mode ok.\n");
217 # create the base directory, throw if this fails
219 my $basedir = $conf->param(
'basedir');
220 unless (-d $basedir) {
221 if (system(
"mkdir -p $basedir") == 0) {
222 $logger->debug(
"Base directory created successfully.\n");
224 $logger->warning(
"Unable to create base directory $basedir: $!\n");
230 # check db connection and permissions (SELECT for source, INSERT for target)
239 $err += $cache->check_db_read_permissions(
'source');
242 $err += $cache->check_db_connection(
'target');
243 $err += $cache->check_db_read_permissions(
'target');
244 $err += $cache->check_db_write_permissions(
'target');
247 # check stable ID and archive tables in target db are empty
249 $err += $cache->check_empty_tables(
'target');
252 # check both dbs have sequence
254 $err += $cache->check_sequence(
'source');
255 $err += $cache->check_sequence(
'target');
258 # check for required meta table entries
260 $err += $cache->check_meta_entries(
'source');
261 $err += $cache->check_meta_entries(
'target');
263 $logger->info(
"Done.\n\n", 0,
'stamped');
271 # dump cache files (this is done for all modes)
272 &
run_component(
'dump_cache', $options{
'dump_cache'},
'building cache');
275 &
run_component(
'id_mapping', $options{
'id_mapping'},
'ID mapping');
278 #&run_component('qc', $options{'qc'}, 'QC');
283 # Skip dumping and start at the ID mapping step.
285 'ID mapping (skipping the dumping step)' );
289 # upload table data files into db
290 # (delegate to id_mapping.pl which will do the right thing based on --mode)
291 &
run_component(
'id_mapping', $options{
'id_mapping'},
'uploading tables');
296 my $basename = shift;
300 my $cmd =
"$basename.pl";
303 $logger->info(
"----- $logtext -----\n", 0,
'stamped');
305 if ($logger->logauto) {
306 $logger->info(
"See ${basename}_".$logger->log_auto_id.
".log for logs.\n", 1);
307 } elsif ($logger->logfile) {
308 $logger->info(
"See below for logs.\n", 1);
311 system(
"./$cmd $options") == 0
312 or $logger->error(
"Error running $cmd. Please see the respective logfile for more information.\n");
314 $logger->info(
"----- done with $logtext -----\n\n", 0,
'stamped');
320 # build bsub commandline
323 # automatically create a filename for lsf output
324 my $cmd =
'bsub -o '.$conf->param(
'logpath');
325 $cmd .=
'/lsf_'.$logger->log_auto_id.
'.out';
327 # add extra lsf options as configured by the user
328 $cmd .=
' '.$conf->param(
'lsf_opt_run');
333 # options for this script
334 my $options = $conf->create_commandline_options(
335 logautoid => $logger->log_auto_id,
345 print
"\nRe-executing via lsf:\n";
348 exec($cmd) or die
"Could not exec $0 via lsf: $!\n";