my %jobs = ();
# create empty directory for logs
my $logpath = path_append($conf->param('logpath'), 'dump_by_seq_region');
system("rm -rf $logpath") == 0 or
$logger->error("Unable to delete lsf log dir $logpath: $!\n");
system("mkdir -p $logpath") == 0 or
$logger->error("Can't create lsf log dir $logpath: $!\n");
# load the cache implementation
my $cache_impl = 'Bio::EnsEMBL::IdMapping::Cache';
inject($cache_impl);
my $cache = $cache_impl->new(
-LOGGER => $logger,
-CONF => $conf,
);
# submit jobs to lsf
foreach my $dbtype (qw(source target)) {
$logger->info("\n".ucfirst($dbtype)." db...\n", 0, 'stamped');
# determine which slices need to be done
my $filename = "$dbtype.dump_cache.slices.txt";
open(my $fh, '>', "$logpath/$filename") or
throw("Unable to open $logpath/$filename for writing: $!");
my $num_jobs = 0;
foreach my $slice_name (@{ $cache->slice_names($dbtype) }) {
my $type = "$dbtype.$slice_name";
unless ($cache->cache_file_exists($type)) {
print $fh "$slice_name\n";
$num_jobs++;
}
}
close($fh);
unless ($num_jobs) {
$logger->info("All cache files for $dbtype exist.\n");
next;
}
# build lsf command
my $lsf_name = 'dump_by_seq_region_'.time;
my $concurrent = $conf->param('build_cache_concurrent_jobs') || 200;
my $options = $conf->create_commandline_options(
logauto => 1,
logautobase => "dump_by_seq_region",
interactive => 0,
is_component => 1,
dbtype => $dbtype,
cache_impl => $cache_impl,
);
my $cmd = qq{./dump_by_seq_region.pl $options --index \$LSB_JOBINDEX};
my $pipe =
qq{|bsub -J '$lsf_name\[1-$num_jobs\]\%$concurrent' }
. qq{-o $logpath/dump_by_seq_region.$dbtype.\%I.out }
. qq{-e $logpath/dump_by_seq_region.$dbtype.\%I.err }
. $conf->param('lsf_opt_dump_cache');
# run lsf job array
$logger->info("\nSubmitting $num_jobs jobs to lsf.\n");
$logger->debug("$cmd\n\n");
$logger->debug("$pipe\n\n");
local *BSUB;
open BSUB, $pipe or
$logger->error("Could not open open pipe to bsub: $!\n");
print BSUB $cmd;
$logger->error("Error submitting jobs: $!\n")
unless ($? == 0);
close BSUB;
# submit dependent job to monitor finishing of jobs
$logger->info("Waiting for jobs to finish...\n", 0, 'stamped');
my $dependent_job =
qq{bsub -K -w "ended($lsf_name)" -q production } .
qq{-M 100 -R 'select[mem>100]' -R 'rusage[mem=100]' } .
qq{-o $logpath/dump_cache.$dbtype.depend.out /bin/true};
system($dependent_job) == 0 or
$logger->error("Error submitting dependent job: $!\n");
$logger->info("All jobs finished.\n", 0, 'stamped');
# check for lsf errors
sleep(5);
my $err;
foreach my $i (1..$num_jobs) {
$err++ unless (-e "$logpath/dump_by_seq_region.$dbtype.$i.success");
}
if ($err) {
$logger->error("At least one of your jobs failed.\nPlease check the logfiles at $logpath for errors.\n");
return 1;
}
}
return 0;
}