3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 # Base class that all other mapping methods inherit from
22 package XrefMapper::Methods::ExonerateBasic;
30 # Path to exonerate executable
31 my $exonerate_path = `which exonerate`; $exonerate_path =~ s/\n
35 my($class, $mapper) = @_;
39 $self->mapper($mapper);
47 my ($self, $arg) = @_;
50 ($self->{_mapper} = $arg );
51 return $self->{_mapper};
58 Example : $mapper->jobcount(1004);
59 Description: Getter / Setter
for number of jobs submitted.
66 my ($self, $arg) = @_;
69 ($self->{_jobcount} = $arg );
70 return $self->{_jobcount};
76 Arg[1] : Query filename to pass to exonerate;
this should be the XREF file.
77 Arg[2] : Target filename to pass to exonerate;
this should be the ENSEMBL file.
79 Description: Run exonerate with
default options.
88 my ($self, $query, $target, $mapper) = @_;
90 my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
100 Description: Options to pass to exonerate. Override as appropriate.
101 Returntype : List of strings.
114 =head2 resubmit_exonerate
# Re-run one element of a mapping job: the stored exonerate command is
# written to an executable wrapper script under $root_dir, and the matching
# mapping_jobs row is marked as SUBMITTED.
#
# Arguments (positional): $mapper, $command (the full command line to run),
# $outfile / $errfile (names used for bsub -o / -e, relative to $root_dir),
# $job_id, $array_number, $root_dir.
#
# NOTE(review): both a local run (backticks on the script) and a bsub
# command line are prepared below; the condition choosing between them is
# not among the lines shown here -- confirm against the full file.
118 sub resubmit_exonerate {
119 my ($self, $mapper, $command, $outfile, $errfile, $job_id, $array_number, $root_dir) = @_;
# The command is split on whitespace: field 1 is discarded ($junk), fields
# 2 and 3 are used as the query and target file paths.
123 my ($junk,$query, $target, @rest) = split(/\s+/,$command);
# Temporary disk space estimate: combined size in bytes of both input files ...
125 my $disk_space_needed = (stat($query))[7]+(stat($target))[7];
# NOTE(review): 1024000 is neither 10^6 nor 2^20 (1048576), so this is only
# an approximate megabyte figure.
127 $disk_space_needed /= 1024000; # convert to MB
# ... truncated to an integer and bumped by one so it is never zero.
128 $disk_space_needed = int($disk_space_needed);
129 $disk_space_needed += 1;
# Job name: class name plus the current epoch time.
131 my $unique_name = $self->get_class_name() .
"_" . time();
# Wrapper script path: <root_dir>/resub_<job_id>_<array_number>
133 my $exe_file = $root_dir.
"/resub_".$job_id.
"_".$array_number;
134 open(my $rh,
">", $exe_file) || die
"Could not open file $exe_file";
# Source the LSF profile from the wrapper script, but only if it exists on this host.
136 my $lsf_profile =
'/usr/local/lsf/conf/profile.lsf';
137 if (-e $lsf_profile) {
138 print $rh
". $lsf_profile\n";
140 print $rh $command.
"\n";
# Make the wrapper script executable.
144 chmod 0755, $exe_file;
# Queue comes from the mapper configuration, with a hard-coded fallback.
146 my $queue = $self->mapper->farm_queue ||
'production-rh74';
# bsub resource string: 1500 memory limit/reservation, tmp space from the
# estimate above, and the unique job name.
148 my $usage =
'-M 1500 -R"select[mem>1500] rusage[tmp='.$disk_space_needed.
', mem=1500]" -J "'.$unique_name.
'"';
# Append the queue option ($queue is always true here because of the fallback above).
149 $queue and $usage .=
' -q '. $queue;
151 my $com =
"bsub $usage -o $root_dir/$outfile -e $root_dir/$errfile ".$exe_file;
154 print
"Running job locally for job number $job_id [$array_number]\n" if($mapper->verbose);
# Local run: the wrapper script is executed directly and its output captured.
155 my $line = `$exe_file`;
156 my $sth = $mapper->xref->dbc->prepare(
'update mapping_jobs set status = "SUBMITTED"'.
" where job_id = $job_id and array_number = $array_number");
# bsub reports "Job <N> is submitted" on success; the job id is captured from that line.
166 if ($line =~ /^Job <(\d+)> is submitted/) {
168 print
"LSF job ID for main mapping job: $jobid (job array with 1 job ($array_number))\n" if($mapper->verbose);
173 # Something went wrong
# NOTE(review): $@ holds the last eval error; nothing in the visible lines
# sets it, so this part of the message is probably empty -- confirm.
174 warn(
"Job submission failed:\n$@\n");
175 print STDERR
"bsub command was $com\n";
176 print STDERR $line.
"\n";
180 # write details of job to database
181 my $sth = $mapper->xref->dbc->prepare(
'update mapping_jobs set status = "SUBMITTED"'.
" where job_id = $job_id and array_number = $array_number");
190 =head2 submit_exonerate
194 Description: Submit a *single* mapping job array to exonerate.
195 Returntype : The name of the job array submitted to LSF. Can be used to make a later job depend on this one.
# Submit one exonerate mapping run for a query/target file pair and record
# it in the xref database (process_status, mapping and mapping_jobs tables).
#
# Arguments (positional): $query and $target file paths, $mapper, then the
# list of extra exonerate options (@options).
#
# Two paths are visible below: a non-farm path that builds a single command,
# and a farm path that writes a wrapper script and builds a bsub job-array
# command line.
# NOTE(review): several statements (assignment of $jobid, $line and $cmd,
# the actual command execution, closing braces) are not among the lines
# shown here -- confirm details against the full file.
201 sub submit_exonerate {
203 my ($self, $query, $target, $mapper, @options) = @_;
# Working directory for scripts, map files and LSF output.
206 my $root_dir = $mapper->core->dir;
208 my $queryfile = basename($query);
209 my $targetfile = basename($target);
211 my $prefix = $root_dir .
"/" . basename($query);
# Sequence type is taken from a trailing "_dna" or "_peptide" in the query
# file name; $ensembl_type is undef if the name does not end that way.
214 my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
215 my $options_str = join(
" ", @options);
# Job name: class name plus the current epoch time.
217 my $unique_name = $self->get_class_name() .
"_" . time();
# Temporary disk space estimate from the combined size of both input files.
219 my $disk_space_needed = (stat($query))[7]+(stat($target))[7];
# NOTE(review): 1024000 is neither 10^6 nor 2^20 (1048576), so this is only
# an approximate megabyte figure.
221 $disk_space_needed /= 1024000; # convert to MB
222 $disk_space_needed = int($disk_space_needed);
223 $disk_space_needed += 1;
# Number of query chunks (job array elements), derived from the query file size.
225 my $num_jobs = calculate_num_jobs($query);
# Executable: the mapper-configured one, else the file-level $exonerate_path.
228 my $exe = $self->mapper->exonerate || $exonerate_path;
# Non-farm path. The test is defined(), so any defined value of nofarm --
# including 0 -- selects this branch.
231 if(defined($mapper->nofarm)){
# Single output map file, numbered 1.
# NOTE(review): the shell command line following this assignment is not a
# Perl statement on its own; presumably it is the body of a quoted/heredoc
# assignment to $cmd whose delimiters are not visible here -- confirm.
232 my $output = $self->get_class_name() .
"_" . $ensembl_type.
"_1.map";
234 $exe $query $target --showvulgar
false --showalignment FALSE --ryo
"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep
'^xref' > $root_dir/$output
236 print
"none farm command is $cmd\n" if($mapper->verbose);
239 # write details of job to database
241 my $sth = $mapper->xref->dbc->prepare(
"insert into process_status (status, date) values('mapping_submitted',now())");
246 if($ensembl_type eq
"peptide"){
# The loop runs exactly once ($i = 1): one mapping row with array_size $i
# and one mapping_jobs row are written for the non-farm run.
250 for( my $i=1; $i<=1; $i++){
# Copy of the command stored in the database. It uses double quotes around
# ^xref, presumably so the text does not terminate the single-quoted SQL
# literal below -- confirm.
251 my $command =
"$exe $query $target --showvulgar false --showalignment FALSE --ryo ".
252 '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'.
" $options_str | grep ".
'"'.
"^xref".
'"'.
" > $root_dir/$output";
# NOTE(review): values (including the command line) are interpolated into
# the SQL text instead of being bound as placeholders; a single quote in
# $command or $options_str would break the statement.
253 my $insert =
"insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
254 $self->query_identity_threshold.
", ".$self->target_identity_threshold.
", '".$self->get_class_name().
"', $i)";
256 $sth = $mapper->xref->dbc->prepare($insert);
260 $sth = $mapper->xref->dbc->prepare(
"insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
# File names recorded for this array element.
262 my $map_file = $self->get_class_name().
"_".$ensembl_type.
"_".$i.
".map";
263 my $out_file =
"xref_0_".$ensembl_type.
".".$jobid.
"-".$i.
".out";
264 my $err_file =
"xref_0_".$ensembl_type.
".".$jobid.
"-".$i.
".err";
265 $sth->execute($root_dir, $map_file,
'SUBMITTED', $out_file, $err_file, $i, $jobid);
276 # array features barf if just one job
# Keep a running total of submitted array elements on the object.
281 $self->jobcount($self->jobcount()+$num_jobs);
# Farm path: one map file per array element. $LSB_JOBINDEX is escaped so it
# stays literal in the Perl string and is expanded when the script runs.
285 my $output = $self->get_class_name() .
"_" . $ensembl_type .
"_" .
"\$LSB_JOBINDEX.map";
# Queue comes from the mapper configuration, with a hard-coded fallback.
287 my $queue = $self->mapper->farm_queue ||
'production-rh74';
# bsub options: memory/tmp resources, a job array [1-$num_jobs] with the
# "%200" suffix, and per-element .out/.err files named with %J and %I.
290 my $usage =
'-M 1500 -R"select[mem>1500] rusage[tmp='.$disk_space_needed.
', mem=1500]" '.
'-J "'.$unique_name.
'[1-'.$num_jobs.
']%200" -o '.$prefix.
'.%J-%I.out -e '.$prefix.
'.%J-%I.err';
# Prepend the queue option ($queue is always true here because of the fallback above).
291 $queue and $usage =
"-q $queue " . $usage;
# Command written to the wrapper script: each array element processes the
# query chunk given by its LSB_JOBINDEX out of $num_jobs chunks.
294 my $command = $exe.
" ".$query.
" ".$target.
' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.
' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str;
# Only lines starting with "xref" (the --ryo output format) are kept.
295 $command .=
" | grep '^xref' > $root_dir/$output";
# Wrapper script path: <root_dir>/<unique_name>.submit
297 my $exe_file = $root_dir.
"/".$unique_name.
".submit";
298 open(my $rh,
">", $exe_file) || die
"Could not open file $exe_file";
300 my $lsf_conf =
"/usr/local/lsf/conf/profile.lsf";
302 print $rh
". $lsf_conf\n";
304 print $rh $command.
"\n";
# Make the wrapper script executable.
308 chmod 0755, $exe_file;
310 my $com =
"bsub $usage $exe_file";
# bsub reports "Job <N> is submitted" on success; the job id is captured from that line.
315 if ($line =~ /^Job <(\d+)> is submitted/) {
317 print
"LSF job ID for main mapping job: $jobid, name $unique_name with $num_jobs arrays elements)\n" if($mapper->verbose);
322 # Something went wrong
# NOTE(review): $@ holds the last eval error; nothing in the visible lines
# sets it, so this part of the message is probably empty -- confirm.
323 warn(
"Job submission failed:\n$@\n");
324 print STDERR
"bsub command was $com\n";
325 print STDERR $line.
"\n";
329 # write details of job to database
# Second copy of the command, for the database record; it uses double
# quotes around ^xref instead of the single quotes used in the script.
330 my $command =
"$exe $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo ".
331 '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'.
" $options_str | grep ".
'"'.
"^xref".
'"'.
" > $root_dir/$output";
333 my $sth = $mapper->xref->dbc->prepare(
"insert into process_status (status, date) values('mapping_submitted',now())");
# NOTE(review): values are interpolated into the SQL text instead of being
# bound as placeholders; a single quote in $command or $options_str would
# break the statement.
337 my $insert =
"insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
338 $self->query_identity_threshold.
", ".$self->target_identity_threshold.
", '".$self->get_class_name().
"', $num_jobs)";
341 $sth = $mapper->xref->dbc->prepare($insert);
345 $sth = $mapper->xref->dbc->prepare(
"insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
# One mapping_jobs row per array element, each initially marked SUBMITTED.
347 for( my $i=1; $i<=$num_jobs; $i++){
348 my $map_file = $self->get_class_name().
"_".$ensembl_type.
"_".$i.
".map";
349 my $out_file =
"xref_0_".$ensembl_type.
".".$jobid.
"-".$i.
".out";
350 my $err_file =
"xref_0_".$ensembl_type.
".".$jobid.
"-".$i.
".err";
351 $sth->execute($root_dir, $map_file,
'SUBMITTED', $out_file, $err_file, $i, $jobid);
361 =head2 calculate_num_jobs
363 Args : Query file name
365 Description: Calculate the number of LSF jobs to submit based on the size of the query file.
366 Returntype : The number of jobs.
# Calculate the number of LSF jobs to submit for a query file.
#
#   Arg[1]     : Query file name.
#   Arg[2]     : (optional) number of bytes of query file handled per job.
#                Defaults to 250000 when omitted or not a positive number.
#   Returntype : 0 when the file is empty, missing or cannot be stat'ed;
#                otherwise int(size / bytes_per_job), with a minimum of 1.
sub calculate_num_jobs {
  my ($query, $bytes_per_job) = @_;

  # Keep the historical chunk size unless the caller supplies a usable one.
  $bytes_per_job = 250000
    unless defined $bytes_per_job && $bytes_per_job > 0;

  # stat returns an empty list for a missing or unreadable file, which would
  # leave the size undefined; treat that the same as an empty file instead
  # of comparing undef numerically (which raises a warning).
  my $size = defined $query ? (stat $query)[7] : undef;
  return 0 unless $size;

  # Any non-empty file gets at least one job, even if smaller than one chunk.
  return int($size / $bytes_per_job) || 1;
}
384 # Get class name from fully-qualified object name
385 # e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(0x12113c0)
391 my $module_name = ref($self);
393 my @bits = split(/::/, $module_name);
399 # Check if any .err files exist that have non-zero size;
400 # this indicates that something has gone wrong with the exonerate run
404 my ($self, $dir) = @_;
406 foreach my $err (glob(
"$dir/*.err")) {
408 print
"\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
413 # Percentage identity that query (xref) must match to be considered.
415 sub query_identity_threshold {
422 # Percentage identity that target (ensembl) must match to be considered.
424 sub target_identity_threshold {