ensembl-hive  2.7.0
ExonerateBasic.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 # Base class that all other mapping methods inherit from
21 
22 package XrefMapper::Methods::ExonerateBasic;
23 
24 use strict;
25 use warnings;
26 
27 use File::Basename;
28 use IPC::Open3;
29 
30 # Path to exonerate executable
31 my $exonerate_path = `which exonerate`; $exonerate_path =~ s/\n//;
32 
33 sub new {
34 
35  my($class, $mapper) = @_;
36 
37  my $self ={};
38  bless $self,$class;
39  $self->mapper($mapper);
40  $self->jobcount(0);
41 
42  return $self;
43 
44 }
45 
46 sub mapper{
47  my ($self, $arg) = @_;
48 
49  (defined $arg) &&
50  ($self->{_mapper} = $arg );
51  return $self->{_mapper};
52 }
53 
54 
55 =head2 jobcount
56 
57  Arg [1] : (optional)
58  Example : $mapper->jobcount(1004);
59  Description: Getter / Setter for number of jobs submitted.
60  Returntype : scalar
61  Exceptions : none
62 
63 =cut
64 
65 sub jobcount {
66  my ($self, $arg) = @_;
67 
68  (defined $arg) &&
69  ($self->{_jobcount} = $arg );
70  return $self->{_jobcount};
71 }
72 
73 
74 =head2 run
75 
76  Arg[1] : Query filename to pass to exonerate; this should be the XREF file.
77  Arg[2] : Target filename to pass to exonerate; this should be the ENSEMBL file.
78  Example : none
79  Description: Run exonerate with default options.
80  Returntype : none
81  Exceptions : none
82  Caller : general
83 
84 =cut
85 
86 sub run {
87 
88  my ($self, $query, $target, $mapper) = @_;
89 
90  my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
91 
92  return $name;
93 
94 }
95 
96 =head2 options
97 
98  Args : none
99  Example : none
100  Description: Options to pass to exonerate. Override as appropriate.
101  Returntype : List of strings.
102  Exceptions : none
103  Caller : general
104 
105 =cut
106 
107 sub options {
108 
109  return ();
110 
111 }
112 
113 
114 =head2 resubmit_exonerate
115 
116 =cut
117 
118 sub resubmit_exonerate {
119  my ($self, $mapper, $command, $outfile, $errfile, $job_id, $array_number, $root_dir) = @_;
120 
121 
122 
123  my ($junk,$query, $target, @rest) = split(/\s+/,$command);
124 
125  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];
126 
127  $disk_space_needed /= 1024000; # convert to MB
128  $disk_space_needed = int($disk_space_needed);
129  $disk_space_needed += 1;
130 
131  my $unique_name = $self->get_class_name() . "_" . time();
132 
133  my $exe_file = $root_dir."/resub_".$job_id."_".$array_number;
134  open(my $rh, ">", $exe_file) || die "Could not open file $exe_file";
135 
136  my $lsf_profile = '/usr/local/lsf/conf/profile.lsf';
137  if (-e $lsf_profile) {
138  print $rh ". $lsf_profile\n";
139  }
140  print $rh $command."\n";
141 
142  close $rh;
143 
144  chmod 0755, $exe_file;
145 
146  my $queue = $self->mapper->farm_queue || 'production-rh74';
147 
148  my $usage = '-M 1500 -R"select[mem>1500] rusage[tmp='.$disk_space_needed.', mem=1500]" -J "'.$unique_name.'"';
149  $queue and $usage .= ' -q '. $queue;
150 
151  my $com = "bsub $usage -o $root_dir/$outfile -e $root_dir/$errfile ".$exe_file;
152 
153  if($mapper->nofarm){
154  print "Running job locally for job number $job_id [$array_number]\n" if($mapper->verbose);
155  my $line = `$exe_file`;
156  my $sth = $mapper->xref->dbc->prepare('update mapping_jobs set status = "SUBMITTED"'." where job_id = $job_id and array_number = $array_number");
157  $sth->execute();
158  $sth->finish;
159  return 1;
160  }
161 
162 
163  my $line = `$com`;
164 
165  my $jobid = 0;
166  if ($line =~ /^Job <(\d+)> is submitted/) {
167  $jobid = $1;
168  print "LSF job ID for main mapping job: $jobid (job array with 1 job ($array_number))\n" if($mapper->verbose);
169  }
170 
171 
172  if (!$jobid) {
173  # Something went wrong
174  warn("Job submission failed:\n$@\n");
175  print STDERR "bsub command was $com\n";
176  print STDERR $line."\n";
177  return 0;
178  }
179  else{
180  # write details of job to database
181  my $sth = $mapper->xref->dbc->prepare('update mapping_jobs set status = "SUBMITTED"'." where job_id = $job_id and array_number = $array_number");
182  $sth->execute();
183  $sth->finish;
184  }
185 
186  return $jobid;
187 
188 }
189 
190 =head2 submit_exonerate
191 
192  Args : none
193  Example : none
194  Description: Submit a *single* mapping job array to exonerate.
195  Returntype : The name of the job array submitted to LSF. Can be used in depend job.
196  Exceptions : none
197  Caller : general
198 
199 =cut
200 
201 sub submit_exonerate {
202 
203  my ($self, $query, $target, $mapper, @options) = @_;
204 
205 
206  my $root_dir = $mapper->core->dir;
207 
208  my $queryfile = basename($query);
209  my $targetfile = basename($target);
210 
211  my $prefix = $root_dir . "/" . basename($query);
212  $prefix =~ s/\.\w+$//;
213 
214  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/; # dna or prot
215  my $options_str = join(" ", @options);
216 
217  my $unique_name = $self->get_class_name() . "_" . time();
218 
219  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];
220 
221  $disk_space_needed /= 1024000; # convert to MB
222  $disk_space_needed = int($disk_space_needed);
223  $disk_space_needed += 1;
224 
225  my $num_jobs = calculate_num_jobs($query);
226 
227 
228  my $exe = $self->mapper->exonerate || $exonerate_path;
229 
230 
231  if(defined($mapper->nofarm)){
232  my $output = $self->get_class_name() . "_" . $ensembl_type. "_1.map";
233  my $cmd = <<EON;
234 $exe $query $target --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" $options_str | grep '^xref' > $root_dir/$output
235 EON
236  print "none farm command is $cmd\n" if($mapper->verbose);
237 
238 
239  # write details of job to database
240 
241  my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
242  $sth->execute();
243  $sth->finish;
244 
245  my $jobid = 1;
246  if($ensembl_type eq "peptide"){
247  $jobid = 2;
248  }
249 
250  for( my $i=1; $i<=1; $i++){
251  my $command = "$exe $query $target --showvulgar false --showalignment FALSE --ryo ".
252  '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";
253  my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
254  $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $i)";
255 
256  $sth = $mapper->xref->dbc->prepare($insert);
257  $sth->execute;
258  $sth->finish;
259 
260  $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
261 
262  my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
263  my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
264  my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
265  $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
266  }
267  $sth->finish;
268 
269 
270  system($cmd);
271  $self->jobcount(1);
272  return "nofarm";
273  }
274 
275 
276  # array features barf if just one job
277  if($num_jobs == 1){
278  $num_jobs++;
279  }
280 
281  $self->jobcount($self->jobcount()+$num_jobs);
282 
283 
284 
285  my $output = $self->get_class_name() . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";
286 
287  my $queue = $self->mapper->farm_queue || 'production-rh74';
288 
289 
290  my $usage = '-M 1500 -R"select[mem>1500] rusage[tmp='.$disk_space_needed.', mem=1500]" '.'-J "'.$unique_name.'[1-'.$num_jobs.']%200" -o '.$prefix.'.%J-%I.out -e '.$prefix.'.%J-%I.err';
291  $queue and $usage = "-q $queue " . $usage;
292 
293 
294  my $command = $exe." ".$query." ".$target.' --querychunkid $LSB_JOBINDEX --querychunktotal '.$num_jobs.' --showvulgar false --showalignment FALSE --ryo "xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n" '.$options_str;
295  $command .= " | grep '^xref' > $root_dir/$output";
296 
297  my $exe_file = $root_dir."/".$unique_name.".submit";
298  open(my $rh,">", $exe_file) || die "Could not open file $exe_file";
299 
300  my $lsf_conf = "/usr/local/lsf/conf/profile.lsf";
301  if (-e $lsf_conf) {
302  print $rh ". $lsf_conf\n";
303  }
304  print $rh $command."\n";
305 
306  close $rh;
307 
308  chmod 0755, $exe_file;
309 
310  my $com = "bsub $usage $exe_file";
311 
312  my $line = `$com`;
313 
314  my $jobid = 0;
315  if ($line =~ /^Job <(\d+)> is submitted/) {
316  $jobid = $1;
317  print "LSF job ID for main mapping job: $jobid, name $unique_name with $num_jobs arrays elements)\n" if($mapper->verbose);
318  }
319 
320 
321  if (!$jobid) {
322  # Something went wrong
323  warn("Job submission failed:\n$@\n");
324  print STDERR "bsub command was $com\n";
325  print STDERR $line."\n";
326  return 0;
327  }
328  else{
329  # write details of job to database
330  my $command = "$exe $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs --showvulgar false --showalignment FALSE --ryo ".
331  '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\\\n"'." $options_str | grep ".'"'."^xref".'"'." > $root_dir/$output";
332 
333  my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
334  $sth->execute();
335  $sth->finish;
336 
337  my $insert = "insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values($jobid, '$ensembl_type', '$command',".
338  $self->query_identity_threshold.", ".$self->target_identity_threshold.", '".$self->get_class_name()."', $num_jobs)";
339 
340 
341  $sth = $mapper->xref->dbc->prepare($insert);
342  $sth->execute;
343  $sth->finish;
344 
345  $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");
346 
347  for( my $i=1; $i<=$num_jobs; $i++){
348  my $map_file = $self->get_class_name()."_".$ensembl_type."_".$i.".map";
349  my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
350  my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
351  $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
352  }
353  $sth->finish;
354 
355  }
356 
357  return $unique_name;
358 
359 }
360 
361 =head2 calculate_num_jobs
362 
363  Args : Query file name
364  Example : none
365  Description: Calculate the number of LSF jobs to submit based on the size of the query file.
366  Returntype : The number of jobs.
367  Exceptions : none
368  Caller : general
369 
370 =cut
371 
372 sub calculate_num_jobs {
373 
374  my $query = shift;
375 
376  my $bytes_per_job = 250000;
377 
378  my $size = (stat $query)[7];
379  if( $size == 0 ){ return 0 }
380  return int($size/$bytes_per_job) || 1;
381 
382 }
383 
384 # Get class name from fully-qualified object name
385 # e.g. return ExonerateBasic from XrefMapper::Methods::ExonerateBasic=HASH(Ox12113c0)
386 
387 sub get_class_name {
388 
389  my $self = shift;
390 
391  my $module_name = ref($self);
392 
393  my @bits = split(/::/, $module_name);
394 
395  return $bits[-1];
396 
397 }
398 
399 # Check if any .err files exist that have non-zero size;
400 # this indicates that something has gone wrong with the exonerate run
401 
402 sub check_err {
403 
404  my ($self, $dir) = @_;
405 
406  foreach my $err (glob("$dir/*.err")) {
407 
408  print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n" if (-s $err);
409 
410  }
411 }
412 
413 # Percentage identity that query (xref) must match to be considered.
414 
415 sub query_identity_threshold {
416 
417  return 0;
418 
419 }
420 
421 
422 # Percentage identity that target (ensembl) must match to be considered.
423 
424 sub target_identity_threshold {
425 
426  return 0;
427 
428 }
429 
430 1;
431 
run
public run()