ensembl-hive  2.8.1
PBSExonerateGappedBest1.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::Methods::PBSExonerateGappedBest1;
21 
use strict;
#use File::Basename;
#use IPC::Open3;
use Sys::Hostname;

# Parent class; also provides calculate_num_jobs(), used by submit_exonerate.
use XrefMapper::Methods::ExonerateBasic;

use vars '@ISA';

@ISA = qw(XrefMapper::Methods::ExonerateBasic);

# Command name written as the first word of every generated exonerate job
# script (see submit_exonerate). It is a bare name rather than a full path,
# so it is presumably resolved through PATH wherever the job runs.
my $exonerate_path = 'exonerate';
34 
# Returns the fixed list of exonerate command-line arguments for this method:
# affine:local model, no suboptimal alignments, best single hit only.
# NOTE(review): presumably these end up in the @options passed to
# submit_exonerate - confirm against the caller.
sub options {
    return qw( --model affine:local --subopt no --bestn 1 );
}
40 
# Returns the query identity cut-off used by this method: always 90.
# NOTE(review): presumably a percentage applied by the mapper to the query
# side of an alignment - confirm against the caller.
sub query_identity_threshold {
    my $threshold = 90;
    return $threshold;
}
46 
# Returns the target identity cut-off used by this method: always 90.
# NOTE(review): presumably a percentage applied by the mapper to the target
# side of an alignment - confirm against the caller.
sub target_identity_threshold {
    my $threshold = 90;
    return $threshold;
}
52 
# Submits an exonerate run to PBS as one qsub job per query chunk.
#
# Arguments:
#   $query    - query FASTA file; a name ending in 'peptide.fasta' selects
#               the 'peptide' output label, anything else 'dna'
#   $target   - target file handed to exonerate via --target
#   $root_dir - directory part of the output/error file names
#   @options  - extra exonerate arguments, appended to the command line
#
# Side effects: adds the number of chunks to $self->jobcount, submits the
# jobs through $self->submit_qsub and stores the dependency job's ID with
# $self->global_jobid.
#
# Returns the ID of the dependency job, or nothing (bare return) when there
# are no chunks to run.
# Dies when the dependency job could not be submitted, because every chunk
# job needs its ID.
sub submit_exonerate {
    my ( $self, $query, $target, $root_dir, @options ) = @_;

    # Exonerate can run individual chunks of a large job.
    # Determine the number of chunks that will be used, and add to total.
    # NOTE(review): the right-hand side of this assignment was missing from
    # the listing this file was recovered from; it is reconstructed from the
    # cross-reference to ExonerateBasic::calculate_num_jobs. Confirm the
    # argument list against ExonerateBasic.pm.
    my $num_jobs
        = XrefMapper::Methods::ExonerateBasic::calculate_num_jobs($query);
    $self->jobcount( $self->jobcount() + $num_jobs );

    $num_jobs || return;    # Check we have jobs to run

    # Build a template representing the exonerate command. It is a sprintf
    # format: %d is filled with the chunk number at submission time, and
    # every literal percent sign for exonerate's --ryo is doubled.
    my $options_str   = join( ' ', @options );
    my $shell_command = <<"EOF";
#!/bin/sh
$exonerate_path \\
--target $target \\
--query $query \\
--querychunktotal $num_jobs \\
--querychunkid %d \\
--showvulgar FALSE \\
--showalignment FALSE \\
--ryo "xref:%%qi:%%ti:%%ei:%%ql:%%tl:%%qab:%%qae:%%tab:%%tae:%%C:%%s\\n" \\
$options_str \\
| grep '^xref'
exit
EOF

    #warn( sprintf $shell_command, 1 );

    # Determine the output filename: host:root_dir/ClassName_alphabet
    my $host           = Sys::Hostname::hostname;
    my $query_alphabet = $query =~ /peptide.fasta$/ ? 'peptide' : 'dna';
    my $outfile_root   = sprintf( "%s:%s/%s_%s",
                                  $host,
                                  $root_dir,
                                  $self->get_class_name(),
                                  $query_alphabet );

    # Run PBS qsub for each job chunk.
    # First set a job that depends on the completion of all other jobs
    my $jobid = $self->submit_qsub( q( echo 'sleep 1' ),
                                    [ -W => "depend=on:$num_jobs" ] );

    # Without this ID the chunk jobs below would be submitted with an empty
    # 'depend=beforeany:' clause, so stop before queueing anything else.
    defined $jobid
        or die( "Could not obtain a PBS job ID for the dependency job" );

    # Run each job chunk
    my $jobname = join '.', 'X', ( $host =~ /^(\w+)/ ), $$;
    for my $chunk ( 1 .. $num_jobs ) {
        my $outfile = sprintf( '%s_%s.map', $outfile_root, $chunk );
        my $errfile = sprintf( '%s_%s.err', $outfile_root, $chunk );

        $self->submit_qsub(
            sprintf( $shell_command, $chunk ),
            [ '-N' => $jobname,
              '-W' => "depend=beforeany:$jobid",
              '-o' => $outfile,
              '-e' => $errfile ] );
    }

    # BasicMapper calls a method called 'submit_depend_job' that waits
    # till all the LSF jobs have completed. We're not using LSF, so this
    # approach does not work. Solution: remember the dependency job here
    # so that this class's own submit_depend_job can wait on it.
    # Not as efficient for a large cluster, but we may have enough work to
    # do to saturate a small one. TODO: abstract 'submit_depend_job' to the
    # Method class instances.
    $self->global_jobid($jobid);
    #$self->submit_depend_job( $jobid );

    return $jobid;
}
124 
#----------------------------------------------------------------------
# Accessor for the 'global' job ID that groups all chunked jobs in the
# method. Called with an argument (even undef) it stores that value first;
# in every case it returns the value currently held in '_global_jobid'.
sub global_jobid {
    my ( $self, @new_value ) = @_;

    $self->{_global_jobid} = $new_value[0] if @new_value;

    return $self->{_global_jobid};
}
133 
#----------------------------------------------------------------------
# Waits in-process until the global job ID has completed, by submitting a
# qsub job with 'block=true' that depends (afterany) on it.
# Returns nothing when no jobs were counted; otherwise returns the global
# job ID (not the ID of the blocking job).
# Dies if jobs were counted but no global job ID has been recorded.
sub submit_depend_job {
    my ($self) = @_;

    # Check that we are running jobs
    return unless $self->jobcount;

    my $jobid = $self->global_jobid
        or die( "global_jobid unset!" );

    my @qsub_options = (
        '-W' => 'block=true',
        '-W' => 'depend=' . join( ':', 'afterany', $jobid ),
    );
    $self->submit_qsub( q( echo 'sleep 1' ), \@qsub_options );

    return $jobid;
}
149 
#----------------------------------------------------------------------
# A wrapper for submitting PBS qsub jobs. Three processes cooperate:
#   READER (this process) opens a pipe (-|) on the WRITER and scans the
#          output for lines that start with digits.
#   WRITER (first child) opens a pipe (|-) on the RUNNER and writes the
#          script to it.
#   RUNNER (second child) execs qsub with the command line options; its
#          STDIN is attached to the WRITER and its STDOUT to the READER.
#
# Arguments:
#   $script    - text of the job script (required)
#   $qsub_args - optional array ref of qsub command line options
#
# Returns the leading digits of the last output line that begins with a
# digit (the qsub job ID), or undef if no such line was seen. The exit
# status of the pipeline is not examined by the READER.
#
# Dies in the calling process when no script is given or the first fork
# fails. Failures inside the children are reported with warn() and end
# that child with exit status 1. They deliberately do not die(): a die in
# a forked child would be caught by any eval around the call and leave the
# child running the caller's code as a second copy of the program.
sub submit_qsub {
    my $self      = shift;
    my $script    = shift || die( "Need a script to submit to qsub!" );
    my @qsub_args = @{ shift || [] };

    my $jobid;

    # Forking open: returns the child's PID here and 0 in the new child.
    my $writer_pid = open( my $qsub_reader, '-|' );
    defined $writer_pid or die( "Could not fork : $!" );

    if ($writer_pid) {
        # READER - Reads stdout from RUNNER process
        while ( my $line = <$qsub_reader> ) {
            if ( $line =~ /^(\d+)/ ) {
                $jobid = $1;
                print( "Job ID $jobid submitted to qsub\n" );
            }
        }
        close($qsub_reader);
        return $jobid;
    }

    # From here on we are in the first child; it must never return.
    my $runner_pid = open( my $qsub, '|-' );
    unless ( defined $runner_pid ) {
        warn( "Could not fork : $!" );
        exit(1);
    }

    if ($runner_pid) {
        # WRITER - Writes command to RUNNER process
        print {$qsub} $script;
        close($qsub);    # waits for qsub and sets $? to its status
        unless ( $? == 0 ) {
            warn( "qsub failed; non-zero status" );
            exit(1);
        }
        exit(0);
    }

    # RUNNER - Runs the command; STDIN,STDOUT attached to WRITER,READER.
    #warn join( " ", 'qsub', @qsub_args );
    exec( 'qsub', @qsub_args )
        or warn( "Could not exec qsub : $!" );
    exit(1);
}
199 
200 
201 1;
XrefMapper::Methods::ExonerateBasic
Definition: ExonerateBasic.pm:5
XrefMapper::Methods::ExonerateBasic::calculate_num_jobs
public The calculate_num_jobs()
run
public run()