2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
24 run_all.pl [arguments]
28 --dbname, db_name=NAME database name NAME
29 --host, --dbhost, --db_host=HOST database host HOST
30 --port, --dbport, --db_port=PORT database port PORT
31 --user, --dbuser, --db_user=USER database username USER
32 --pass, --dbpass, --db_pass=PASS database passwort PASS
36 --conffile, --conf=FILE read parameters from FILE
37 (
default: conf/Conversion.ini)
39 --logfile, --log=FILE log to FILE (
default: *STDOUT)
40 --logpath=PATH write logfile to PATH (
default: .)
41 --logappend, --log_append append to logfile (
default: truncate)
42 --loglevel=LEVEL define log level (
default: INFO)
44 -i, --interactive=0|1
run script interactively (
default:
true)
45 -n, --dry_run, --dry=0|1 don
't write results to database
46 -h, --help, -? print help (this message)
54 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
58 Please post comments/questions to the Ensembl development list
59 <http://lists.ensembl.org/mailman/listinfo/dev>
65 no warnings 'uninitialized
';
68 use Bio::EnsEMBL::Utils::ConfParser;
69 use Bio::EnsEMBL::Utils::Logger;
70 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
71 use Bio::EnsEMBL::DBSQL::DBAdaptor;
73 # parse configuration and commandline arguments
74 my $conf = new Bio::EnsEMBL::Utils::ConfParser(
75 -SERVERROOT => "$Bin/../../../..",
76 -DEFAULT_CONF => "$Bin/../default.conf"
80 'sourcehost|source_host=s
' => 1,
81 'sourceport|source_port=n
' => 1,
82 'sourceuser|source_user=s
' => 1,
83 'sourcepass|source_pass=s
' => 0,
84 'sourcedbname|source_dbname=s
' => 1,
85 'targethost|target_host=s
' => 1,
86 'targetport|target_port=n
' => 1,
87 'targetuser|target_user=s
' => 1,
88 'targetpass|target_pass=s
' => 0,
89 'targetdbname|target_dbname=s
' => 1,
90 'althost|alt_host=s
' => 0,
91 'altport|alt_port=n
' => 0,
92 'altuser|alt_user=s
' => 0,
93 'altpass|alt_pass=s
' => 0,
94 'altdbname|alt_dbname=s
' => 1,
95 'basedir|basedir=s
' => 1,
97 'lsf_opt|lsfopt=s
' => 0,
104 # set default logpath
105 unless ($conf->param('logpath
')) {
106 $conf->param('logpath
', path_append($conf->param('basedir
'), 'log
'));
109 # assume both dbs are on same host unless specified otherwise
110 foreach my $p (qw(host port user pass)) {
111 unless ($conf->param("alt$p")) {
112 $conf->param("alt$p", $conf->param("target$p"));
116 # get log filehandle and print header and parameters to logfile
117 my $logger = new Bio::EnsEMBL::Utils::Logger(
118 -LOGFILE => $conf->param('logfile
'),
119 -LOGAUTO => $conf->param('logauto
'),
120 -LOGAUTOBASE => 'compare_results
',
121 -LOGPATH => $conf->param('logpath
'),
122 -LOGAPPEND => $conf->param('logappend
'),
123 -LOGLEVEL => $conf->param('loglevel
'),
126 # if user wants to run via lsf, submit script with bsub (this will exit this
127 # instance of the script)
128 &bsubmit if ($conf->param('lsf
'));
131 $logger->init_log($conf->list_param_values);
134 my $dba_s = new Bio::EnsEMBL::DBSQL::DBAdaptor(
135 -host => $conf->param("sourcehost"),
136 -port => $conf->param("sourceport"),
137 -user => $conf->param("sourceuser"),
138 -pass => $conf->param("sourcepass"),
139 -dbname => $conf->param("sourcedbname"),
142 $dba_s->dnadb($dba_s);
144 my $dba1 = new Bio::EnsEMBL::DBSQL::DBAdaptor(
145 -host => $conf->param("targethost"),
146 -port => $conf->param("targetport"),
147 -user => $conf->param("targetuser"),
148 -pass => $conf->param("targetpass"),
149 -dbname => $conf->param("targetdbname"),
153 my $dbh1 = $dba1->dbc->db_handle;
155 my $dba2 = new Bio::EnsEMBL::DBSQL::DBAdaptor(
156 -host => $conf->param("althost"),
157 -port => $conf->param("altport"),
158 -user => $conf->param("altuser"),
159 -pass => $conf->param("altpass"),
160 -dbname => $conf->param("altdbname"),
164 my $dbh2 = $dba2->dbc->db_handle;
166 # compare mapping results
167 my $type = $conf->param('type
') || 'gene
';
168 my $function = "compare_${type}s";
179 $logger->info("Comparing genes...\n\n", 0, 'stamped
');
180 &compare_features('gene
');
181 $logger->info("Done\n\n");
185 sub compare_transcripts {
186 $logger->info("Comparing transcripts...\n\n", 0, 'stamped
');
188 $logger->info("Done\n\n");
192 sub compare_translations {
193 $logger->info("Comparing translations...\n\n", 0, 'stamped
');
194 &compare_features('translation
');
195 $logger->info("Done\n\n");
200 $logger->info("Comparing exons...\n\n", 0, 'stamped
');
201 &compare_features('exon');
202 $logger->info("Done\n\n");
206 sub compare_features {
209 # get a filehandle to write results for debugging
210 my $path = path_append($conf->param('basedir
'), 'debug');
211 my $file = "$path/${ftype}_diff.txt";
212 open(my $fh, '>
', $file) or die "Can't open $file
for writing: $!\n
";
214 # read scores from files
216 unless ($ftype eq 'translation') {
217 foreach my $path1 (qw(debug1 debug2)) {
218 my $p1 = $conf->param($path1);
219 my $file1 = "$p1/${ftype}_scores.txt
";
220 open(my $fh1, '<', $file1) or die "Can
't open $file1 for reading: $!\n";
222 while (my $line = <$fh1>) {
224 my ($old_id, $new_id, $score) = split(/\s+/, $line);
225 $score = sprintf("%.6f", $score);
227 # remember the highest score for each new_id
228 if ($score > $scores->{$path1}->{$new_id}) {
229 $scores->{$path1}->{$new_id} = $score;
238 # fetch all features from both runs and create lookup hash by stable_id
240 $logger->info("Fetching ${ftype} data from dbs...\n", 0, 'stamped
');
243 my $sql1 = qq(SELECT ${ftype}_id, stable_id FROM ${ftype}_stable_id);
244 my $sth1 = $dbh1->prepare($sql1);
250 while (my $r = $sth1->fetchrow_arrayref) {
251 # create lookup hashes of dbID to stable ID and vice versa
252 $fsi1{$r->[1]} = $r->[0];
253 $fii1{$r->[0]} = $r->[1];
259 my $suffix = $conf->param('suffix
');
260 my $sql2 = qq(SELECT ${ftype}_id, stable_id FROM ${ftype}_stable_id${suffix});
261 my $sth2 = $dbh2->prepare($sql2);
267 while (my $r = $sth2->fetchrow_arrayref) {
268 # create lookup hashes of dbID to stable ID and vice versa
269 $fsi2{$r->[1]} = $r->[0];
270 $fii2{$r->[0]} = $r->[1];
275 $logger->info("Done.\n\n", 0, 'stamped
');
278 # get max(gene_stable_id) from source db
280 my $dbh = $dba_s->dbc->db_handle;
281 my $sql = qq(SELECT max(stable_id) FROM ${ftype}_stable_id);
282 my $sth = $dbh->prepare($sql);
284 my ($max_stable_id) = $sth->fetchrow_array;
288 # now loop over dbIDs in db 1 and compare results with db2
290 $logger->info("Comparing results...\n", 0, 'stamped
');
292 my @stat_keys = qw(TOT NN II NE EN EE);
293 my %stats = map { $_ => 0 } @stat_keys;
294 my $fmt = "%-3s%6d %-20s %-20s %-10s %-10s\n";
296 foreach my $dbID1 (sort { $a <=> $b } keys %fii1) {
300 my $sid1 = $fii1{$dbID1};
301 my $sid2 = $fii2{$dbID1};
303 # db 1 has new stable ID
304 if (($max_stable_id cmp $sid1) == -1) {
306 # db 2 has new stable ID too
307 if (($max_stable_id cmp $sid2) == -1) {
310 # db 2 reuses an existing stable ID
315 # else db 1 reused an existing stable ID
318 # db 2 uses the same stable ID
319 if ($sid1 eq $sid2) {
322 # db 2 has a new stable ID
323 } elsif (($max_stable_id cmp $sid2) == -1) {
326 # db 2 reuses an existing (but different from db 1) stable ID
335 # print result line (status dbID sid1 sid2)
336 print $fh sprintf($fmt, $status, $dbID1, $sid1, $sid2,
337 $scores->{'debug1
'}->{$dbID1}, $scores->{'debug2
'}->{$dbID1});
342 $logger->info("Done.\n\n", 0, 'stamped
');
345 $logger->info("Stats:\n");
346 foreach my $key (@stat_keys) {
347 $logger->info(sprintf(" %-5s%8d (%6s)\n", $key, $stats{$key}, sprintf("%3.1f%%", 100*$stats{$key}/$stats{'TOT
'})));
355 # build bsub commandline
358 # automatically create a filename for lsf output
359 my $cmd = 'bsub -o
'.$conf->param('logpath
');
360 $cmd .= "/lsf_compare_".$logger->log_auto_id.'.out
';
362 # add extra lsf options as configured by the user
363 $cmd .= ' '.$conf->param('lsf_opt
');
368 # options for this script
369 my $options = $conf->create_commandline_options(
370 logautoid => $logger->log_auto_id,
379 print
"\nRe-executing via lsf:\n";
382 exec($cmd) or die
"Could not exec $0 via lsf: $!\n";