my $ftype = shift;
# get a filehandle to write results for debugging
my $path = path_append($conf->param('basedir'), 'debug');
my $file = "$path/${ftype}_diff.txt";
open(my $fh, '>', $file) or die "Can't open $file for writing: $!\n";
# read scores from files
my $scores = {};
unless ($ftype eq 'translation') {
foreach my $path1 (qw(debug1 debug2)) {
my $p1 = $conf->param($path1);
my $file1 = "$p1/${ftype}_scores.txt";
open(my $fh1, '<', $file1) or die "Can't open $file1 for reading: $!\n";
while (my $line = <$fh1>) {
chomp $line;
my ($old_id, $new_id, $score) = split(/\s+/, $line);
$score = sprintf("%.6f", $score);
# remember the highest score for each new_id
if ($score > $scores->{$path1}->{$new_id}) {
$scores->{$path1}->{$new_id} = $score;
}
}
close($fh1);
}
}
#
# fetch all features from both runs and create lookup hash by stable_id
#
$logger->info("Fetching ${ftype} data from dbs...\n", 0, 'stamped');
# db 1
my $sql1 = qq(SELECT ${ftype}_id, stable_id FROM ${ftype}_stable_id);
my $sth1 = $dbh1->prepare($sql1);
$sth1->execute;
my %fsi1 = ();
my %fii1 = ();
while (my $r = $sth1->fetchrow_arrayref) {
# create lookup hashes of dbID to stable ID and vice versa
$fsi1{$r->[1]} = $r->[0];
$fii1{$r->[0]} = $r->[1];
}
$sth1->finish;
# db 2
my $suffix = $conf->param('suffix');
my $sql2 = qq(SELECT ${ftype}_id, stable_id FROM ${ftype}_stable_id${suffix});
my $sth2 = $dbh2->prepare($sql2);
$sth2->execute;
my %fsi2 = ();
my %fii2 = ();
while (my $r = $sth2->fetchrow_arrayref) {
# create lookup hashes of dbID to stable ID and vice versa
$fsi2{$r->[1]} = $r->[0];
$fii2{$r->[0]} = $r->[1];
}
$sth2->finish;
$logger->info("Done.\n\n", 0, 'stamped');
#
# get max(stable_id) for this feature type from source db
#
my $dbh = $dba_s->dbc->db_handle;
my $sql = qq(SELECT max(stable_id) FROM ${ftype}_stable_id);
my $sth = $dbh->prepare($sql);
$sth->execute;
my ($max_stable_id) = $sth->fetchrow_array;
$sth->finish;
#
# now loop over dbIDs in db 1 and compare results with db2
#
$logger->info("Comparing results...\n", 0, 'stamped');
my @stat_keys = qw(TOT NN II NE EN EE);
my %stats =
map { $_ => 0 } @stat_keys;
my $fmt = "%-3s%6d %-20s %-20s %-10s %-10s\n";
foreach my $dbID1 (sort { $a <=> $b } keys %fii1) {
$stats{TOT}++;
my $status;
my $sid1 = $fii1{$dbID1};
my $sid2 = $fii2{$dbID1};
# db 1 has new stable ID
if (($max_stable_id cmp $sid1) == -1) {
# db 2 has new stable ID too
if (($max_stable_id cmp $sid2) == -1) {
$status = 'NN';
# db 2 reuses an existing stable ID
} else {
$status = 'NE';
}
# else db 1 reused an existing stable ID
} else {
# db 2 uses the same stable ID
if ($sid1 eq $sid2) {
$status = 'II';
# db 2 has a new stable ID
} elsif (($max_stable_id cmp $sid2) == -1) {
$status = 'EN';
# db 2 reuses an existing (but different from db 1) stable ID
} else {
$status = 'EE';
}
}
# stats
$stats{$status}++;
# print result line (status dbID sid1 sid2 score1 score2)
print $fh sprintf($fmt, $status, $dbID1, $sid1, $sid2,
$scores->{'debug1'}->{$dbID1}, $scores->{'debug2'}->{$dbID1});
}
close($fh);
$logger->info("Done.\n\n", 0, 'stamped');
# print stats
$logger->info("Stats:\n");
foreach my $key (@stat_keys) {
$logger->info(sprintf(" %-5s%8d (%6s)\n", $key, $stats{$key}, sprintf("%3.1f%%", 100*$stats{$key}/$stats{'TOT'})));
}
}