ensembl-hive  2.8.1
compare_scores.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 =head1 NAME
19 
20 
21 =head1 SYNOPSIS
22 
23 compare_scores.pl [arguments]
24 
25 Required arguments:
26 
27  --basedir=PATH base directory of ID mapping results
28 
29 Optional arguments:
30 
31  --conffile, --conf=FILE read parameters from FILE
32  (default: conf/Conversion.ini)
33 
34  --logfile, --log=FILE log to FILE (default: *STDOUT)
35  --logpath=PATH write logfile to PATH (default: .)
36  --logappend, --log_append append to logfile (default: truncate)
37  --loglevel=LEVEL define log level (default: INFO)
38 
39  -i, --interactive=0|1 run script interactively (default: true)
40  -h, --help, -? print help (this message)
41 
42 =head1 DESCRIPTION
43 
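44 Compares the score files TYPE_scores.txt (TYPE is exon, transcript and gene unless --type is given) found in the --path1 and --path2 directories, and logs the ID pairs that occur in only one of the two sets or whose scores differ, followed by summary counts.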
45 
46 =head1 AUTHOR
47 
48 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
49 
50 =head1 CONTACT
51 
52 Please post comments/questions to the Ensembl development list
53 <http://lists.ensembl.org/mailman/listinfo/dev>
54 
55 =cut
56 
use strict;
use warnings;
no warnings 'uninitialized';

use FindBin qw($Bin);
# ConfParser and Logger are instantiated below, so they have to be loaded
# explicitly.
use Bio::EnsEMBL::Utils::ConfParser;
use Bio::EnsEMBL::Utils::Logger;
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);

# parse configuration and commandline arguments
my $conf = Bio::EnsEMBL::Utils::ConfParser->new(
  -SERVERROOT   => "$Bin/../../../..",
  -DEFAULT_CONF => "$Bin/default.conf"
);

# --path1 and --path2 name the two result directories to compare; --type may
# be given several times to restrict the comparison to certain score files.
$conf->parse_options(
  'path1|p1=s' => 1,
  'path2|p2=s' => 1,
  'type|t=s@'  => 0,
);

# set default logpath
unless ($conf->param('logpath')) {
  $conf->param('logpath', path_append($conf->param('basedir'), 'log'));
}

# get log filehandle and print heading and parameters to logfile
my $logger = Bio::EnsEMBL::Utils::Logger->new(
  -LOGFILE     => $conf->param('logfile'),
  -LOGAUTO     => $conf->param('logauto'),
  -LOGAUTOBASE => 'compare_scores',
  -LOGAUTOID   => $conf->param('logautoid'),
  -LOGPATH     => $conf->param('logpath'),
  -LOGAPPEND   => $conf->param('logappend'),
  -LOGLEVEL    => $conf->param('loglevel'),
);

# initialise log
$logger->init_log($conf->list_param_values);

# Fetch the requested types in LIST context. Writing
# "$conf->param('type') || qw(...)" would evaluate the call in scalar context
# (the left operand of || always is), so a multi-valued --type could never
# come back as its full list of values. Array references are flattened and
# undefined/empty values dropped before falling back to the default types.
my @types =
  grep { defined($_) && length($_) }
  map  { ref($_) eq 'ARRAY' ? @{$_} : $_ }
  $conf->param('type');
@types = qw(exon transcript gene) unless @types;

foreach my $type (@types) {
  compare_scores($type);
}


# finish logfile
$logger->finish_log;
107 
108 
109 ### END main ###
110 
111 
# Compare the "<type>_scores.txt" files found under --path1 and --path2.
#
# Arg[1]  : String $type - prefix of the score file name (the script calls
#           this with 'exon', 'transcript' and 'gene' by default)
# Returns : nothing meaningful; all results are written through $logger
# Throws  : whatever parse_file() throws when a score file cannot be opened
#
# Logs a sample of the "old_id:new_id" pairs that occur in only one of the
# two sets, a sample of the pairs whose scores differ by 0.000002 or more,
# and the counts TOT1, TOT2, BOTH, ONLY1, ONLY2 and BOTH_DIFF.
sub compare_scores {
  my $type = shift;

  # number of example entries listed per section
  my $max_shown = 10;

  # read scores from file
  $logger->info("Reading $type scores...\n", 0, 'stamped');

  my $scores1 = parse_file($conf->param('path1')."/${type}_scores.txt");
  my $scores2 = parse_file($conf->param('path2')."/${type}_scores.txt");

  $logger->info("Done.\n\n", 0, 'stamped');

  # Partition the keys. Membership is tested with exists() so that the result
  # does not depend on the truthiness of the stored score, and every list is
  # sorted so the samples printed below are the same on every run.
  my @keys1 = sort keys %$scores1;
  my @keys2 = sort keys %$scores2;

  my @both  = grep {  exists $scores2->{$_} } @keys1;
  my @only1 = grep { !exists $scores2->{$_} } @keys1;
  my @only2 = grep { !exists $scores1->{$_} } @keys2;

  my %stats = (
    'TOT1'      => scalar(@keys1),
    'TOT2'      => scalar(@keys2),
    'BOTH'      => scalar(@both),
    'ONLY1'     => scalar(@only1),
    'ONLY2'     => scalar(@only2),
    'BOTH_DIFF' => 0,
  );

  $logger->info("Only in set 1 (first $max_shown shown):\n");
  my $shown1 = 0;
  foreach my $key (@only1) {
    # check the limit BEFORE printing so that exactly $max_shown lines appear
    last if ($shown1++ >= $max_shown);
    $logger->info(sprintf("%-20s%-10s\n", $key, $scores1->{$key}), 1);
  }

  $logger->info("\nOnly in set 2 (first $max_shown shown):\n");
  my $shown2 = 0;
  foreach my $key (@only2) {
    last if ($shown2++ >= $max_shown);
    $logger->info(sprintf("%-20s%-10s\n", $key, $scores2->{$key}), 1);
  }

  # compare scores which are present in both result sets
  $logger->info("\nScores different (first $max_shown shown):\n");

  foreach my $key (@both) {
    my $s1 = $scores1->{$key};
    my $s2 = $scores2->{$key};

    # scores are stored with six decimals; smaller differences count as equal
    next if (abs($s1 - $s2) < 0.000002);

    $stats{'BOTH_DIFF'}++;
    if ($stats{'BOTH_DIFF'} <= $max_shown) {
      $logger->info(sprintf("%-20s%-10s%-10s\n", $key, $s1, $s2), 1);
    }
  }

  $logger->info("\nStats:\n");
  foreach my $t (qw(TOT1 TOT2 BOTH ONLY1 ONLY2 BOTH_DIFF)) {
    $logger->info(sprintf("%-10s%8d\n", $t, $stats{$t}), 1);
  }
}
186 
187 
# Read a whitespace-separated score file into a hash.
#
# Arg[1]  : String $file - path of the file to read
# Returns : Hashref mapping "old_id:new_id" (first and second column) to the
#           third column formatted with sprintf("%.6f")
# Throws  : via throw() if the file cannot be opened
#
# Blank lines are skipped and leading whitespace is ignored, so neither can
# produce a bogus ":" key or shift the columns by one.
sub parse_file {
  my $file = shift;

  open(my $fh, '<', $file) or
    throw("Unable to open $file for reading: $!");

  my %scores = ();

  while (my $line = <$fh>) {
    chomp $line;

    # split(' ') discards leading whitespace before splitting on runs of
    # whitespace; split(/\s+/) would return an empty first field instead
    my ($old_id, $new_id, $score) = split(' ', $line);

    # nothing on this line (empty or whitespace only)
    next unless defined $old_id;

    $scores{"$old_id:$new_id"} = sprintf("%.6f", $score);
  }

  close($fh);

  return \%scores;
}
206 
207 
transcript
public transcript()
compare_scores
public compare_scores()
Bio::EnsEMBL::Utils::ScriptUtils
Definition: ScriptUtils.pm:11
exon
public exon()
Bio::EnsEMBL::Utils::ConfParser
Definition: ConfParser.pm:41
parse_file
public parse_file()
Bio::EnsEMBL::Utils::Logger
Definition: Logger.pm:36
run
public run()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68