ensembl-hive  2.7.0
dump_scores.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 =head1 NAME
19 
20 dump_scores.pl - dump scores from serialised ScoredMappingMatrix'es for debugging
21 
22 =head1 SYNOPSIS
23 
24 dump_scores.pl [arguments]
25 
26 Required arguments:
27 
28  --basedir=PATH base directory of ID mapping results
29 
30 Optional arguments:
31 
32  --conffile, --conf=FILE read parameters from FILE
33  (default: default.conf in the script's directory)
34 
35  --logfile, --log=FILE log to FILE (default: *STDOUT)
36  --logpath=PATH write logfile to PATH (default: .)
37  --logappend, --log_append append to logfile (default: truncate)
38  --loglevel=LEVEL define log level (default: INFO)
39 
40  -i, --interactive=0|1 run script interactively (default: true)
41  -h, --help, -? print help (this message)
42 
43 =head1 DESCRIPTION
44 
45 This script reads gene, transcript and exon scores from serialised
46 ScoredMappingMatrix files and dumps the data (old_internal_id, new_internal_id,
47 score) to a text file for debugging.
48 
49 Note that if you ran the ID mapping with loglevel=DEBUG, these dumps are
50 generated automatically so you won't need to run this script.
51 
52 
53 =head1 AUTHOR
54 
55 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
56 
57 =head1 CONTACT
58 
59 Please post comments/questions to the Ensembl development list
60 <http://lists.ensembl.org/mailman/listinfo/dev>
61 
62 =cut
63 
64 use strict;
65 use warnings;
66 no warnings 'uninitialized';
67 
68 use FindBin qw($Bin);
71 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
73 
# Main program: parse the configuration, set up logging, then read each
# serialised scoring matrix and dump its entries to a text file.

# parse configuration and commandline arguments
# (direct Class->new calls instead of indirect object syntax, which perl can
# mis-parse)
my $conf = Bio::EnsEMBL::Utils::ConfParser->new(
  -SERVERROOT   => "$Bin/../../../..",
  -DEFAULT_CONF => "$Bin/default.conf",
);

# --basedir is the only script-specific option; the POD lists it as required
# (the value 1 presumably marks it as such -- confirm against ConfParser)
$conf->parse_options(
  'basedir|basedir=s' => 1,
);

# set default logpath: <basedir>/log unless one was configured
unless ($conf->param('logpath')) {
  $conf->param('logpath', path_append($conf->param('basedir'), 'log'));
}

# get log filehandle and print heading and parameters to logfile
my $logger = Bio::EnsEMBL::Utils::Logger->new(
  -LOGFILE     => $conf->param('logfile'),
  -LOGAUTO     => $conf->param('logauto'),
  -LOGAUTOBASE => 'dump_scores',
  -LOGAUTOID   => $conf->param('logautoid'),
  -LOGPATH     => $conf->param('logpath'),
  -LOGAPPEND   => $conf->param('logappend'),
  -LOGLEVEL    => $conf->param('loglevel'),
);

# initialise log
$logger->init_log($conf->list_param_values);

# directory holding the serialised matrices; read by read_matrix()
my $dump_path = path_append($conf->param('basedir'), 'matrix');

# genes
my $gene_matrix = read_matrix('gene');
dump_scores('gene', $gene_matrix);

# transcripts
my $transcript_matrix = read_matrix('transcript');
dump_scores('transcript', $transcript_matrix);

# exons: the overlap and exonerate matrices are merged into one before dumping
my $exon_matrix      = read_matrix('exon_overlap');
my $exonerate_matrix = read_matrix('exon_exonerate');
$exon_matrix->merge($exonerate_matrix);
dump_scores('exon', $exon_matrix);


# finish logfile
$logger->finish_log;
122 
123 
124 ### END main ###
125 
126 
# Create a ScoredMappingMatrix for one object type and, if its cache file
# exists and is non-empty, populate it from that file.
#
# Arg:     $type - matrix name prefix; the cache file is "${type}_matrix.ser"
#                  inside the file-level $dump_path directory
# Returns: the matrix object. If no non-empty cache file is found a warning is
#          logged and the matrix is returned without anything read into it.
sub read_matrix {
  my $type = shift;

  # Loaded locally so this sub does not depend on a file-level 'use' line;
  # require is a no-op if the module is already loaded.
  require Bio::EnsEMBL::IdMapping::ScoredMappingMatrix;

  # This constructor call was missing from the listing, which left the
  # argument list dangling and $matrix undeclared.
  my $matrix = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => "${type}_matrix.ser",
  );

  my $cache = $matrix->cache_file;

  # -s is true only when the file exists and has non-zero size
  if (-s $cache) {

    # read from file
    $logger->info("Reading $type scoring matrix from file...\n", 0, 'stamped');
    $logger->debug("Cache file $cache.\n", 1);
    $matrix->read_from_file;
    $logger->info("Done.\n\n", 0, 'stamped');

  } else {
    $logger->warning("No cache file found at $cache.\n");
  }

  return $matrix;
}
151 
152 
# Write every entry of a scoring matrix to <basedir>/debug/<type>_scores.txt,
# one entry per line, using each entry's to_string representation.
#
# Args:    $type   - label used in the log messages and the output file name
#          $matrix - object providing get_all_Entries (an arrayref of entries)
# Returns: nothing meaningful
# Dies:    if the output file cannot be opened or closed
sub dump_scores {
  my $type = shift;
  my $matrix = shift;

  $logger->info("Dumping $type scores to file...\n", 0, 'stamped');

  my $debug_path = path_append($conf->param('basedir'), 'debug');
  my $logfile = "$debug_path/${type}_scores.txt";

  # die rather than throw(): no import of throw() is visible in this file, so
  # calling it would fail with "Undefined subroutine" and hide the real error.
  open(my $fh, '>', $logfile)
    or die("Unable to open $logfile for writing: $!");

  foreach my $entry (@{ $matrix->get_all_Entries }) {
    print $fh ($entry->to_string."\n");
  }

  # Buffered write errors (e.g. disk full) only surface at close, so check it.
  close($fh)
    or die("Unable to close $logfile after writing: $!");

  $logger->info("Done.\n\n", 0, 'stamped');
}
176 
177 
Bio::EnsEMBL::Utils::ScriptUtils
Definition: ScriptUtils.pm:11
Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
Definition: ScoredMappingMatrix.pm:44
read_matrix
public read_matrix()
Bio::EnsEMBL::IdMapping::Serialisable::cache_file
public String cache_file()
Bio::EnsEMBL::Utils::ConfParser
Definition: ConfParser.pm:41
Bio::EnsEMBL::IdMapping::ScoredMappingMatrix::new
public Bio::EnsEMBL::IdMapping::ScoredMappingMatrix new()
Bio::EnsEMBL::Utils::Logger
Definition: Logger.pm:36
run
public run()
dump_scores
public dump_scores()