2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 test_assembly_mapping.pl [arguments]
22 Produce statistics on the quality of feature mappings between two different assemblies.
23 Feature mappings need to be loaded first
using script load_feature_mappings.pl
24 Sample config file: test_assembly_mapping.ini.example
27 --host=HOST test database host HOST
28 --port=PORT test database port PORT
29 --user=USER test database username USER
30 --pass=PASS test database passwort PASS
31 --dbname=NAME test database name NAME
36 --conffile=filename read parameters from FILE
37 (
default: conf/Conversion.ini)
38 --logfile, --log=FILE log to FILE (
default: *STDOUT)
39 --logpath=PATH write logfile to PATH (
default: .)
40 --logappend, --log_append append to logfile (
default: truncate)
46 no warnings
'uninitialized';
55 use vars qw($SERVERROOT);
58 $SERVERROOT =
"$Bin/../";
67 $support->parse_common_options(@_);
69 $support->allowed_params(qw(dbname host port user pass conffile logfile logpath logappend));
71 if ($support->param(
'help') or $support->error) {
72 warn $support->error
if $support->error;
76 # ask user to confirm parameters to proceed
77 $support->confirm_params;
79 # get log filehandle and print heading and parameters to logfile
82 $support->check_required_params(qw(dbname host port user));
84 my ($dba, $dbh, $sql, $sth);
85 $dbh = $support->get_dbconnection();
88 my %mapping_quality = ( 1 =>
'with gaps',
93 my @chrs = @{$dbh->selectcol_arrayref(
"select distinct old_chr from mapping order by old_chr+0")};
95 foreach my $chr (@chrs) {
97 my @feature_types = @{$dbh->selectcol_arrayref(
"select distinct feature_type from mapping where old_chr = '". $chr .
"' order by feature_type")};
99 foreach my $feature_type (@feature_types) {
100 $support->log_stamped(
"processing $feature_type mappings for chromosome $chr\n\n",1);
103 #count all features in the old db
104 $sql =
"select count(1) from mapping where feature_type = '" . $feature_type .
"' and old_chr = '". $chr .
"'" ;
105 my ($total_features_old) = $dbh->selectrow_array($sql);
107 #count all features both in the old and new db
108 $sql =
"select count(1) from mapping where feature_type = '" . $feature_type .
"' and feature_found_in_new_db = 1 and old_chr = '". $chr .
"'";
109 my ($total_features_old_new) = $dbh->selectrow_array($sql);
111 my $perc_features = ($total_features_old_new/$total_features_old) * 100;
112 $perc_features = sprintf
"%.2f", $perc_features;
114 $support->log_stamped(
"$total_features_old_new $feature_type" .
"s found in both old and new assembly dbs ($perc_features\% in old assembly db found in new db)\n\n", 1);
116 #count features which have the same length in the new db
117 $sql =
"select count(1) from mapping where feature_type = '" . $feature_type .
"' and feature_found_in_new_db = 1 and old_length = new_length and old_chr = '". $chr .
"'";
119 my ($total_same_length) = $dbh->selectrow_array($sql);
121 my $ok_mappings_1 = 0;
122 my $perc_mappings_1 = 0;
124 if ($total_same_length > 0) {
125 #count mappings where start and end match feature location in the new db by mapping quality (1-contains gaps, 2 - no gaps
127 $support->log_stamped(
"$total_same_length $feature_type" .
"s where feature length in the old assembly db is the same as in the new assembly db\n\n", 2);
129 $sql =
"select count(1) as mapping_count,mapping_quality from mapping where feature_type = '" . $feature_type .
"' and old_chr = '". $chr .
"' and feature_found_in_new_db = 1 and old_length = new_length and new_start = mapping_start and new_end = mapping_end and (new_strand = mapping_strands or mapping_strands = 'both') and (mapping_chrs = new_chr or mapping_chrs like concat(new_chr,',%')) group by mapping_quality";
131 my %mapping_counts = %{$dbh->selectall_hashref($sql,
"mapping_quality")};
135 foreach my $mapping_q (keys %mapping_counts) {
136 my $perc = ($mapping_counts{$mapping_q}{
'mapping_count'}/$total_same_length) * 100;
137 $perc = sprintf
"%.2f", $perc;
139 $support->log_stamped(
"$mapping_counts{$mapping_q}{'mapping_count'} $feature_type mappings $mapping_quality{$mapping_q} where feature length in the old assembly db is the same as in the new assembly db and mapping start and end match feature location in the new db ($perc\%)\n\n", 3);
141 $ok_mappings += $mapping_counts{$mapping_q}{
'mapping_count'};
145 $ok_mappings_1 += $ok_mappings;
146 my $perc_mappings = ($ok_mappings/$total_same_length) * 100;
147 $perc_mappings = sprintf
"%.2f", $perc_mappings;
149 $support->log_stamped(
"total: $ok_mappings ok $feature_type mappings ($perc_mappings\%):\n\n", 3);
152 $support->log_stamped(
"no $feature_type" .
"s found for chromosome $chr where feature length in the old assembly db is the same as in the new assembly db\n\n", 2);
155 $sql =
"select count(1) from mapping where feature_type = '" . $feature_type .
"' and old_chr = '". $chr .
"' and feature_found_in_new_db = 1 and old_length != new_length";
157 my ($total_diff_length) = $dbh->selectrow_array($sql);
159 if ($total_diff_length > 0) {
161 $support->log_stamped(
"$total_diff_length $feature_type" .
"s found for chromosome $chr where feature length in the old assembly db is different than in the new assembly db\n\n", 2);
163 #count features with different length in the new db where either mapping start or end match new feature start or end respectively, by mapping quality
165 $sql =
"select count(1) as mapping_count,mapping_quality from mapping where feature_type = '" . $feature_type .
"' and old_chr = '". $chr .
"' and feature_found_in_new_db = 1 and old_length != new_length and (new_start = mapping_start or new_end = mapping_end) and (new_strand = mapping_strands or mapping_strands = 'both') and (mapping_chrs = new_chr or mapping_chrs like concat(new_chr,',%')) group by mapping_quality";
167 my %mapping_counts = %{$dbh->selectall_hashref($sql,
"mapping_quality")};
171 foreach my $mapping_q (keys %mapping_counts) {
172 my $perc = ($mapping_counts{$mapping_q}{
'mapping_count'}/$total_diff_length) * 100;
173 $perc = sprintf
"%.2f", $perc;
175 $support->log_stamped(
"$mapping_counts{$mapping_q}{'mapping_count'} $feature_type mappings $mapping_quality{$mapping_q} found for chromosome where feature length in the old assembly db is different than in the new assembly db and either mapping start or end matches the gene location in the new db ($perc\%)\n\n", 3);
177 $ok_mappings += $mapping_counts{$mapping_q}{
'mapping_count'};
181 #count features with different length in the new db where either mapping start or end is within the difference between old and new length from the new feature start or end respectively, by mapping quality
182 $sql =
"select count(1) as mapping_count,mapping_quality from mapping where feature_type = '" . $feature_type .
"' and old_chr = '". $chr .
"' and feature_found_in_new_db = 1 and old_length != new_length and (abs(new_start - mapping_start) <= abs(new_length - old_length) or abs(new_end - mapping_end) <= abs(new_length - old_length)) and new_start != mapping_start and new_end != mapping_end and (new_strand = mapping_strands or mapping_strands = 'both') and (mapping_chrs = new_chr or mapping_chrs like concat(new_chr,',%')) group by mapping_quality";
184 %mapping_counts = %{$dbh->selectall_hashref($sql,
"mapping_quality")};
187 foreach my $mapping_q (keys %mapping_counts) {
188 my $perc = ($mapping_counts{$mapping_q}{
'mapping_count'}/$total_diff_length) * 100;
189 $perc = sprintf
"%.2f", $perc;
191 $support->log_stamped(
"$mapping_counts{$mapping_q}{'mapping_count'} $feature_type mappings $mapping_quality{$mapping_q} found for chromosome where feature length in the old assembly db is different than in the new assembly db and either mapping start or end is within the difference between old and new feature length from the new feature start or end, respectively ($perc\%)\n\n", 3);
192 $ok_mappings += $mapping_counts{$mapping_q}{
'mapping_count'};
196 my $perc_mappings = ($ok_mappings/$total_diff_length) * 100;
197 $perc_mappings = sprintf
"%.2f", $perc_mappings;
198 $support->log_stamped(
"total: $ok_mappings ok $feature_type mappings ($perc_mappings\%):\n\n", 3);
200 $ok_mappings_1 += $ok_mappings;
203 $support->log_stamped(
"no $feature_type" .
"s found for chromosome $chr where feature length in the old assembly db is different than in the new assembly db\n\n", 2);
206 $perc_mappings_1 = ($ok_mappings_1/$total_features_old_new) * 100;
207 $perc_mappings_1 = sprintf
"%.2f", $perc_mappings_1;
208 $support->log_stamped(
"total: $ok_mappings_1 ok $feature_type mappings ($perc_mappings_1\%):\n\n", 1);
216 $support->finish_log;