2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 find_overlaps.pl - find assembly entries with overlapping regions
24 find_overlaps.pl [arguments]
28 --dbname, db_name=NAME database name NAME
29 --host, --dbhost, --db_host=HOST database host HOST
30 --port, --dbport, --db_port=PORT database port PORT
31 --user, --dbuser, --db_user=USER database username USER
32 --pass, --dbpass, --db_pass=PASS database passwort PASS
36 --conffile, --conf=FILE read parameters from FILE
37 (
default: conf/Conversion.ini)
39 --logfile, --log=FILE log to FILE (
default: *STDOUT)
40 --logpath=PATH write logfile to PATH (
default: .)
41 --logappend, --log_append append to logfile (
default: truncate)
43 -v, --verbose=0|1 verbose logging (
default:
false)
44 -i, --interactive=0|1
run script interactively (
default:
true)
45 -n, --dry_run, --dry=0|1 don
't write results to database
46 -h, --help, -? print help (this message)
50 This script looks for overlapping assembly and component seq_regions in the assembly table of
51 an Ensembl core-style database. It prints the number of overlapping entries per
52 coordinate system and optionally (when run with the --verbose option) a list of
53 the overlapping assembly entries.
55 The script is intended to detect and debug problems in the assembly. It
56 complements the AssemblyMultipleOverlap healthcheck.
61 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
65 Please post comments/questions to the Ensembl development list
66 <http://lists.ensembl.org/mailman/listinfo/dev>
72 no warnings 'uninitialized
';
77 use Bio::EnsEMBL::Utils::ConversionSupport;
81 my $support = new Bio::EnsEMBL::Utils::ConversionSupport("$Bin/../../..");
84 $support->parse_common_options(@_);
85 $support->parse_extra_options(
87 $support->allowed_params(
88 $support->get_common_params,
91 if ($support->param('help
') or $support->error) {
92 warn $support->error if $support->error;
96 # ask user to confirm parameters to proceed
97 $support->confirm_params;
99 # get log filehandle and print heading and parameters to logfile
102 # database connection
103 my $dba = $support->get_database('ensembl
');
104 my $dbh = $dba->dbc->db_handle;
107 SELECT cs1.name AS asm_name, cs1.version AS asm_version,
108 cs2.name AS cmp_name, cs2.version AS cmp_version
109 FROM assembly a, seq_region sr1, seq_region sr2,
110 coord_system cs1, coord_system cs2
111 WHERE a.asm_seq_region_id = sr1.seq_region_id
112 AND sr1.coord_system_id = cs1.coord_system_id
113 AND a.cmp_seq_region_id = sr2.seq_region_id
114 AND sr2.coord_system_id = cs2.coord_system_id
115 GROUP BY asm_name, asm_version, cmp_name, cmp_version
119 my $fmt1 = "%6s %6s %10s %10s %10s %10s %3s %3s %3s\n";
122 foreach my $type ( qw(asm cmp)){
123 my $sth = $dbh->prepare($sql);
125 while (my ($asm_name, $asm_version, $cmp_name, $cmp_version) =
126 $sth->fetchrow_array) {
129 SELECT a.*, sr1.name as asm_sr_name, sr2.name as cmp_sr_name
130 FROM assembly a, seq_region sr1, seq_region sr2,
131 coord_system cs1, coord_system cs2
132 WHERE a.asm_seq_region_id = sr1.seq_region_id
133 AND a.cmp_seq_region_id = sr2.seq_region_id
134 AND sr1.coord_system_id = cs1.coord_system_id
135 AND sr2.coord_system_id = cs2.coord_system_id
136 AND cs1.name = '$asm_name
'
137 AND cs2.name = '$cmp_name
'
141 $sql1 .= " AND cs1.version = '$asm_version
'";
143 $sql1 .= " AND cs1.version IS NULL";
144 $asm_version = 'NULL
';
148 $sql1 .= " AND cs2.version = '$cmp_version
'";
150 $sql1 .= " AND cs2.version IS NULL";
151 $cmp_version = 'NULL
';
155 $sql1 .= " ORDER BY a.".$type."_seq_region_id, a.".$type."_start ASC, a.".$type."_end";
157 $support->log_stamped("$asm_name.$asm_version $cmp_name.$cmp_version\n");
159 my $sth1 = $dbh->prepare($sql1);
162 # do an initial fetch
163 my $last = $sth1->fetchrow_hashref;
164 # foreach my $key (qw(asm_seq_region_id asm_sr_name cmp_seq_region_id cmp_sr_name asm_start asm_end cmp_start cmp_end ori)){
165 # print $key." = ".$last->{$key}."\t";
170 $support->log_verbose($type . ' overlaps:\n
');
171 while ($last and (my $r = $sth1->fetchrow_hashref)) {
173 # foreach my $key (qw(asm_seq_region_id asm_sr_name cmp_seq_region_id cmp_sr_name asm_start asm_end cmp_start cmp_end ori)){
174 # print $key." = ".$r->{$key}."\t";
178 # look for overlaps with last segment
179 if ($last->{$type.'_seq_region_id
'} == $r->{$type.'_seq_region_id
'} and
180 $last->{$type.'_end
'} >= $r->{$type.'_start
'}) {
185 $support->log_verbose('last:
'.sprintf($fmt1, map { $last->{$_} }
186 qw(asm_seq_region_id asm_sr_name cmp_seq_region_id cmp_sr_name
187 asm_start asm_end cmp_start cmp_end ori)), 1);
188 $support->log_verbose('this:
'.sprintf($fmt1, map { $r->{$_} }
189 qw(asm_seq_region_id asm_sr_name cmp_seq_region_id cmp_sr_name
190 asm_start asm_end cmp_start cmp_end ori))."\n", 1);
199 $support->log("\nFound $i overlaps.\n\n", 1);
205 $support->finish_log;