2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 fix_asm_overlaps.pl - remove overlapping mappings between assemblies from the assembly coordinates (asm_start -asm_end)
25 fix_overlaps.pl [arguments]
29 --dbname, db_name=NAME database name NAME
30 --host, --dbhost, --db_host=HOST database host HOST
31 --port, --dbport, --db_port=PORT database port PORT
32 --user, --dbuser, --db_user=USER database username USER
33 --pass, --dbpass, --db_pass=PASS database passwort PASS
34 --assembly=ASSEMBLY assembly version ASSEMBLY
36 --altassembly=ASSEMBLY alternative assembly version ASSEMBLY
37 --chromosomes, --chr=LIST only process LIST seq_regions
41 --conffile, --conf=FILE read parameters from FILE
42 (
default: conf/Conversion.ini)
44 --logfile, --log=FILE log to FILE (
default: *STDOUT)
45 --logpath=PATH write logfile to PATH (
default: .)
46 --logappend, --log_append append to logfile (
default: truncate)
48 -v, --verbose=0|1 verbose logging (
default:
false)
49 -i, --interactive=0|1
run script interactively (
default:
true)
50 -n, --dry_run, --dry=0|1 don
't write results to database
51 -h, --help, -? print help (this message)
55 This script removes overlapping mappings from the assembly coordinates.
57 projecting between the reference to the non-reference assembly.
62 Monika Komorowska <monika@ebi.ac.uk>, Ensembl core API team
66 Please post comments/questions to the Ensembl development list
73 no warnings
'uninitialized';
85 $support->parse_common_options(@_);
86 $support->parse_extra_options(
91 $support->allowed_params(
92 $support->get_common_params,
98 if ($support->param(
'help') or $support->error) {
99 warn $support->error
if $support->error;
103 $support->comma_to_list(
'chromosomes');
105 # ask user to confirm parameters to proceed
106 $support->confirm_params;
108 # get log filehandle and print heading and parameters to logfile
111 $support->check_required_params(
117 # database connection
118 my $dba = $support->get_database(
'ensembl');
119 my $dbh = $dba->dbc->db_handle;
121 my $assembly = $support->param(
'assembly');
122 my $altassembly = $support->param(
'altassembly');
126 FROM assembly a, seq_region sr1, seq_region sr2,
127 coord_system cs1, coord_system cs2
128 WHERE a.asm_seq_region_id = sr1.seq_region_id
129 AND a.cmp_seq_region_id = sr2.seq_region_id
130 AND sr1.coord_system_id = cs1.coord_system_id
131 AND sr2.coord_system_id = cs2.coord_system_id
132 AND cs1.version =
'$assembly'
133 AND cs2.version =
'$altassembly'
138 my $sth = $dbh->prepare($sql);
140 my $fmt1 =
"%10s %10s %10s %10s %3s\n";
143 foreach my $chr ($support->param(
'chromosomes')) {
144 $support->log_stamped(
"\nToplevel seq_region $chr...\n");
150 # do an initial fetch
151 my $last = $sth->fetchrow_hashref;
153 # skip seq_regions for which we don't have data
155 $support->log(
"No mappings found. Skipping.\n", 1);
165 while ($last and (my $r = $sth->fetchrow_hashref)) {
167 # look for overlaps with last segment
168 if ($last->{
'asm_end'} >= $r->{
'asm_start'}) {
173 $support->log_verbose(
'last: '.sprintf($fmt1,
map { $last->{$_} }
174 qw(asm_start asm_end cmp_start cmp_end ori)), 1);
175 $support->log_verbose(
'before: '.sprintf($fmt1,
map { $r->{$_} }
176 qw(asm_start asm_end cmp_start cmp_end ori)), 1);
178 # skip if this segment ends before the last one
179 if ($r->{
'asm_end'} <= $last->{
'asm_end'}) {
180 $support->log_verbose(
"skipped\n\n", 1);
184 my $overlap = $last->{
'asm_end'} - $r->{
'asm_start'} + 1;
186 $r->{
'asm_start'} += $overlap;
188 if ($r->{
'ori'} == -1) {
189 $r->{
'cmp_end'} -= $overlap;
191 $r->{
'cmp_start'} += $overlap;
194 $support->log_verbose(
'after: '.sprintf($fmt1,
map { $r->{$_} }
195 qw(asm_start asm_end cmp_start cmp_end ori)).
"\n", 1);
202 $support->log(
"Fixed $i mappings.\n", 1);
204 if (!$support->param(
'dry_run') && ($i > 0)) {
206 # delete all current mappings from the db and insert the corrected ones
209 FROM assembly a, seq_region sr1, seq_region sr2,
210 coord_system cs1, coord_system cs2
211 WHERE a.asm_seq_region_id = sr1.seq_region_id
212 AND a.cmp_seq_region_id = sr2.seq_region_id
213 AND sr1.coord_system_id = cs1.coord_system_id
214 AND sr2.coord_system_id = cs2.coord_system_id
215 AND cs1.version =
'$assembly'
216 AND cs2.version =
'$altassembly'
217 AND sr1.name =
'$chr'
220 $support->log(
"\nDeleted $c entries from the assembly table.\n");
222 # now insert the fixed entries
223 $sql = qq(INSERT INTO assembly VALUES (?, ?, ?, ?, ?, ?, ?));
224 my $sth1 = $dbh->prepare($sql);
226 foreach my $r (@rows) {
227 $sth1->execute(
map { $r->{$_} } qw(asm_seq_region_id cmp_seq_region_id asm_start asm_end cmp_start cmp_end ori));
230 $support->log(
"Added ".scalar(@rows).
" fixed entries to the assembly table.\n");
239 $support->finish_log;