2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 fix_cmp_overlaps.pl - remove overlapping mappings between assemblies from the component coordinates
24 fix_overlaps.pl [arguments]
28 --dbname, db_name=NAME database name NAME
29 --host, --dbhost, --db_host=HOST database host HOST
30 --port, --dbport, --db_port=PORT database port PORT
31 --user, --dbuser, --db_user=USER database username USER
32 --pass, --dbpass, --db_pass=PASS database passwort PASS
33 --assembly=ASSEMBLY assembly version ASSEMBLY
35 --altassembly=ASSEMBLY alternative assembly version ASSEMBLY
37 --chromosomes, --chr=LIST only process LIST seq_regions
43 --conffile, --conf=FILE read parameters from FILE
44 (
default: conf/Conversion.ini)
46 --logfile, --log=FILE log to FILE (
default: *STDOUT)
47 --logpath=PATH write logfile to PATH (
default: .)
48 --logappend, --log_append append to logfile (
default: truncate)
50 -v, --verbose=0|1 verbose logging (
default:
false)
51 -i, --interactive=0|1
run script interactively (
default:
true)
52 -n, --dry_run, --dry=0|1 don
't write results to database
53 -h, --help, -? print help (this message)
58 This script removes overlapping mappings from the component coordinates.
60 projecting between the non-reference to the reference assembly.
67 no warnings
'uninitialized';
79 $support->parse_common_options(@_);
80 $support->parse_extra_options(
85 $support->allowed_params(
86 $support->get_common_params,
92 if ($support->param(
'help') or $support->error) {
93 warn $support->error
if $support->error;
97 $support->comma_to_list(
'chromosomes');
99 # ask user to confirm parameters to proceed
100 $support->confirm_params;
102 # get log filehandle and print heading and parameters to logfile
105 $support->check_required_params(
111 # database connection
112 my $dba = $support->get_database(
'ensembl');
113 my $dbh = $dba->dbc->db_handle;
115 my $assembly = $support->param(
'assembly');
116 my $altassembly = $support->param(
'altassembly');
120 FROM assembly a, seq_region sr1, seq_region sr2,
121 coord_system cs1, coord_system cs2
122 WHERE a.asm_seq_region_id = sr1.seq_region_id
123 AND a.cmp_seq_region_id = sr2.seq_region_id
124 AND sr1.coord_system_id = cs1.coord_system_id
125 AND sr2.coord_system_id = cs2.coord_system_id
126 AND cs1.version =
'$assembly'
127 AND cs2.version =
'$altassembly'
132 my $sth = $dbh->prepare($sql);
134 my $fmt1 =
"%10s %10s %10s %10s %3s\n";
137 foreach my $chr ($support->param(
'chromosomes')) {
138 $support->log_stamped(
"\nToplevel seq_region $chr...\n");
144 # do an initial fetch
145 my $last = $sth->fetchrow_hashref;
147 # skip seq_regions for which we don't have data
149 $support->log(
"No mappings found. Skipping.\n", 1);
159 while ($last and (my $r = $sth->fetchrow_hashref)) {
161 # look for overlaps with last segment
162 if ($last->{
'cmp_end'} >= $r->{
'cmp_start'}) {
167 $support->log_verbose(
'last: '.sprintf($fmt1,
map { $last->{$_} }
168 qw(asm_start asm_end cmp_start cmp_end ori)), 1);
169 $support->log_verbose(
'before: '.sprintf($fmt1,
map { $r->{$_} }
170 qw(asm_start asm_end cmp_start cmp_end ori)), 1);
172 # skip if this segment ends before the last one
173 if ($r->{
'cmp_end'} <= $last->{
'cmp_end'}) {
174 $support->log_verbose(
"skipped\n\n", 1);
178 my $overlap = $last->{
'cmp_end'} - $r->{
'cmp_start'} + 1;
180 $r->{
'cmp_start'} += $overlap;
182 if ($r->{
'ori'} == -1) {
183 $r->{
'asm_end'} -= $overlap;
185 $r->{
'asm_start'} += $overlap;
188 $support->log_verbose(
'after: '.sprintf($fmt1,
map { $r->{$_} }
189 qw(asm_start asm_end cmp_start cmp_end ori)).
"\n", 1);
196 $support->log(
"Fixed $i mappings.\n", 1);
199 if (!$support->param(
'dry_run') && $i > 0 ) {
201 # delete all current mappings from the db and insert the corrected ones
204 FROM assembly a, seq_region sr1, seq_region sr2,
205 coord_system cs1, coord_system cs2
206 WHERE a.asm_seq_region_id = sr1.seq_region_id
207 AND a.cmp_seq_region_id = sr2.seq_region_id
208 AND sr1.coord_system_id = cs1.coord_system_id
209 AND sr2.coord_system_id = cs2.coord_system_id
210 AND cs1.version =
'$assembly'
211 AND cs2.version =
'$altassembly'
212 AND sr2.name =
'$chr'
215 $support->log(
"\nDeleted $c entries from the assembly table.\n");
217 # now insert the fixed entries
218 $sql = qq(INSERT INTO assembly VALUES (?, ?, ?, ?, ?, ?, ?));
219 my $sth1 = $dbh->prepare($sql);
221 foreach my $r (@rows) {
222 $sth1->execute(
map { $r->{$_} } qw(asm_seq_region_id cmp_seq_region_id asm_start asm_end cmp_start cmp_end ori));
225 $support->log(
"Added ".scalar(@rows).
" fixed entries to the assembly table.\n");
235 $support->finish_log;