2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 ##!/usr/local/ensembl/bin/perl
21 load_patch_fix_to_ref.pl - load patch fixes in the old
22 database (database with alternative assembly), to the
new assembly
23 The patch fixes should have been integrated into the reference assembly
24 If the patch shares contigs with the
new ref, we have a direct mapping
29 load_patch_fix_to_ref.pl [arguments]
33 --host=HOST
new core db host HOST
34 --port=PORT
new core db port PORT
35 --user=USER
new core db username USER
36 --pass=PASS
new core db passwort PASS
37 --dbname=NAME
new core db name NAME
38 --altdbname=NAME old core db name NAME
39 --assembly=NAME
new assembly NAME
40 --altassembly=NAME old assembly NAME
44 --conffile=filename read parameters from FILE
45 (
default: conf/Conversion.ini)
47 (
if different from --host, --port, --user, --pass):
48 --althost=hOST old core db host HOST
49 --altport=PORT old core db port PORT
50 --altuser=USER old core db username USER
51 --altpass=PASS old core db passwort PASS
53 --logfile, --log=FILE log to FILE (
default: *STDOUT)
54 --logpatch=PATH write logfile to PATH (
default: .)
55 --logappend, --log_append append to logfile (
default: truncate)
57 -v, --verbose=0|1 verbose logging (
default:
false)
58 -i, --interactive=0|1
run script interactively (
default:
true)
59 -h, --help, -? print help (
this message)
63 This script will
map patch fixes from the old assembly to the reference in the
new assembly
64 It will load the mappings as
new entries in the assembly table
70 no warnings
'uninitialized';
78 use vars qw($SERVERROOT);
81 $SERVERROOT =
"$Bin/../";
90 $support->parse_common_options(@_);
91 $support->parse_extra_options(
100 $support->allowed_params(
101 $support->get_common_params,
110 if ($support->param(
'help') or $support->error) {
111 warn $support->error
if $support->error;
115 # ask user to confirm parameters to proceed
116 $support->confirm_params;
118 # get log filehandle and print heading and parameters to logfile
121 $support->check_required_params(
132 # connect to database and get adaptors
134 my ($dba, $dbh, $sth);
136 if ( !defined($support->param(
'pass')) ) {
137 $support->param(
'pass',
'');
141 # first set connection parameters for alternative db and test db
142 if ( !defined($support->param(
'althost')) ) { $support->param(
'althost',$support->param(
'host')); }
143 if ( !defined($support->param(
'altport')) ) { $support->param(
'altport',$support->param(
'port')); }
144 if ( !defined($support->param(
'altuser')) ) { $support->param(
'altuser',$support->param(
'user')); }
145 if ( !defined($support->param(
'altpass')) ) { $support->param(
'altpass',$support->param(
'pass')); }
149 -user => $support->param(
'user'),
150 -pass => $support->param(
'pass'),
151 -port => $support->param(
'port'),
152 -dbname => $support->param(
'dbname') );
154 $dbh->{
'ref'} = $dba->{
'ref'}->dbc->db_handle;
155 my $ref_helper = $dba->{
'ref'}->dbc->sql_helper();
157 # database containing the alternative assembly
159 -user => $support->param(
'altuser'),
160 -pass => $support->param(
'altpass'),
161 -port => $support->param(
'altport'),
162 -dbname => $support->param(
'altdbname') );
164 $dbh->{
'alt'} = $dba->{
'alt'}->dbc->db_handle;
165 my $alt_helper = $dba->{
'alt'}->dbc->sql_helper();
168 my (%alt_names_hash, %ref_names_hash);
171 SELECT s.name, seq_region_id
172 FROM seq_region s, coord_system cs
173 WHERE s.coord_system_id = cs.coord_system_id
174 AND cs.name =
'chromosome'
178 my $alt_regions = $ref_helper->execute(-SQL => $region_sql, -PARAMS => [$support->param(
'altassembly')]);
179 foreach my $alt_region (@$alt_regions) {
180 $alt_names_hash{$alt_region->[0]} = $alt_region->[1];
183 my $ref_regions = $ref_helper->execute(-SQL => $region_sql, -PARAMS => [$support->param(
'assembly')]);
184 foreach my $ref_region (@$ref_regions) {
185 $ref_names_hash{$ref_region->[0]} = $ref_region->[1];
188 my ($c, $patch_name, $contig_name, $chr_name, $asm_start, $asm_end, $ori);
189 my ($alt_asm_start, $alt_asm_end, $alt_cmp_start, $alt_cmp_end, $alt_ori);
190 my ($ref_asm_start, $ref_asm_end, $ref_cmp_start, $ref_cmp_end, $ref_ori);
192 ## Get patch to contig mapping from old assembly
195 SELECT s1.name, s2.name, s3.name, asm_start, asm_end, cmp_start, cmp_end,
asm.ori
196 FROM assembly
asm, seq_region s1, coord_system cs1, seq_region s3,
197 seq_region s2, coord_system cs2, assembly_exception ax
198 WHERE ax.exc_type =
"PATCH_FIX" AND ax.seq_region_id = s1.seq_region_id
199 AND s1.seq_region_id = asm_seq_region_id
200 AND s2.seq_region_id = cmp_seq_region_id
201 AND s1.coord_system_id = cs1.coord_system_id AND cs1.name =
"chromosome"
202 AND s2.coord_system_id = cs2.coord_system_id AND cs2.name =
"contig"
203 AND s3.seq_region_id = ax.exc_seq_region_id;
206 ## Get chromosome to contig mapping for the equivalent contig on the new assembly
209 SELECT asm_start, asm_end, cmp_start, cmp_end, ori
210 FROM assembly
asm, seq_region s1, coord_system cs1,
211 seq_region s2, coord_system cs2
212 WHERE s1.seq_region_id = asm_seq_region_id
213 AND s2.seq_region_id = cmp_seq_region_id
214 AND s1.coord_system_id = cs1.coord_system_id AND cs1.name =
"chromosome"
215 AND s2.coord_system_id = cs2.coord_system_id AND cs2.name =
"contig"
216 AND s1.name = ? and s2.name = ?
217 AND (cmp_start = ? OR cmp_end = ?);
221 INSERT IGNORE INTO assembly VALUES(?,?,?,?,?,?,?);
225 my $patch_mappings = $alt_helper->execute(-SQL => $alt_sql);
227 foreach my $mapping (@$patch_mappings) {
229 my @mapping = @$mapping;
230 $patch_name = $mapping->[0];
231 $contig_name = $mapping->[1];
232 $chr_name = $mapping->[2];
233 $alt_asm_start = $mapping->[3];
234 $alt_asm_end = $mapping->[4];
235 $alt_cmp_start = $mapping->[5];
236 $alt_cmp_end = $mapping->[6];
237 $alt_ori = $mapping->[7];
239 my $ref_mappings = $ref_helper->execute(-SQL => $ref_sql, -PARAMS => [$chr_name, $contig_name, $alt_cmp_start, $alt_cmp_end]);
241 if (scalar(@$ref_mappings) == 0) {
242 $support->log_stamped(
"Could not map $patch_name. Found no mapping in reference for $chr_name and $contig_name\n", 1);
245 foreach my $ref_mapping(@$ref_mappings) {
246 $ref_asm_start = $ref_mapping->[0];
247 $ref_asm_end = $ref_mapping->[1];
248 $ref_cmp_start = $ref_mapping->[2];
249 $ref_cmp_end = $ref_mapping->[3];
250 $ref_ori = $ref_mapping->[4];
252 if ($ref_ori == $alt_ori) {
258 if ($ref_cmp_start == $alt_cmp_start && $ref_cmp_end == $alt_cmp_end) {
260 $support->log_stamped(
"About to add direct assembly entry for $chr_name: " . $ref_names_hash{$chr_name} .
", $patch_name: " . $alt_names_hash{$patch_name} .
" on coordinates $ref_asm_start, $ref_asm_end, $alt_asm_start-$alt_asm_end\n", 1);
261 $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $ref_asm_end, $alt_asm_start, $alt_asm_end, $ori]);
263 } elsif ($ref_cmp_start == $alt_cmp_start) {
265 if ($alt_cmp_end > $ref_cmp_end) {
266 $asm_end = $alt_asm_end - ($alt_cmp_end - $ref_cmp_end);
267 if (($asm_end - $alt_asm_start) != ($ref_asm_end - $ref_asm_start)) {
268 $support->log_stamped(
"$patch_name does not have a correct mapping to $chr_name with $contig_name. $alt_asm_start-$asm_end has different length from $ref_asm_start-$ref_asm_end\n", 1);
271 $support->log_stamped(
"About to add adjusted alt end assembly entry for $chr_name: " . $ref_names_hash{$chr_name} .
", $patch_name: " . $alt_names_hash{$patch_name} .
" on coordinates $ref_asm_start, $ref_asm_end, $alt_asm_start-$asm_end\n", 1);
272 $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $ref_asm_end, $alt_asm_start, $asm_end, $ori]);
274 $asm_end = $ref_asm_end - ($ref_cmp_end - $alt_cmp_end);
275 if (($asm_end - $ref_asm_start) != ($alt_asm_end - $alt_asm_start)) {
276 $support->log_stamped(
"$patch_name does not have a correct mapping to $chr_name with $contig_name. $alt_asm_start-$alt_asm_end has different length from $ref_asm_start-$asm_end\n", 1);
279 $support->log_stamped(
"About to add adjusted ref end assembly entry for $chr_name: " . $ref_names_hash{$chr_name} .
", $patch_name: " . $alt_names_hash{$patch_name} .
" on coordinates $ref_asm_start, $asm_end, $alt_asm_start-$alt_asm_end\n", 1);
280 $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $asm_end, $alt_asm_start, $alt_asm_end, $ori]);
283 } elsif ($ref_cmp_end == $alt_cmp_end) {
285 if ($alt_cmp_start < $ref_cmp_start) {
286 $asm_start = $alt_asm_start - $alt_cmp_start + $ref_cmp_start;
287 if (($alt_asm_end - $asm_start) != ($ref_asm_end - $ref_asm_start)) {
288 $support->log_stamped(
"$patch_name does not have a correct mapping to $chr_name with $contig_name. $asm_start-$alt_asm_end has different length from $ref_asm_start-$ref_asm_end\n", 1);
291 $support->log_stamped(
"About to add adjusted alt start assembly entry for $chr_name: " . $ref_names_hash{$chr_name} .
", $patch_name: " . $alt_names_hash{$patch_name} .
" on coordinates $ref_asm_start, $ref_asm_end, $asm_start-$alt_asm_end\n", 1);
292 $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $ref_asm_end, $asm_start, $alt_asm_end, $ori]);
294 $asm_start = $ref_asm_start - $alt_cmp_start + $ref_cmp_start;
295 if (($ref_asm_end - $asm_start) != ($alt_asm_end - $alt_asm_start)) {
296 $support->log_stamped(
"$patch_name does not have a correct mapping to $chr_name with $contig_name. $alt_asm_start-$alt_asm_end has different length from $asm_start-$ref_asm_end\n", 1);
299 $support->log_stamped(
"About to add adjusted ref start assembly entry for $chr_name: " . $ref_names_hash{$chr_name} .
", $patch_name: " . $alt_names_hash{$patch_name} .
" on coordinates $asm_start, $ref_asm_end, $alt_asm_start-$alt_asm_end\n", 1);
300 $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $asm_start, $ref_asm_end, $alt_asm_start, $alt_asm_end, $ori]);
304 $support->log_stamped(
"Could not map $patch_name. Found no mapping in reference for $chr_name and $contig_name for $alt_asm_start-$alt_asm_end and $ref_asm_start-$ref_asm_end\n", 1);
311 $support->finish_log;