ensembl-hive  2.8.1
load_patch_fix_to_ref.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 ##!/usr/local/ensembl/bin/perl
18 
19 =head1 NAME
20 
21 load_patch_fix_to_ref.pl - load patch fixes in the old
22 database (database with alternative assembly), to the new assembly
23 The patch fixes should have been integrated into the reference assembly
24 If the patch shares contigs with the new ref, we have a direct mapping
25 
26 
27 =head1 SYNOPSIS
28 
29 load_patch_fix_to_ref.pl [arguments]
30 
31 Required arguments:
32 
33  --host=HOST new core db host HOST
34  --port=PORT new core db port PORT
35  --user=USER new core db username USER
36  --pass=PASS new core db passwort PASS
37  --dbname=NAME new core db name NAME
38  --altdbname=NAME old core db name NAME
39  --assembly=NAME new assembly NAME
40  --altassembly=NAME old assembly NAME
41 
42 Optional arguments:
43 
44  --conffile=filename read parameters from FILE
45  (default: conf/Conversion.ini)
46 
47  (if different from --host, --port, --user, --pass):
48  --althost=hOST old core db host HOST
49  --altport=PORT old core db port PORT
50  --altuser=USER old core db username USER
51  --altpass=PASS old core db passwort PASS
52 
53  --logfile, --log=FILE log to FILE (default: *STDOUT)
54  --logpatch=PATH write logfile to PATH (default: .)
55  --logappend, --log_append append to logfile (default: truncate)
56 
57  -v, --verbose=0|1 verbose logging (default: false)
58  -i, --interactive=0|1 run script interactively (default: true)
59  -h, --help, -? print help (this message)
60 
61 =head1 DESCRIPTION
62 
63 This script will map patch fixes from the old assembly to the reference in the new assembly
64 It will load the mappings as new entries in the assembly table
65 
66 =cut
67 
68 use strict;
69 use warnings;
70 no warnings 'uninitialized';
71 
72 use FindBin qw($Bin);
73 use Getopt::Long;
74 use Pod::Usage;
77 
78 use vars qw($SERVERROOT);
79 
80 BEGIN {
81  $SERVERROOT = "$Bin/../";
82 }
83 
84 
85 $| = 1;
86 
87 my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);
88 
89 # parse options
90 $support->parse_common_options(@_);
91 $support->parse_extra_options(
92  'althost=s',
93  'altport=n',
94  'altuser=s',
95  'altpass=s',
96  'altdbname=s',
97  'assembly=s',
98  'altassembly=s',
99 );
100 $support->allowed_params(
101  $support->get_common_params,
102  'althost',
103  'altport',
104  'altuser',
105  'altdbname',
106  'assembly',
107  'altassembly',
108 );
109 
110 if ($support->param('help') or $support->error) {
111  warn $support->error if $support->error;
112  pod2usage(1);
113 }
114 
115 # ask user to confirm parameters to proceed
116 $support->confirm_params;
117 
118 # get log filehandle and print heading and parameters to logfile
119 $support->init_log;
120 
121 $support->check_required_params(
122  'altdbname',
123  'assembly',
124  'altassembly',
125  'dbname',
126  'host',
127  'user'
128 );
129 
130 
131 #####
132 # connect to database and get adaptors
133 #
134 my ($dba, $dbh, $sth);
135 
136 if ( !defined($support->param('pass')) ) {
137  $support->param('pass', '');
138 }
139 
140 
141 # first set connection parameters for alternative db and test db
142 if ( !defined($support->param('althost')) ) { $support->param('althost',$support->param('host')); }
143 if ( !defined($support->param('altport')) ) { $support->param('altport',$support->param('port')); }
144 if ( !defined($support->param('altuser')) ) { $support->param('altuser',$support->param('user')); }
145 if ( !defined($support->param('altpass')) ) { $support->param('altpass',$support->param('pass')); }
146 
147 # reference database
148 $dba->{'ref'} = new Bio::EnsEMBL::DBSQL::DBAdaptor( -host => $support->param('host'),
149  -user => $support->param('user'),
150  -pass => $support->param('pass'),
151  -port => $support->param('port'),
152  -dbname => $support->param('dbname') );
153 
154 $dbh->{'ref'} = $dba->{'ref'}->dbc->db_handle;
155 my $ref_helper = $dba->{'ref'}->dbc->sql_helper();
156 
157 # database containing the alternative assembly
158 $dba->{'alt'} = new Bio::EnsEMBL::DBSQL::DBAdaptor( -host => $support->param('althost'),
159  -user => $support->param('altuser'),
160  -pass => $support->param('altpass'),
161  -port => $support->param('altport'),
162  -dbname => $support->param('altdbname') );
163 
164 $dbh->{'alt'} = $dba->{'alt'}->dbc->db_handle;
165 my $alt_helper = $dba->{'alt'}->dbc->sql_helper();
166 
167 
168 my (%alt_names_hash, %ref_names_hash);
169 
170 my $region_sql = qq(
171  SELECT s.name, seq_region_id
172  FROM seq_region s, coord_system cs
173  WHERE s.coord_system_id = cs.coord_system_id
174  AND cs.name = 'chromosome'
175  AND cs.version = ?
176 );
177 
178 my $alt_regions = $ref_helper->execute(-SQL => $region_sql, -PARAMS => [$support->param('altassembly')]);
179 foreach my $alt_region (@$alt_regions) {
180  $alt_names_hash{$alt_region->[0]} = $alt_region->[1];
181 }
182 
183 my $ref_regions = $ref_helper->execute(-SQL => $region_sql, -PARAMS => [$support->param('assembly')]);
184 foreach my $ref_region (@$ref_regions) {
185  $ref_names_hash{$ref_region->[0]} = $ref_region->[1];
186 }
187 
188 my ($c, $patch_name, $contig_name, $chr_name, $asm_start, $asm_end, $ori);
189 my ($alt_asm_start, $alt_asm_end, $alt_cmp_start, $alt_cmp_end, $alt_ori);
190 my ($ref_asm_start, $ref_asm_end, $ref_cmp_start, $ref_cmp_end, $ref_ori);
191 
192 ## Get patch to contig mapping from old assembly
193 
194 my $alt_sql = qq(
195  SELECT s1.name, s2.name, s3.name, asm_start, asm_end, cmp_start, cmp_end, asm.ori
196  FROM assembly asm, seq_region s1, coord_system cs1, seq_region s3,
197  seq_region s2, coord_system cs2, assembly_exception ax
198  WHERE ax.exc_type = "PATCH_FIX" AND ax.seq_region_id = s1.seq_region_id
199  AND s1.seq_region_id = asm_seq_region_id
200  AND s2.seq_region_id = cmp_seq_region_id
201  AND s1.coord_system_id = cs1.coord_system_id AND cs1.name = "chromosome"
202  AND s2.coord_system_id = cs2.coord_system_id AND cs2.name = "contig"
203  AND s3.seq_region_id = ax.exc_seq_region_id;
204 );
205 
206 ## Get chromosome to contig mapping for the equivalent contig on the new assembly
207 
208 my $ref_sql = qq(
209  SELECT asm_start, asm_end, cmp_start, cmp_end, ori
210  FROM assembly asm, seq_region s1, coord_system cs1,
211  seq_region s2, coord_system cs2
212  WHERE s1.seq_region_id = asm_seq_region_id
213  AND s2.seq_region_id = cmp_seq_region_id
214  AND s1.coord_system_id = cs1.coord_system_id AND cs1.name = "chromosome"
215  AND s2.coord_system_id = cs2.coord_system_id AND cs2.name = "contig"
216  AND s1.name = ? and s2.name = ?
217  AND (cmp_start = ? OR cmp_end = ?);
218 );
219 
220 my $update_sql = qq(
221  INSERT IGNORE INTO assembly VALUES(?,?,?,?,?,?,?);
222 );
223 
224 
225 my $patch_mappings = $alt_helper->execute(-SQL => $alt_sql);
226 
227 foreach my $mapping (@$patch_mappings) {
228 
229  my @mapping = @$mapping;
230  $patch_name = $mapping->[0];
231  $contig_name = $mapping->[1];
232  $chr_name = $mapping->[2];
233  $alt_asm_start = $mapping->[3];
234  $alt_asm_end = $mapping->[4];
235  $alt_cmp_start = $mapping->[5];
236  $alt_cmp_end = $mapping->[6];
237  $alt_ori = $mapping->[7];
238 
239  my $ref_mappings = $ref_helper->execute(-SQL => $ref_sql, -PARAMS => [$chr_name, $contig_name, $alt_cmp_start, $alt_cmp_end]);
240 
241  if (scalar(@$ref_mappings) == 0) {
242  $support->log_stamped("Could not map $patch_name. Found no mapping in reference for $chr_name and $contig_name\n", 1);
243  next;
244  } else {
245  foreach my $ref_mapping(@$ref_mappings) {
246  $ref_asm_start = $ref_mapping->[0];
247  $ref_asm_end = $ref_mapping->[1];
248  $ref_cmp_start = $ref_mapping->[2];
249  $ref_cmp_end = $ref_mapping->[3];
250  $ref_ori = $ref_mapping->[4];
251 
252  if ($ref_ori == $alt_ori) {
253  $ori = 1;
254  } else {
255  $ori = -1;
256  }
257 
258  if ($ref_cmp_start == $alt_cmp_start && $ref_cmp_end == $alt_cmp_end) {
259 
260  $support->log_stamped("About to add direct assembly entry for $chr_name: " . $ref_names_hash{$chr_name} . ", $patch_name: " . $alt_names_hash{$patch_name} . " on coordinates $ref_asm_start, $ref_asm_end, $alt_asm_start-$alt_asm_end\n", 1);
261  $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $ref_asm_end, $alt_asm_start, $alt_asm_end, $ori]);
262 
263  } elsif ($ref_cmp_start == $alt_cmp_start) {
264 
265  if ($alt_cmp_end > $ref_cmp_end) {
266  $asm_end = $alt_asm_end - ($alt_cmp_end - $ref_cmp_end);
267  if (($asm_end - $alt_asm_start) != ($ref_asm_end - $ref_asm_start)) {
268  $support->log_stamped("$patch_name does not have a correct mapping to $chr_name with $contig_name. $alt_asm_start-$asm_end has different length from $ref_asm_start-$ref_asm_end\n", 1);
269  last;
270  }
271  $support->log_stamped("About to add adjusted alt end assembly entry for $chr_name: " . $ref_names_hash{$chr_name} . ", $patch_name: " . $alt_names_hash{$patch_name} . " on coordinates $ref_asm_start, $ref_asm_end, $alt_asm_start-$asm_end\n", 1);
272  $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $ref_asm_end, $alt_asm_start, $asm_end, $ori]);
273  } else {
274  $asm_end = $ref_asm_end - ($ref_cmp_end - $alt_cmp_end);
275  if (($asm_end - $ref_asm_start) != ($alt_asm_end - $alt_asm_start)) {
276  $support->log_stamped("$patch_name does not have a correct mapping to $chr_name with $contig_name. $alt_asm_start-$alt_asm_end has different length from $ref_asm_start-$asm_end\n", 1);
277  last;
278  }
279  $support->log_stamped("About to add adjusted ref end assembly entry for $chr_name: " . $ref_names_hash{$chr_name} . ", $patch_name: " . $alt_names_hash{$patch_name} . " on coordinates $ref_asm_start, $asm_end, $alt_asm_start-$alt_asm_end\n", 1);
280  $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $asm_end, $alt_asm_start, $alt_asm_end, $ori]);
281  }
282 
283  } elsif ($ref_cmp_end == $alt_cmp_end) {
284 
285  if ($alt_cmp_start < $ref_cmp_start) {
286  $asm_start = $alt_asm_start - $alt_cmp_start + $ref_cmp_start;
287  if (($alt_asm_end - $asm_start) != ($ref_asm_end - $ref_asm_start)) {
288  $support->log_stamped("$patch_name does not have a correct mapping to $chr_name with $contig_name. $asm_start-$alt_asm_end has different length from $ref_asm_start-$ref_asm_end\n", 1);
289  last;
290  }
291  $support->log_stamped("About to add adjusted alt start assembly entry for $chr_name: " . $ref_names_hash{$chr_name} . ", $patch_name: " . $alt_names_hash{$patch_name} . " on coordinates $ref_asm_start, $ref_asm_end, $asm_start-$alt_asm_end\n", 1);
292  $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $ref_asm_start, $ref_asm_end, $asm_start, $alt_asm_end, $ori]);
293  } else {
294  $asm_start = $ref_asm_start - $alt_cmp_start + $ref_cmp_start;
295  if (($ref_asm_end - $asm_start) != ($alt_asm_end - $alt_asm_start)) {
296  $support->log_stamped("$patch_name does not have a correct mapping to $chr_name with $contig_name. $alt_asm_start-$alt_asm_end has different length from $asm_start-$ref_asm_end\n", 1);
297  last;
298  }
299  $support->log_stamped("About to add adjusted ref start assembly entry for $chr_name: " . $ref_names_hash{$chr_name} . ", $patch_name: " . $alt_names_hash{$patch_name} . " on coordinates $asm_start, $ref_asm_end, $alt_asm_start-$alt_asm_end\n", 1);
300  $c += $ref_helper->execute_update(-SQL => $update_sql, -PARAMS => [$ref_names_hash{$chr_name}, $alt_names_hash{$patch_name}, $asm_start, $ref_asm_end, $alt_asm_start, $alt_asm_end, $ori]);
301  }
302 
303  } else {
304  $support->log_stamped("Could not map $patch_name. Found no mapping in reference for $chr_name and $contig_name for $alt_asm_start-$alt_asm_end and $ref_asm_start-$ref_asm_end\n", 1);
305  }
306  }
307  }
308 }
309 
310 # finish logfile
311 $support->finish_log;
312 
Bio::EnsEMBL::Utils::ConversionSupport
Definition: ConversionSupport.pm:39
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
map
public map()
BEGIN
public BEGIN()
run
public run()