2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
25 #my $LIMIT = ' LIMIT 1000 ';
31 my ($dbname, $host, $port, $user, $pass);
33 GetOptions(
'dbname=s' => \$dbname,
34 'dnadbname=s' => \$dnadbname,
42 usage() if(!$host || !$user || !$dbname);
50 -dnadbname => $dnadbname);
58 -dbname => $dnadbname);
68 ### convert dna align features
70 my $sth = $db->dbc()->prepare(
"SHOW CREATE TABLE dna_align_feature");
72 my $create_table = $sth->fetchrow_arrayref()->[1];
76 $create_table =~ s/CREATE TABLE dna_align_feature/CREATE TABLE tmp_dna_align_feature/;
77 # difference b/w mysql 4 and mysql 3
78 $create_table =~ s/CREATE TABLE `dna_align_feature`/CREATE TABLE tmp_dna_align_feature/;
80 $sth = $db->dbc()->prepare($create_table);
83 $sth = $db->dbc()->prepare
84 (qq{INSERT INTO tmp_dna_align_feature
85 SELECT daf.dna_align_feature_id,
88 (a.asm_start + daf.seq_region_start - a.cmp_start),
89 (a.asm_start + a.cmp_end - daf.seq_region_end))
92 (a.asm_start + daf.seq_region_end - a.cmp_start),
93 (a.asm_end + a.cmp_end - daf.seq_region_start))
95 a.ori * daf.seq_region_strand as seq_region_strand,
96 daf.hit_start, daf.hit_end, daf.hit_strand, daf.hit_name,
97 daf.analysis_id, daf.score, daf.evalue, daf.perc_ident,
99 FROM $dnadbname.assembly a,
100 $dnadbname.seq_region asm_sr,
101 $dnadbname.seq_region cmp_sr,
102 dna_align_feature daf,
103 $dnadbname.seq_region_attrib sra,
104 $dnadbname.attrib_type at
105 WHERE asm_sr.coord_system_id = ? AND cmp_sr.coord_system_id = ?
106 AND daf.seq_region_id = a.cmp_seq_region_id
107 AND a.asm_seq_region_id = asm_sr.seq_region_id
108 AND a.cmp_seq_region_id = cmp_sr.seq_region_id
109 AND daf.seq_region_start >= a.cmp_start
110 AND daf.seq_region_end <= a.cmp_end
111 AND asm_sr.seq_region_id = sra.seq_region_id
112 AND sra.attrib_type_id = at.attrib_type_id
113 AND at.code =
'toplevel'
116 foreach my $cs_pair (@daf_css) {
117 my ($top_cs, $feat_cs) = @$cs_pair;
118 print STDERR
"Converting dna align features between from coord system ",
119 $feat_cs->name(),
" to coord system ", $top_cs->name(),
"\n";
120 $sth->execute($top_cs->dbID(), $feat_cs->dbID());
125 ### convert protein align features
127 $sth = $db->dbc()->prepare(
"SHOW CREATE TABLE protein_align_feature");
129 $create_table = $sth->fetchrow_arrayref()->[1];
132 $create_table =~ s/CREATE TABLE protein_align_feature/CREATE TABLE tmp_protein_align_feature/;
133 $create_table =~ s/CREATE TABLE `protein_align_feature`/CREATE TABLE tmp_protein_align_feature/;
136 $sth = $db->dbc()->prepare($create_table);
139 $sth = $db->dbc()->prepare
140 (qq{INSERT INTO tmp_protein_align_feature
141 SELECT paf.protein_align_feature_id,
144 (a.asm_start + paf.seq_region_start - a.cmp_start),
145 (a.asm_start + a.cmp_end - paf.seq_region_end))
148 (a.asm_start + paf.seq_region_end - a.cmp_start),
149 (a.asm_end + a.cmp_end - paf.seq_region_start))
151 a.ori * paf.seq_region_strand as seq_region_strand,
152 paf.hit_start, paf.hit_end, paf.hit_name, paf.analysis_id,
153 paf.score, paf.evalue, paf.perc_ident, paf.cigar_line
154 FROM $dnadbname.assembly a,
155 $dnadbname.seq_region asm_sr,
156 $dnadbname.seq_region cmp_sr,
157 protein_align_feature paf,
158 $dnadbname.seq_region_attrib sra,
159 $dnadbname.attrib_type at
160 WHERE asm_sr.coord_system_id = ? AND cmp_sr.coord_system_id = ?
161 AND paf.seq_region_id = a.cmp_seq_region_id
162 AND a.asm_seq_region_id = asm_sr.seq_region_id
163 AND a.cmp_seq_region_id = cmp_sr.seq_region_id
164 AND paf.seq_region_start >= a.cmp_start
165 AND paf.seq_region_end <= a.cmp_end
166 AND asm_sr.seq_region_id = sra.seq_region_id
167 AND sra.attrib_type_id = at.attrib_type_id
168 AND at.code =
'toplevel'
171 foreach my $cs_pair (@paf_css) {
172 my ($top_cs, $feat_cs) = @$cs_pair;
173 print STDERR
"Converting protein align features between from coord system ",
174 $feat_cs->name(),
" to coord system ", $top_cs->name(),
"\n";
175 $sth->execute($top_cs->dbID(), $feat_cs->dbID());
180 print STDERR
"Replacing existing align feature tables with new tables\n";
182 $db->dbc->do(
"drop table protein_align_feature");
183 $db->dbc->do(
"drop table dna_align_feature");
184 $db->dbc->do(
"alter table tmp_protein_align_feature rename protein_align_feature");
185 $db->dbc->do(
"alter table tmp_dna_align_feature rename dna_align_feature");
188 print STDERR
"Updating meta_coord table\n";
190 $db->dbc->do(
"delete from meta_coord where table_name = 'dna_align_feature'");
191 $db->dbc->do(
"delete from meta_coord where table_name = 'protein_align_feature'");
194 $sth = $db->dbc->prepare(
"INSERT INTO meta_coord set table_name = ?, " .
195 "coord_system_id = ?");
199 foreach my $cs_pair (@daf_css) {
200 my ($top_cs) = @$cs_pair;
201 if(!$seen{$top_cs->dbID()}) {
202 $sth->execute(
'dna_align_feature', $top_cs->dbID());
203 $seen{$top_cs->dbID()} = 1;
208 foreach my $cs_pair (@paf_css) {
209 my ($top_cs) = @$cs_pair;
210 if(!$seen{$top_cs->dbID()}) {
211 $sth->execute(
'protein_align_feature', $top_cs->dbID());
212 $seen{$top_cs->dbID()} = 1;
224 my $csa = $db->get_CoordSystemAdaptor();
225 my $mcc = $db->get_MetaCoordContainer();
227 # determine what coordinate systems have top-level seq_regions
229 my $sth = $db->dbc->prepare
230 (qq{SELECT distinct(sr.coord_system_id)
231 FROM seq_region sr, seq_region_attrib sra, attrib_type at
232 WHERE sr.seq_region_id = sra.seq_region_id
233 AND sra.attrib_type_id = at.attrib_type_id
234 AND at.code = 'toplevel'});
238 my @top_css =
map {$csa->fetch_by_dbID($_->[0])}
239 @{$sth->fetchall_arrayref()};
244 # determine what coord systems features are stored in
246 my @feat_css = @{$mcc->fetch_all_CoordSystems_by_feature_type($type)};
250 foreach my $feat_cs (@feat_css) {
251 foreach my $top_cs (@top_css) {
252 if(!$feat_cs->equals($top_cs)) {
253 if($feat_cs->rank() > $top_cs->rank()) {
254 my @mp = @{$csa->get_mapping_path($top_cs, $feat_cs)};
256 if($mp[0]->equals($top_cs)) {
259 die(
"Unexpected: ". $top_cs->name() .
260 " coord system should not be component of " .
261 $feat_cs->name(),
" coord system.");
265 die(
"There is no 1 step mapping path between coord systems " .
266 $feat_cs->name().
" and ".$top_cs->name().
". This script " .
267 "requires one step mapping paths.");
275 die(
"Nothing to do for ${type}s (or missing meta/assembly info)\n");
286 This program will convert the coordinates of alignment features from
287 their current cooridinate systems to coordinates on toplevel sequence
288 regions. This can speed up the retrieval of these features dramatically.
290 Features which are partially or entirely non-golden portions of
291 sequence regions will be discarded.
293 This program is only capable of mapping
if there is a direct relationship
294 between the coordinate systems defined in the assembly and meta tables.
296 This program can use a different database to obtain the assembly information
297 but the database must be on the same MySQL instance. This is useful
for
298 satellite databases such as the est database.
301 perl push_align_features -dbname <dbname> -user <user> -host <host> \
302 [-port <port>] [-pass <pass>] [-dnadbname <dbname>]