3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
25 package SeqStoreConverter::DanioRerio;
# Creates the coordinate systems used for Danio rerio (chromosome,
# supercontig, clone and chunk) in the target database and stores the
# assembly mappings between them in the meta table.
# NOTE(review): some statements of this sub are not visible in this view
# (e.g. the assignment that receives the list of coordinate systems and the
# end of the first INSERT statement), so the comments below describe only
# what is shown.
31 sub create_coord_systems {
34 $self->debug(
"DanioRerio Specific: creating chromosome, supercontig, clone "
35 .
" and chunk coordinate systems");
37 my $target = $self->target();
38 my $dbh = $self->dbh();
# Default assembly name; it is the second element of the chromosome and
# supercontig entries and is embedded in the mapping strings below.
40 my $ass_def = $self->get_default_assembly();
# One four-element entry per coordinate system; presumably these line up
# with the (name, version, attrib, rank) columns of the INSERT below --
# TODO confirm. Only the 'chunk' entry carries 'sequence_level'.
43 ([
"chromosome" , $ass_def,
"default_version", 1],
44 [
"supercontig", $ass_def,
"default_version", 2],
45 [
"clone" , undef,
"default_version", 3],
46 [
"chunk" , undef,
"default_version,sequence_level", 4]);
# Mapping paths that are written to the meta table under the
# 'assembly.mapping' key further down.
48 my @assembly_mappings = (
"chromosome:$ass_def|chunk",
50 "supercontig:$ass_def|chunk",
51 "chromosome:$ass_def|chunk|clone",
52 "supercontig:$ass_def|chunk|clone",
53 "chromosome:$ass_def|chunk|supercontig");
55 $self->debug(
"Building coord_system table");
57 my $sth = $dbh->prepare
58 (
"INSERT INTO $target.coord_system (name, version, attrib, rank) " .
63 foreach my $cs (@coords) {
# Remember the auto-generated row id, keyed by the first element of the
# entry (the coordinate system name in the list above).
65 $coord_system_ids{$cs->[0]} = $sth->{
'mysql_insertid'};
69 $self->debug(
"Adding assembly.mapping entries to meta table");
# The statement handle is reused for the meta table inserts.
71 $sth = $dbh->prepare(
"INSERT INTO $target.meta(meta_key, meta_value) " .
72 "VALUES ('assembly.mapping', ?)");
# One meta row per mapping string.
74 foreach my $mapping (@assembly_mappings) {
75 $sth->execute($mapping);
# Fills the target seq_region table with chunk, clone, chromosome and
# supercontig regions built from the source contig, chromosome and assembly
# tables, and records old-id (or name) to new-id mappings in temporary
# tables.
# NOTE(review): parts of several statements in this sub are not visible in
# this view (for example the end of some INSERT column/VALUES lists and the
# declaration of %chr_id_added); the comments describe only what is shown.
85 sub create_seq_regions {
88 my $source = $self->source();
89 my $target = $self->target();
90 my $dbh = $self->dbh();
94 # Turn all of the contents of the contig table into 'chunks' and
95 # give them arbitrary names like chunk1, chunk2. Keep old internal
96 # ids for convenience.
99 $self->debug(
"DanioRerio Specific: creating chunk seq_regions");
# The SELECT supplies ctg.contig_id for the seq_region_id column, so each
# chunk keeps the id of the contig it was made from.
101 my $sth = $dbh->prepare
102 (
"INSERT INTO $target.seq_region (seq_region_id, name, coord_system_id, " .
104 "SELECT ctg.contig_id, concat('chunk', ctg.contig_id), " .
105 " cs.coord_system_id, ctg.length " .
106 "FROM $source.contig ctg, $target.coord_system cs " .
107 "WHERE cs.name = 'chunk'");
# Shared insert used below for clone, chromosome and supercontig rows; it is
# executed with (name, coord_system_id, length) and the new row id is read
# back from mysql_insertid.
113 my $insert_sth = $dbh->prepare
114 (
"INSERT INTO $target.seq_region (name, coord_system_id, length) " .
# Statements that record the mapping from an old id (or supercontig name)
# to the newly created seq_region id.
117 my $tmp_chr_insert_sth = $dbh->prepare
118 (
"INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");
120 my $tmp_supercontig_insert_sth = $dbh->prepare
121 (
"INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?,?)");
123 my $tmp_clone_insert_sth = $dbh->prepare
124 (
"INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?,?)");
128 # create a temporary table to hold the ids of all 'toplevel'
129 # seq_regions. Keep the old chromosome_id, and the new seq_region_id
132 (
"CREATE TEMPORARY TABLE $target.tmp_toplevel_map " .
133 "(old_id INT, new_id INT, INDEX new_idx(new_id), INDEX old_idx(old_id))");
135 my $tmp_toplevel_insert_sth = $dbh->prepare
136 (
"INSERT INTO $target.tmp_toplevel_map (old_id, new_id) VALUES (?,?)");
140 # Turn real clones into clones
142 $self->debug(
"DanioRerio Specific: creating clone seq_regions");
# Contigs whose name starts with neither 'ctg' nor 'NA' are the ones that
# are turned into clones.
144 my $select_sth = $dbh->prepare
145 (
"SELECT ctg.contig_id, ctg.name, ctg.length " .
146 "FROM $source.contig ctg " .
147 "WHERE ctg.name not like 'ctg%' and ctg.name not like 'NA%'");
# Coordinate system id given to every clone row inserted below.
149 my $cs_id = $self->get_coord_system_id(
'clone');
151 $select_sth->execute();
# These three variables are bound to the result columns and reused by the
# chromosome and supercontig loops further down.
153 my ($old_id, $name, $length);
154 $select_sth->bind_columns(\$old_id, \$name, \$length);
156 while ($select_sth->fetch()) {
157 #insert into seq_region table
158 $insert_sth->execute($name, $cs_id, $length);
159 #copy old/new mapping into temporary table
160 $tmp_clone_insert_sth->execute($old_id, $insert_sth->{
'mysql_insertid'});
163 $select_sth->finish();
166 # Turn real chromosomes into chromosomes
168 $self->debug(
"DanioRerio Specific: creating chromosome seq_regions");
# Only chromosome rows whose name is at most two characters long are
# selected here.
170 $select_sth = $dbh->prepare
171 (
"SELECT chr.chromosome_id, chr.name, chr.length " .
172 "FROM $source.chromosome chr " .
173 "WHERE length(chr.name) <= 2");
175 $cs_id = $self->get_coord_system_id(
'chromosome');
177 $select_sth->execute();
179 $select_sth->bind_columns(\$old_id, \$name, \$length);
183 while ($select_sth->fetch()) {
184 #insert into seq_region table
185 $insert_sth->execute($name, $cs_id, $length);
186 #copy old/new mapping into temporary table
187 my $new_id = $insert_sth->{
'mysql_insertid'};
188 $tmp_chr_insert_sth->execute($old_id, $new_id);
189 $tmp_toplevel_insert_sth->execute($old_id, $new_id);
# Mark this chromosome id as already present in the toplevel map so the
# supercontig loop below does not add it a second time.
190 $chr_id_added{$old_id} = 1;
193 $select_sth->finish();
196 # Turn supercontigs into supercontigs
198 $self->debug(
"DanioRerio Specific: creating supercontig seq_regions");
# The supercontig length is computed as the span between the smallest
# chr_start and the largest chr_end of its assembly rows.
# NOTE(review): $target.coord_system is listed in FROM without a join
# condition and none of its columns are selected; it looks unnecessary --
# confirm before relying on it.
200 $select_sth = $dbh->prepare
201 (
"SELECT a.chromosome_id, a.superctg_name, " .
202 " MAX(a.chr_end) - MIN(a.chr_start) + 1 " .
203 "FROM $source.assembly a, $target.coord_system cs " .
204 "GROUP BY a.superctg_name");
206 $select_sth->execute();
207 $select_sth->bind_columns(\$old_id, \$name, \$length);
209 $cs_id = $self->get_coord_system_id(
'supercontig');
211 while ($select_sth->fetch()) {
212 #insert into seq_region table
213 $insert_sth->execute($name, $cs_id, $length);
214 #copy old/new mapping into temporary table
215 my $new_id = $insert_sth->{
'mysql_insertid'};
216 $tmp_supercontig_insert_sth->execute($name,$new_id);
# A supercontig is recorded as toplevel only when its chromosome_id has not
# been recorded yet (by a chromosome above or by an earlier supercontig).
218 if(!$chr_id_added{$old_id}) {
219 $chr_id_added{$old_id} = 1;
220 $tmp_toplevel_insert_sth->execute($old_id, $new_id);
# Release all statement handles used above.
224 $select_sth->finish();
225 $tmp_chr_insert_sth->finish();
226 $tmp_supercontig_insert_sth->finish();
227 $tmp_clone_insert_sth->finish();
228 $tmp_toplevel_insert_sth->finish();
229 $insert_sth->finish();
# Builds the assembly information by delegating to three methods in turn,
# one each for the chunk/chromosome, chunk/supercontig and chunk/clone
# relationships.
# NOTE(review): the line that unpacks $self is not visible in this view.
234 sub create_assembly {
237 #chromosomes are made of chunks
238 $self->assembly_contig_chromosome();
240 #supercontigs are made of chunks
241 $self->assembly_contig_supercontig();
243 #clones are made of chunks
244 $self->assembly_contig_clone();
# Builds the chunk/clone part of the target assembly table: every clone
# recorded in tmp_cln_map is mapped onto one chunk over its full length.
# NOTE(review): the call that receives the SQL string below is not visible
# in this view.
250 sub assembly_contig_clone {
254 $self->debug(
"DanioRerio Specific: building assembly table - chunk/clone");
255 #this is easy, there is simply one entire chunk for a given clone
257 my $source = $self->source();
258 my $target = $self->target();
259 my $dbh = $self->dbh();
# Assembled side: the new clone seq_region (tcm.new_id). Component side:
# tcm.old_id, the original contig id, which create_seq_regions reuses as the
# seq_region_id of the matching chunk. Both sides span 1..sr.length and the
# orientation is fixed at 1.
262 (
"INSERT INTO $target.assembly (asm_seq_region_id, cmp_seq_region_id, " .
263 " asm_start, asm_end, cmp_start, cmp_end, ori) " .
264 "SELECT tcm.new_id, tcm.old_id, 1, sr.length, 1, sr.length, 1 " .
265 "FROM $target.tmp_cln_map tcm, $target.seq_region sr " .
266 "WHERE sr.seq_region_id = tcm.new_id");
271 # we need to override the transfer of the genes since danio genes can be on
272 # supercontigs and on chromosomes
# NOTE(review): the sub header for this section and the calls that run the
# SQL strings below are not visible in this view.
# The four statements below copy genes, transcripts, exons and translations
# from the source database into the target database. Gene, transcript and
# exon positions are converted from contig coordinates into the coordinates
# of the region that tmp_toplevel_map assigns to the old chromosome_id.
276 my $target = $self->target();
277 my $source = $self->source();
278 my $dbh = $self->dbh();
281 # Transfer the gene table
284 $self->debug(
"DanioRerio Specific: Building gene table");
286 # first transfer genes on chromosomes
# Coordinate conversion used by the next three statements: when
# contig_ori = 1 the exon position is shifted by
# (chr_start - contig_start); otherwise it is mirrored inside the contig
# span (chr_start + contig_end - position). MIN/MAX over all exons of the
# group give the start and end of the feature.
289 (
"INSERT INTO $target.gene " .
290 "SELECT g.gene_id, g.type, g.analysis_id, toplev.new_id, " .
291 "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
292 " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
293 "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
294 " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
295 " a.contig_ori*e.contig_strand as strand, " .
296 " g.display_xref_id " .
297 "FROM $source.transcript t, $source.exon_transcript et, " .
298 " $source.exon e, $source.assembly a, $source.gene g, " .
299 " $target.tmp_toplevel_map toplev " .
300 "WHERE t.transcript_id = et.transcript_id " .
301 "AND et.exon_id = e.exon_id " .
302 "AND e.contig_id = a.contig_id " .
303 "AND g.gene_id = t.gene_id " .
304 "AND a.chromosome_id = toplev.old_id " .
305 "GROUP BY g.gene_id");
309 # Transfer the transcript table
312 $self->debug(
"DanioRerio Specific: Building transcript table ");
# Same conversion as for genes, grouped per transcript.
314 (
"INSERT INTO $target.transcript " .
315 "SELECT t.transcript_id, t.gene_id, toplev.new_id, " .
316 "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
317 " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
318 "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
319 " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
320 " a.contig_ori*e.contig_strand as strand, " .
321 " t.display_xref_id " .
322 "FROM $source.transcript t, $source.exon_transcript et, " .
323 " $source.exon e, $source.assembly a, " .
324 " $target.tmp_toplevel_map toplev " .
325 "WHERE t.transcript_id = et.transcript_id " .
326 "AND et.exon_id = e.exon_id " .
327 "AND e.contig_id = a.contig_id " .
328 "AND a.chromosome_id = toplev.old_id " .
329 "GROUP BY t.transcript_id");
332 # Transfer the exon table
335 $self->debug(
"DanioRerio Specific: Building exon table ");
# Same conversion again, grouped per exon. The transcript and gene tables
# are joined in, so only exons linked to a transcript and gene are selected.
338 (
"INSERT INTO $target.exon " .
339 "SELECT e.exon_id, toplev.new_id, " .
340 "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
341 " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
342 "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
343 " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
344 " a.contig_ori*e.contig_strand as strand, " .
345 " e.phase, e.end_phase " .
346 "FROM $source.transcript t, $source.exon_transcript et, " .
347 " $source.exon e, $source.assembly a, $source.gene g, " .
348 " $target.tmp_toplevel_map toplev " .
349 "WHERE t.transcript_id = et.transcript_id " .
350 "AND et.exon_id = e.exon_id " .
351 "AND e.contig_id = a.contig_id " .
352 "AND g.gene_id = t.gene_id " .
353 "AND a.chromosome_id = toplev.old_id " .
354 "GROUP BY e.exon_id");
357 # Transfer translation table
360 $self->debug(
"Building translation table");
# Translations need no coordinate conversion; each one is paired with the
# transcript that references its translation_id.
363 (
"INSERT INTO $target.translation " .
364 "SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " .
365 " tl.start_exon_id, tl.seq_end, tl.end_exon_id " .
366 "FROM $source.transcript ts, $source.translation tl " .
367 "WHERE ts.translation_id = tl.translation_id");
# NOTE(review): the sub header for this section is not visible in this view,
# the debug message below is cut off, and the execute of the final statement
# is not shown.
# Resets the seq_region_attrib rows of the attribute type returned by
# add_attrib_code(): existing rows of that type are deleted, then an
# INSERT ... SELECT is prepared that adds one row with value 1 for every
# new_id in tmp_toplevel_map.
377 my $target = $self->target();
378 my $dbh = $self->dbh();
380 my $attrib_type_id = $self->add_attrib_code();
382 $self->debug(
"DanioRerio Specific: Setting toplevel attributes of " .
# Remove any rows of this attribute type first.
385 my $sth = $dbh->prepare(
"DELETE FROM $target.seq_region_attrib " .
386 "WHERE attrib_type_id = ?");
387 $sth->execute($attrib_type_id);
# Here the attribute type id is interpolated into the SQL text rather than
# bound as a placeholder.
390 $sth = $dbh->prepare(
"INSERT INTO $target.seq_region_attrib " .
391 ' (seq_region_id, attrib_type_id, value) ' .
392 "SELECT toplev.new_id, $attrib_type_id, 1 " .
393 "FROM $target.tmp_toplevel_map toplev ");