ensembl-hive  2.7.0
DanioRerio.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 use strict;
21 use warnings;
22 
24 
25 package SeqStoreConverter::DanioRerio;
26 
27 use vars qw(@ISA);
28 
30 
# Populate the target coord_system table and the 'assembly.mapping' meta
# entries with the layout used by this converter.
#
# Four coordinate systems are inserted, ranked 1 to 4: chromosome and
# supercontig (both versioned with the value returned by
# get_default_assembly()), then clone and chunk (both unversioned). The
# chunk system is additionally flagged 'sequence_level'. Afterwards one
# 'assembly.mapping' row is written to the meta table per mapping path.
#
# Args    : none (called as an object method)
# Returns : nothing
sub create_coord_systems {
  my $self = shift;

  $self->debug("DanioRerio Specific: creating chromosome, supercontig, clone "
               . " and chunk coordinate systems");

  my $target = $self->target();
  my $dbh    = $self->dbh();

  my $ass_def = $self->get_default_assembly();

  # Each row is (name, version, attrib, rank), in the column order of the
  # INSERT statement prepared below.
  my @coords =
    (["chromosome" , $ass_def, "default_version", 1],
     ["supercontig", $ass_def, "default_version", 2],
     ["clone" , undef, "default_version", 3],
     ["chunk" , undef, "default_version,sequence_level", 4]);

  # Mapping paths between coordinate systems; chunk is the component (or
  # the intermediate step) in every one of them.
  my @assembly_mappings = ("chromosome:$ass_def|chunk",
                           "clone|chunk",
                           "supercontig:$ass_def|chunk",
                           "chromosome:$ass_def|chunk|clone",
                           "supercontig:$ass_def|chunk|clone",
                           "chromosome:$ass_def|chunk|supercontig");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare
    ("INSERT INTO $target.coord_system (name, version, attrib, rank) " .
     "VALUES (?,?,?,?)");

  foreach my $cs (@coords) {
    $sth->execute(@$cs);
  }
  $sth->finish();

  $self->debug("Adding assembly.mapping entries to meta table");

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }

  $sth->finish();

  return;
}
83 
84 
# Fill the target seq_region table with chunk, clone, chromosome and
# supercontig regions derived from the source database, and record the
# old-id / new-id relationships in temporary mapping tables.
#
# Tables written:
#   seq_region        - one row per chunk, clone, chromosome, supercontig
#   tmp_cln_map       - source contig_id     -> new clone seq_region_id
#   tmp_chr_map       - source chromosome_id -> new chromosome seq_region_id
#   tmp_superctg_map  - supercontig name     -> new supercontig seq_region_id
#   tmp_toplevel_map  - source chromosome_id -> seq_region_id of the region
#                       treated as top level (created by this method)
#
# tmp_cln_map, tmp_chr_map and tmp_superctg_map are only inserted into
# here, so they must already exist when this method is called.
#
# Args    : none (called as an object method)
sub create_seq_regions {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();


  #
  # Turn all of the contents of the contig table into 'chunks' and
  # give them arbitrary names like chunk1, chunk2. Keep old internal
  # ids for convenience.
  #

  $self->debug("DanioRerio Specific: creating chunk seq_regions");

  my $sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (seq_region_id, name, coord_system_id, " .
     " length) ".
     "SELECT ctg.contig_id, concat('chunk', ctg.contig_id), " .
     " cs.coord_system_id, ctg.length " .
     "FROM $source.contig ctg, $target.coord_system cs " .
     "WHERE cs.name = 'chunk'");

  $sth->execute();

  $sth->finish();

  # Shared insert used for clones, chromosomes and supercontigs; the new
  # seq_region_id is read back from mysql_insertid after each execute.
  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $tmp_chr_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

  my $tmp_supercontig_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?,?)");

  my $tmp_clone_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?,?)");


  #
  # create a temporary table to hold the ids of all 'toplevel'
  # seq_regions. Keep the old chromosome_id, and the new seq_region_id
  #
  $dbh->do
    ("CREATE TEMPORARY TABLE $target.tmp_toplevel_map " .
     "(old_id INT, new_id INT, INDEX new_idx(new_id), INDEX old_idx(old_id))");

  my $tmp_toplevel_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_toplevel_map (old_id, new_id) VALUES (?,?)");


  #
  # Turn real clones into clones. Contigs whose name starts with 'ctg'
  # or 'NA' are skipped.
  #
  $self->debug("DanioRerio Specific: creating clone seq_regions");

  my $select_sth = $dbh->prepare
    ("SELECT ctg.contig_id, ctg.name, ctg.length " .
     "FROM $source.contig ctg " .
     "WHERE ctg.name not like 'ctg%' and ctg.name not like 'NA%'");

  my $cs_id = $self->get_coord_system_id('clone');

  $select_sth->execute();

  my ($old_id, $name, $length);
  $select_sth->bind_columns(\$old_id, \$name, \$length);

  while ($select_sth->fetch()) {
    #insert into seq_region table
    $insert_sth->execute($name, $cs_id, $length);
    #copy old/new mapping into temporary table
    $tmp_clone_insert_sth->execute($old_id, $insert_sth->{'mysql_insertid'});
  }

  $select_sth->finish();

  #
  # Turn real chromosomes into chromosomes. Only source chromosomes whose
  # name is at most two characters long are selected.
  #
  $self->debug("DanioRerio Specific: creating chromosome seq_regions");

  $select_sth = $dbh->prepare
    ("SELECT chr.chromosome_id, chr.name, chr.length " .
     "FROM $source.chromosome chr " .
     "WHERE length(chr.name) <= 2");

  $cs_id = $self->get_coord_system_id('chromosome');

  $select_sth->execute();

  $select_sth->bind_columns(\$old_id, \$name, \$length);

  # Source chromosome ids that already have an entry in tmp_toplevel_map.
  my %chr_id_added;

  while ($select_sth->fetch()) {
    #insert into seq_region table
    $insert_sth->execute($name, $cs_id, $length);
    #copy old/new mapping into temporary table
    my $new_id = $insert_sth->{'mysql_insertid'};
    $tmp_chr_insert_sth->execute($old_id, $new_id);
    $tmp_toplevel_insert_sth->execute($old_id, $new_id);
    $chr_id_added{$old_id} = 1;
  }

  $select_sth->finish();

  #
  # Turn supercontigs into supercontigs. The length of a supercontig is
  # the span it covers in the source assembly table. Only the assembly
  # table is read: no other table contributes to this query.
  #
  # NOTE(review): a.chromosome_id is selected without being aggregated or
  # grouped; this relies on the server accepting such GROUP BY queries.
  #
  $self->debug("DanioRerio Specific: creating supercontig seq_regions");

  $select_sth = $dbh->prepare
    ("SELECT a.chromosome_id, a.superctg_name, " .
     " MAX(a.chr_end) - MIN(a.chr_start) + 1 " .
     "FROM $source.assembly a " .
     "GROUP BY a.superctg_name");

  $select_sth->execute();
  $select_sth->bind_columns(\$old_id, \$name, \$length);

  $cs_id = $self->get_coord_system_id('supercontig');

  while ($select_sth->fetch()) {
    #insert into seq_region table
    $insert_sth->execute($name, $cs_id, $length);
    #copy old/new mapping into temporary table
    my $new_id = $insert_sth->{'mysql_insertid'};
    $tmp_supercontig_insert_sth->execute($name,$new_id);

    # A supercontig becomes top level only when its source chromosome id
    # has no top-level entry yet; the first supercontig fetched for such
    # an id claims the entry.
    if(!$chr_id_added{$old_id}) {
      $chr_id_added{$old_id} = 1;
      $tmp_toplevel_insert_sth->execute($old_id, $new_id);
    }
  }

  $select_sth->finish();
  $tmp_chr_insert_sth->finish();
  $tmp_supercontig_insert_sth->finish();
  $tmp_clone_insert_sth->finish();
  $tmp_toplevel_insert_sth->finish();
  $insert_sth->finish();
}
231 
232 
233 
# Build the target assembly table by running the three chunk-based
# assembly builders in a fixed order: chromosome, supercontig, clone.
#
# Args    : none (called as an object method)
# Returns : nothing
sub create_assembly {
  my $self = shift;

  # Chromosomes, supercontigs and clones are all made of chunks; each
  # builder below handles one of those relationships.
  my @builders = qw(
    assembly_contig_chromosome
    assembly_contig_supercontig
    assembly_contig_clone
  );

  for my $builder (@builders) {
    $self->$builder();
  }

  return;
}
248 
249 
# Write the chunk/clone rows of the target assembly table.
#
# Every row of tmp_cln_map yields one assembly row in which the clone
# (new_id) is the assembled region and the chunk (old_id) is the
# component. Both are mapped over their whole length, 1..seq_region.length
# of the clone, in forward orientation.
#
# Args    : none (called as an object method)
# Returns : whatever DBI's do() returns for the INSERT statement
sub assembly_contig_clone {
  my $self = shift;

  $self->debug("DanioRerio Specific: building assembly table - chunk/clone");
  #this is easy, there is simply one entire chunk for a given clone

  my $target = $self->target();
  my $dbh    = $self->dbh();

  return $dbh->do
    ("INSERT INTO $target.assembly (asm_seq_region_id, cmp_seq_region_id, " .
     " asm_start, asm_end, cmp_start, cmp_end, ori) " .
     "SELECT tcm.new_id, tcm.old_id, 1, sr.length, 1, sr.length, 1 " .
     "FROM $target.tmp_cln_map tcm, $target.seq_region sr " .
     "WHERE sr.seq_region_id = tcm.new_id");
}
268 
269 
270 
# Override of the gene transfer: danio genes can sit on supercontigs as
# well as on chromosomes, so every feature is placed on the seq_region
# that tmp_toplevel_map records for the source chromosome_id.
#
# Fills, in this order, the target gene, transcript, exon and translation
# tables. Gene, transcript and exon locations are computed from the exon
# contig coordinates and the source assembly table:
#   contig_ori =  1 : chr_start + (exon position - contig_start)
#   otherwise       : chr_start + (contig_end - exon position)
# taking the minimum as start and the maximum as end over the grouped rows.
# Translations are copied as-is, joined through transcript.translation_id.
#
# Args    : none (called as an object method)
# Returns : nothing
sub transfer_genes {
  my $self = shift;

  my $target_db = $self->target();
  my $source_db = $self->source();
  my $db_handle = $self->dbh();

  # Start, end and strand columns shared by the gene, transcript and exon
  # queries.
  my $location_columns = join q{},
    'MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start),',
    ' (a.chr_start+a.contig_end-e.contig_end ))) as start, ',
    'MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), ',
    ' (a.chr_start+a.contig_end-e.contig_start))) as end, ',
    ' a.contig_ori*e.contig_strand as strand, ';

  my $gene_sql = join q{},
    "INSERT INTO ${target_db}.gene ",
    'SELECT g.gene_id, g.type, g.analysis_id, toplev.new_id, ',
    $location_columns,
    ' g.display_xref_id ',
    "FROM ${source_db}.transcript t, ${source_db}.exon_transcript et, ",
    " ${source_db}.exon e, ${source_db}.assembly a, ${source_db}.gene g, ",
    " ${target_db}.tmp_toplevel_map toplev ",
    'WHERE t.transcript_id = et.transcript_id ',
    'AND et.exon_id = e.exon_id ',
    'AND e.contig_id = a.contig_id ',
    'AND g.gene_id = t.gene_id ',
    'AND a.chromosome_id = toplev.old_id ',
    'GROUP BY g.gene_id';

  my $transcript_sql = join q{},
    "INSERT INTO ${target_db}.transcript ",
    'SELECT t.transcript_id, t.gene_id, toplev.new_id, ',
    $location_columns,
    ' t.display_xref_id ',
    "FROM ${source_db}.transcript t, ${source_db}.exon_transcript et, ",
    " ${source_db}.exon e, ${source_db}.assembly a, ",
    " ${target_db}.tmp_toplevel_map toplev ",
    'WHERE t.transcript_id = et.transcript_id ',
    'AND et.exon_id = e.exon_id ',
    'AND e.contig_id = a.contig_id ',
    'AND a.chromosome_id = toplev.old_id ',
    'GROUP BY t.transcript_id';

  my $exon_sql = join q{},
    "INSERT INTO ${target_db}.exon ",
    'SELECT e.exon_id, toplev.new_id, ',
    $location_columns,
    ' e.phase, e.end_phase ',
    "FROM ${source_db}.transcript t, ${source_db}.exon_transcript et, ",
    " ${source_db}.exon e, ${source_db}.assembly a, ${source_db}.gene g, ",
    " ${target_db}.tmp_toplevel_map toplev ",
    'WHERE t.transcript_id = et.transcript_id ',
    'AND et.exon_id = e.exon_id ',
    'AND e.contig_id = a.contig_id ',
    'AND g.gene_id = t.gene_id ',
    'AND a.chromosome_id = toplev.old_id ',
    'GROUP BY e.exon_id';

  my $translation_sql = join q{},
    "INSERT INTO ${target_db}.translation ",
    'SELECT tl.translation_id, ts.transcript_id, tl.seq_start, ',
    ' tl.start_exon_id, tl.seq_end, tl.end_exon_id ',
    "FROM ${source_db}.transcript ts, ${source_db}.translation tl ",
    'WHERE ts.translation_id = tl.translation_id';

  # Each step logs its message and then runs its statement; the order of
  # the list is the order of execution.
  my @steps = (
    [ 'DanioRerio Specific: Building gene table',        $gene_sql        ],
    [ 'DanioRerio Specific: Building transcript table ', $transcript_sql  ],
    [ 'DanioRerio Specific: Building exon table ',       $exon_sql        ],
    [ 'Building translation table',                      $translation_sql ],
  );

  for my $step (@steps) {
    my ($message, $sql) = @{$step};
    $self->debug($message);
    $db_handle->do($sql);
  }

  return;
}
371 
372 
373 
# Mark every seq_region listed in tmp_toplevel_map as top level.
#
# The attribute type id is obtained from add_attrib_code(). All existing
# seq_region_attrib rows of that type are deleted first, then one row with
# value 1 is inserted for each new_id found in tmp_toplevel_map.
#
# Args    : none (called as an object method)
# Returns : the result of finish() on the INSERT statement handle
sub set_top_level {
  my $self = shift;

  my $target_db = $self->target();
  my $db_handle = $self->dbh();

  my $toplevel_attrib_id = $self->add_attrib_code();

  $self->debug("DanioRerio Specific: Setting toplevel attributes of " .
               "seq_regions");

  # Clear out any rows of this attribute type before re-creating them.
  my $delete_sql = "DELETE FROM ${target_db}.seq_region_attrib "
                 . "WHERE attrib_type_id = ?";
  my $delete_sth = $db_handle->prepare($delete_sql);
  $delete_sth->execute($toplevel_attrib_id);
  $delete_sth->finish();

  # The attribute type id is written into the statement text itself.
  my $insert_sql = "INSERT INTO ${target_db}.seq_region_attrib "
                 . ' (seq_region_id, attrib_type_id, value) '
                 . "SELECT toplev.new_id, ${toplevel_attrib_id}, 1 "
                 . "FROM ${target_db}.tmp_toplevel_map toplev ";
  my $insert_sth = $db_handle->prepare($insert_sql);
  $insert_sth->execute();

  return $insert_sth->finish();
}
398 
399 
400 
401 1;
SeqStoreConverter::BasicConverter
Definition: BasicConverter.pm:3