ensembl-hive  2.8.1
CaenorhabditisBriggsae.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 use strict;
21 use warnings;
22 
24 
25 package SeqStoreConverter::CaenorhabditisBriggsae;
26 
27 use vars qw(@ISA);
28 
30 
31 
#
# Creates the scaffold, clone and contig coordinate systems in the target
# database and registers the assembly mappings between them in the target
# meta table.
#
# Takes no arguments besides the invocant and returns nothing.
#
sub create_coord_systems {
  my $self = shift;

  $self->debug("CaenorhabditisBriggsae Specific: creating clone, scaffold," .
               " and contig coordinate systems");

  my $target = $self->target();
  my $dbh    = $self->dbh();

  my $ass_def = $self->get_default_assembly();

  # Each row is (name, version, attrib, rank), matching the column order of
  # the INSERT statement below. Only the scaffold system carries a version.
  my @coords =
    (["scaffold", $ass_def, "default_version",                1],
     ['clone',    undef,    'default_version',                2],
     ["contig",   undef,    "default_version,sequence_level", 3]);

  my @assembly_mappings = ("scaffold:$ass_def|contig",
                           "clone|contig",
                           "scaffold:$ass_def|contig|clone");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                          "(name, version, attrib, rank) VALUES (?,?,?,?)");

  # The generated coord_system ids are not needed in this method, so they
  # are not collected after each insert.
  foreach my $cs (@coords) {
    $sth->execute(@$cs);
  }
  $sth->finish();

  $self->debug("Adding assembly.mapping entries to meta table");

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }

  $sth->finish();

  return;
}
78 
79 
80 
#
# Creates the seq_regions for all three coordinate systems by running the
# contig, clone and scaffold conversions in that order. The source
# chromosomes are loaded into the 'scaffold' coordinate system.
#
# Returns whatever the final conversion step returns.
#
sub create_seq_regions {
  my $self = shift;

  $self->debug("CaenorhabditisBriggsae Specific: creating contig, " .
               "clone and scaffold seq_regions");

  $self->contig_to_seq_region();
  $self->clone_to_seq_region();

  return $self->chromosome_to_seq_region('scaffold');
}
91 
92 
#
# Copies every row of the source chromosome table into the target
# seq_region table under the given coordinate system, and records the
# old chromosome_id -> new seq_region id pairs in the target tmp_chr_map
# table.
#
# Arg [1] : (optional) name of the target coordinate system; falls back to
#           "chromosome" when not supplied (or false).
# Returns : nothing
#
sub chromosome_to_seq_region {
  my ($self, $target_cs_name) = @_;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $target_cs_name = "chromosome" unless $target_cs_name;
  my $cs_id = $self->get_coord_system_id($target_cs_name);

  $self->debug("CaenorhabditisBriggsae Specific: Transforming chromosomes " .
               "into $target_cs_name seq_regions");

  # For consistency with mart and v19 the chromosome name is kept unchanged
  # for now. The alternative, which strips the leading 'cb25.' from the
  # name, would select substring(name,6) instead of name.
  my $read_sth = $dbh->prepare
    ("SELECT chromosome_id,name,length FROM $source.chromosome");

  my $region_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $map_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

  $read_sth->execute();

  while (my ($old_id, $chr_name, $chr_length) = $read_sth->fetchrow_array()) {
    # create the new seq_region ...
    $region_sth->execute($chr_name, $cs_id, $chr_length);

    # ... and remember which new id the old chromosome id was given
    my $new_id = $region_sth->{'mysql_insertid'};
    $map_sth->execute($old_id, $new_id);
  }

  $_->finish() for ($read_sth, $region_sth, $map_sth);

  return;
}
143 
144 
#
# Loads the assembly data: first the contig/chromosome relationships, then
# the contig/clone relationships.
#
# Returns whatever the contig/clone step returns.
#
sub create_assembly {
  my ($self) = @_;

  my $message = "CaenorhabditisBriggsae Specific: loading assembly data";
  $self->debug($message);

  $self->assembly_contig_chromosome();

  return $self->assembly_contig_clone();
}
153 
154 
155 
156 
#
# Override the assembly contig clone method because the briggsae database
# does not have any embl_offsets. The position of each contig on its clone
# is instead read from the contig name: the second and third dot-separated
# fields of the name are used as start and end on the clone.
#
# Only contigs whose name does not start with 'c' and whose clone has an
# entry in the target tmp_cln_map table are loaded.
#
# Dies if the start/end cannot be read from a contig name, or if the length
# derived from the name disagrees with the stored contig length.
#
sub assembly_contig_clone {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $self->debug("CaenorhabditisBriggsae Specific: loading contig/clone " .
               "assembly relationship");

  my $asm_sth = $dbh->prepare
    ("INSERT INTO $target.assembly " .
     "set asm_seq_region_id = ?, ".
     " asm_start = ?, " .
     " asm_end = ?, " .
     " cmp_seq_region_id = ?, ".
     " cmp_start = ?, " .
     " cmp_end = ?, " .
     " ori = ?");

  # get a list of the contigs that have clones, their ids, and the
  # corresponding clone ids
  my $ctg_sth = $dbh->prepare
    ("SELECT ctg.name, ctg.contig_id, ctg.length, cln.new_id " .
     "FROM $source.contig ctg, $target.tmp_cln_map cln " .
     "WHERE ctg.name not like 'c%' " . # only contigs w/ proper accessions
     "AND ctg.clone_id = cln.old_id");

  $ctg_sth->execute();

  my ($ctg_name, $ctg_id, $ctg_len, $cln_id);

  $ctg_sth->bind_columns(\$ctg_name, \$ctg_id, \$ctg_len, \$cln_id);

  while ($ctg_sth->fetch()) {
    my (undef, $cln_start, $cln_end) = split(/\./, $ctg_name);

    # Fail early with a clear message instead of doing arithmetic on
    # undefined or non-numeric values when the name has an unexpected format.
    unless (defined($cln_start) && defined($cln_end)
            && $cln_start =~ /\A\d+\z/ && $cln_end =~ /\A\d+\z/) {
      die("Cannot read clone start/end from contig name '$ctg_name'");
    }

    my $cln_len = $cln_end - $cln_start + 1;
    if ($cln_len != $ctg_len) {
      die("Contig len $ctg_len != Clone len $cln_len");
    }

    # The whole contig (1..length) maps onto start..end of the clone in
    # forward orientation.
    $asm_sth->execute($cln_id, $cln_start, $cln_end,
                      $ctg_id, 1, $ctg_len, 1);
  }

  $ctg_sth->finish();
  $asm_sth->finish();

  return;
}
212 
213 
214 
#
# Override clone_to_seq_region to provide briggsae specific behaviour.
# The contig_to_seq_region override that follows is commented out, so it
# is not active in this class.
#
219 
220 # sub contig_to_seq_region {
221 # my $self = shift;
222 # my $target_cs_name = shift;
223 
224 # my $target = $self->target();
225 # my $source = $self->source();
226 # my $dbh = $self->dbh();
227 
228 # $target_cs_name ||= 'contig';
229 
230 # $self->debug("CaenorhabditisBriggsae Specific: Transforming contigs into " .
231 # "$target_cs_name seq_regions");
232 
233 # my $cs_id = $self->get_coord_system_id($target_cs_name);
234 
235 # #There are two types of contigs in briggsae:
236 
237 # #
238 # # cosmids/clones
239 # #
240 # my $sth = $dbh->prepare
241 # ("INSERT INTO $target.seq_region " .
242 # "SELECT contig_id, name, $cs_id, length " .
243 # "FROM $source.contig " .
244 # "WHERE name not like 'c%'");
245 
246 # $sth->execute();
247 # $sth->finish();
248 
249 # #
250 # # WGS contigs
251 # #
252 # $sth = $dbh->prepare
253 # ("INSERT INTO $target.seq_region " .
254 # "SELECT ctg.contig_id, cln.name, $cs_id, length " .
255 # "FROM $source.contig ctg, $source.clone cln " .
256 # "WHERE ctg.clone_id = cln.clone_id " .
257 # "AND ctg.name like 'c%'");
258 
259 # $sth->execute();
260 # $sth->finish();
261 
262 # return;
263 # }
264 
265 
266 
#
# Creates one seq_region per clone in the target database and records the
# old clone_id -> new seq_region id pairs in the target tmp_cln_map table.
#
# Arg [1] : (optional) name of the target coordinate system; falls back to
#           "clone" when not supplied (or false).
# Returns : nothing
#
# The clone length is not read from the database. It is taken to be the
# highest contig end found among the clone's contigs, where the end is the
# third dot-separated field of the contig name.
#
sub clone_to_seq_region {
  my $self = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  # target coord_system will have a different ID
  $target_cs_name ||= "clone";
  my $cs_id = $self->get_coord_system_id($target_cs_name);

  $self->debug("CaenorhabditisBriggsae Specific:Transforming clones " .
               "into $target_cs_name seq_regions");

  #
  # We don't want to make clones out of the WGS contigs, only out of
  # the clones with proper embl accessions. Also for some reason the
  # embl_offset is not set in the briggsae 17/18/19 databases, which means
  # we have to deduce the length from the name of the contigs!
  #
  my $select_sth = $dbh->prepare
    ("SELECT cl.clone_id,
             CONCAT(cl.embl_acc, '.', cl.embl_version),
             ctg.name
      FROM   $source.clone cl, $source.contig ctg
      WHERE  cl.clone_id = ctg.clone_id
      AND    cl.embl_acc not like 'c%'
      ORDER BY cl.clone_id");

  $select_sth->execute();

  my ($clone_id, $embl_acc, $ctg_name);
  $select_sth->bind_columns(\$clone_id, \$embl_acc, \$ctg_name);

  my $highest_end      = undef;
  my $current_clone    = undef;
  my $current_clone_id = undef;

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES(?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");

  # Rows arrive ordered by clone, so a clone is complete (and can be
  # stored) as soon as a row for a different clone shows up.
  while ($select_sth->fetch()) {
    #extract the end position of the contig
    my (undef, undef, $ctg_end) = split(/\./, $ctg_name);

    if (!defined($current_clone)) {
      #very first row: start tracking the first clone
      $current_clone    = $embl_acc;
      $current_clone_id = $clone_id;
      $highest_end      = $ctg_end;
    }

    if ($current_clone ne $embl_acc) {
      #started new clone, store last one
      $insert_sth->execute($current_clone, $cs_id, $highest_end);
      #store mapping of old -> new ids in temp table
      $tmp_insert_sth->execute($current_clone_id,
                               $insert_sth->{'mysql_insertid'});

      $current_clone    = $embl_acc;
      $current_clone_id = $clone_id;
      $highest_end      = $ctg_end;
    } elsif ($ctg_end > $highest_end) {
      #same clone, adjust end if end of contig is highest yet seen
      $highest_end = $ctg_end;
    }
  }

  if (defined($current_clone)) {
    #insert the last clone, which no following row has flushed yet
    $insert_sth->execute($current_clone, $cs_id, $highest_end);
    $tmp_insert_sth->execute($current_clone_id,
                             $insert_sth->{'mysql_insertid'});
  } else {
    #the query returned nothing: do not insert an empty, nameless seq_region
    $self->debug("CaenorhabditisBriggsae Specific: no clones found, " .
                 "no clone seq_regions created");
  }

  $select_sth->finish();
  $insert_sth->finish();
  $tmp_insert_sth->finish();

  return;
}
352 
353 
354 1;
SeqStoreConverter::BasicConverter
Definition: BasicConverter.pm:3