3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
25 package SeqStoreConverter::CaenorhabditisBriggsae;
# create_coord_systems
#
# Populates the target coord_system table with the three briggsae
# coordinate systems (scaffold, clone, contig) and records the assembly
# mapping paths between them in the target meta table.
#
# Takes no arguments besides the converter object; returns nothing.
sub create_coord_systems {
  my $self = shift;

  $self->debug("CaenorhabditisBriggsae Specific: creating clone, scaffold," .
               " and contig coordinate systems");

  my $target  = $self->target();
  my $dbh     = $self->dbh();
  my $ass_def = $self->get_default_assembly();

  # One row per coordinate system, in the column order used by the INSERT
  # below: name, version, attrib, rank.
  my @coords =
    (["scaffold", $ass_def, "default_version",                1],
     ['clone',    undef,    'default_version',                2],
     ["contig",   undef,    "default_version,sequence_level", 3]);

  # NOTE(review): assembly_contig_clone() loads clone/contig rows into the
  # assembly table; confirm whether a direct "clone|contig" mapping should
  # also be listed here.
  my @assembly_mappings =
    ("scaffold:$ass_def|contig",
     "scaffold:$ass_def|contig|clone");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                          "(name, version, attrib, rank) VALUES (?,?,?,?)");

  # Auto-increment id assigned to each coordinate system, keyed by name.
  my %coord_system_ids;

  foreach my $cs (@coords) {
    # The row must actually be inserted before mysql_insertid is meaningful.
    $sth->execute(@$cs);
    $coord_system_ids{$cs->[0]} = $sth->{'mysql_insertid'};
  }
  $sth->finish();

  $self->debug("Adding assembly.mapping entries to meta table");

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }
  $sth->finish();

  return;
}
# create_seq_regions
#
# Creates the seq_region entries for this species by converting contigs,
# then clones, then chromosomes. The chromosomes are loaded into the
# 'scaffold' coordinate system rather than the default 'chromosome' one.
#
# Takes no arguments besides the converter object; returns nothing.
sub create_seq_regions {
  my $self = shift;

  $self->debug("CaenorhabditisBriggsae Specific: creating contig, " .
               "clone, contig and scaffold seq_regions");

  $self->contig_to_seq_region();
  $self->clone_to_seq_region();
  $self->chromosome_to_seq_region('scaffold');

  return;
}
# chromosome_to_seq_region
#
# Copies every row of the source chromosome table into the target
# seq_region table under the given coordinate system, and records the
# old chromosome_id -> new seq_region id pairs in the target tmp_chr_map
# table.
#
# Arg [1] : (optional) string $target_cs_name
#           Name of the coordinate system to load into; defaults to
#           "chromosome".
# Returns : nothing
sub chromosome_to_seq_region {
  my $self           = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $target_cs_name ||= "chromosome";
  my $cs_id = $self->get_coord_system_id($target_cs_name);

  $self->debug("CaenorhabditisBriggsae Specific: Transforming " .
               "chromosomes into $target_cs_name seq_regions");

  ## For consistency with mart and v19 we need to keep chr name the same for
  ## now, so the following section is commented out and replaced:
  ##strip off the leading 'cb25.' from the chromosome name
  #my $select_sth = $dbh->prepare
  #  ("SELECT chromosome_id,substring(name,6),length FROM $source.chromosome");

  my $select_sth = $dbh->prepare
    ("SELECT chromosome_id,name,length FROM $source.chromosome");

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

  $select_sth->execute();

  my ($chrom_id, $name, $length);
  $select_sth->bind_columns(\$chrom_id, \$name, \$length);

  while ($select_sth->fetch()) {
    #insert into seq_region table
    $insert_sth->execute($name, $cs_id, $length);
    #copy old/new mapping into temporary table
    $tmp_insert_sth->execute($chrom_id, $insert_sth->{'mysql_insertid'});
  }

  $select_sth->finish();
  $insert_sth->finish();
  $tmp_insert_sth->finish();

  return;
}
# create_assembly
#
# Loads the assembly table by running the contig/chromosome step followed
# by the contig/clone step.
#
# Takes no arguments besides the converter object; returns nothing.
sub create_assembly {
  my $self = shift;

  $self->debug("CaenorhabditisBriggsae Specific: loading assembly data");

  $self->assembly_contig_chromosome();
  $self->assembly_contig_clone();

  return;
}
# assembly_contig_clone
#
# Override the assembly contig clone method because the briggsae database
# does not have any embl_offsets.
#
# For every source contig whose name does not start with 'c' and whose
# clone has an entry in tmp_cln_map, the clone start and end are taken
# from the second and third '.'-separated fields of the contig name, and
# one clone (assembled) / contig (component) row is written to the target
# assembly table.
#
# Returns : nothing
# Throws  : dies if the span encoded in the contig name disagrees with the
#           contig length stored in the source database.
sub assembly_contig_clone {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $self->debug("CaenorhabditisBriggsae Specific: loading contig/clone " .
               "assembly relationship");

  # Seven placeholders, matching the seven values bound in the loop below.
  my $asm_sth = $dbh->prepare
    ("INSERT INTO $target.assembly " .
     "set asm_seq_region_id = ?, " .
     "    asm_start = ?, " .
     "    asm_end = ?, " .
     "    cmp_seq_region_id = ?, " .
     "    cmp_start = ?, " .
     "    cmp_end = ?, " .
     "    ori = ?");

  # get a list of the contigs that have clones, their ids, and the
  # corresponding clone ids
  my $ctg_sth = $dbh->prepare
    ("SELECT ctg.name, ctg.contig_id, ctg.length, cln.new_id " .
     "FROM $source.contig ctg, $target.tmp_cln_map cln " .
     "WHERE ctg.name not like 'c%' " .  # only contigs w/ proper accessions
     "AND ctg.clone_id = cln.old_id");

  # The statement has to be run before rows can be fetched from it.
  $ctg_sth->execute();

  my ($ctg_name, $ctg_id, $ctg_len, $cln_id);
  $ctg_sth->bind_columns(\$ctg_name, \$ctg_id, \$ctg_len, \$cln_id);

  while ($ctg_sth->fetch()) {
    my (undef, $cln_start, $cln_end) = split(/\./, $ctg_name);
    my $cln_len = $cln_end - $cln_start + 1;

    if ($cln_len != $ctg_len) {
      die("Contig len $ctg_len != Clone len $cln_len");
    }

    # The contig is placed whole (1..length) on the clone with ori 1.
    $asm_sth->execute($cln_id, $cln_start, $cln_end,
                      $ctg_id, 1, $ctg_len, 1);
  }

  $ctg_sth->finish();
  $asm_sth->finish();

  return;
}
216 # Override contig_to_seq_region and clone_to_seq_region to provide
217 # briggsae specific behaviour
220 # sub contig_to_seq_region {
222 # my $target_cs_name = shift;
224 # my $target = $self->target();
225 # my $source = $self->source();
226 # my $dbh = $self->dbh();
228 # $target_cs_name ||= 'contig';
230 # $self->debug("CaenorhabditisBriggsae Specific: Transforming contigs into " .
231 # "$target_cs_name seq_regions");
233 # my $cs_id = $self->get_coord_system_id($target_cs_name);
235 # #There are two types of contigs in briggsae:
240 # my $sth = $dbh->prepare
241 # ("INSERT INTO $target.seq_region " .
242 # "SELECT contig_id, name, $cs_id, length " .
243 # "FROM $source.contig " .
244 # "WHERE name not like 'c%'");
252 # $sth = $dbh->prepare
253 # ("INSERT INTO $target.seq_region " .
254 # "SELECT ctg.contig_id, cln.name, $cs_id, length " .
255 # "FROM $source.contig ctg, $source.clone cln " .
256 # "WHERE ctg.clone_id = cln.clone_id " .
257 # "AND ctg.name like 'c%'");
# clone_to_seq_region
#
# Creates one target seq_region per source clone whose embl_acc does not
# start with 'c'. The seq_region is named "<embl_acc>.<embl_version>", and
# the old clone_id -> new seq_region id pairs are recorded in the target
# tmp_cln_map table.
#
# The clone length is not read from the database. It is taken to be the
# largest end position found in the names of the clone's contigs (the
# third '.'-separated field of the contig name).
#
# Arg [1] : (optional) string $target_cs_name
#           Name of the coordinate system to load into; defaults to "clone".
# Returns : nothing
sub clone_to_seq_region {
  my $self           = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  # target coord_system will have a different ID
  $target_cs_name ||= "clone";
  my $cs_id = $self->get_coord_system_id($target_cs_name);

  $self->debug("CaenorhabditisBriggsae Specific:Transforming clones " .
               "into $target_cs_name seq_regions");

  # We don't want to make clones out of the WGS contigs, only out of
  # the clones with proper embl accessions. Also for some reason the
  # embl_offset is not set in the briggsae 17/18/19 databases, which means
  # we have to deduce the length from the name of the contigs!
  #
  # Three columns are selected to match the three bound variables below.
  # Ordering by clone_id keeps all contigs of one clone on adjacent rows,
  # which the grouping loop relies on.
  my $select_sth = $dbh->prepare
    ("SELECT cl.clone_id, " .
     "       CONCAT(cl.embl_acc, '.', cl.embl_version), " .
     "       ctg.name " .
     "FROM   $source.clone cl, $source.contig ctg " .
     "WHERE  cl.clone_id = ctg.clone_id " .
     "AND    cl.embl_acc not like 'c%' " .
     "ORDER BY cl.clone_id");

  $select_sth->execute();

  my ($clone_id, $embl_acc, $ctg_name);
  $select_sth->bind_columns(\$clone_id, \$embl_acc, \$ctg_name);

  my $highest_end      = undef;
  my $current_clone    = undef;
  my $current_clone_id = undef;

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");

  while ($select_sth->fetch()) {
    #extract the end position of the contig
    my (undef, undef, $ctg_end) = split(/\./, $ctg_name);

    if (!defined($current_clone)) {
      # very first row: start tracking the first clone
      $current_clone    = $embl_acc;
      $current_clone_id = $clone_id;
      $highest_end      = $ctg_end;
    }

    if ($current_clone ne $embl_acc) {
      #started new clone, store last one
      $insert_sth->execute($current_clone, $cs_id, $highest_end);
      #store mapping of old -> new ids in temp table
      $tmp_insert_sth->execute($current_clone_id,
                               $insert_sth->{'mysql_insertid'});

      $current_clone    = $embl_acc;
      $current_clone_id = $clone_id;
      $highest_end      = $ctg_end;
    } elsif ($ctg_end > $highest_end) {
      #same clone, adjust end if end of contig is highest yet seen
      $highest_end = $ctg_end;
    }
  }

  #insert the last clone, but only if the query returned any rows at all;
  #otherwise there is no pending clone and we would insert undef values
  if (defined($current_clone)) {
    $insert_sth->execute($current_clone, $cs_id, $highest_end);
    $tmp_insert_sth->execute($current_clone_id,
                             $insert_sth->{'mysql_insertid'});
  }

  $select_sth->finish();
  $insert_sth->finish();
  $tmp_insert_sth->finish();

  return;
}