ensembl-hive  2.8.1
MusMusculus.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 use strict;
21 use warnings;
22 
24 
25 package SeqStoreConverter::MusMusculus;
26 
27 use vars qw(@ISA);
28 
30 
31 
# Registers the mouse coordinate systems in the target database and
# records which of them can be mapped onto each other.
#
# Writes four rows to <target>.coord_system (chromosome, supercontig,
# clone, contig) and six 'assembly.mapping' rows to <target>.meta.
# Only the chromosome coordinate system carries a version, taken from
# get_default_assembly().
#
# Takes no arguments besides the invocant and returns nothing.
sub create_coord_systems {
    my $self = shift;

    $self->debug("MusMusculus Specific: loading assembly data");

    my $target = $self->target();
    my $dbh    = $self->dbh();

    my $ass_def = $self->get_default_assembly();

    # Each entry is [ name, version, attrib, rank ], in the order of the
    # placeholders of the INSERT statement below.
    my @coords = (
        [ 'chromosome',  $ass_def, 'default_version',                1 ],
        [ 'supercontig', undef,    'default_version',                2 ],
        [ 'clone',       undef,    'default_version',                3 ],
        [ 'contig',      undef,    'default_version,sequence_level', 4 ],
    );

    # Two-part entries are direct mappings; three-part entries name an
    # intermediate coordinate system in the middle.
    my @assembly_mappings = (
        "chromosome:$ass_def|contig",
        "supercontig|contig",
        "clone|contig",
        "chromosome:$ass_def|contig|clone",
        "chromosome:$ass_def|contig|supercontig",
        "supercontig|contig|clone",
    );

    $self->debug("Building coord_system table");

    # `rank` is backtick-quoted because RANK is a reserved word in
    # MySQL 8.0.2 and later; the quoted form is accepted by older servers
    # as well.
    my $cs_sth = $dbh->prepare(
        "INSERT INTO $target.coord_system " .
        "(name, version, attrib, `rank`) VALUES (?,?,?,?)");

    foreach my $cs (@coords) {
        $cs_sth->execute(@$cs);
    }
    $cs_sth->finish();

    $self->debug("Adding assembly.mapping entries to meta table");

    my $meta_sth = $dbh->prepare(
        "INSERT INTO $target.meta(meta_key, meta_value) " .
        "VALUES ('assembly.mapping', ?)");

    foreach my $mapping (@assembly_mappings) {
        $meta_sth->execute($mapping);
    }
    $meta_sth->finish();

    return;
}
81 
82 
83 
# Creates the seq_regions for every mouse coordinate system by running
# the per-coordinate-system conversion steps in a fixed order: contig,
# clone, chromosome, then supercontig.
#
# The result of the final step is passed back to the caller.
sub create_seq_regions {
    my ($self) = @_;

    $self->debug("MusMusculus Specific: creating contig, clone, chromosome and supercontig seq_regions");

    # These three are run purely for their side effects.
    for my $step (qw(contig_to_seq_region
                     clone_to_seq_region
                     chromosome_to_seq_region)) {
        $self->$step();
    }

    return $self->supercontig_to_seq_region();
}
95 
# Loads the assembly data by running the contig -> chromosome,
# contig -> clone and contig -> supercontig assembly steps, in that order.
#
# The result of the final step is passed back to the caller.
sub create_assembly {
    my ($self) = @_;

    $self->debug("MusMusculus Specific: loading assembly data");

    # These two are run purely for their side effects.
    for my $step (qw(assembly_contig_chromosome assembly_contig_clone)) {
        $self->$step();
    }

    return $self->assembly_contig_supercontig();
}
105 
#
# Override clone_to_seq_region to provide mouse specific behaviour.
# A mouse specific contig_to_seq_region override also exists below, but
# it is entirely commented out and therefore not in effect.
#
110 
111 # sub contig_to_seq_region {
112 # my $self = shift;
113 # my $target_cs_name = shift;
114 
115 # my $target = $self->target();
116 # my $source = $self->source();
117 # my $dbh = $self->dbh();
118 
119 # $target_cs_name ||= 'contig';
120 
121 # $self->debug("MusMusculus Specific: Transforming contigs into " .
122 # "$target_cs_name seq_regions");
123 
124 # my $cs_id = $self->get_coord_system_id($target_cs_name);
125 
126 # #There are two types of contigs in mouse:
127 
128 # #
129 # # Contigs which form BAC clones
130 # #
131 # my $sth = $dbh->prepare
132 # ("INSERT INTO $target.seq_region " .
133 # "SELECT contig_id, name, $cs_id, length " .
134 # "FROM $source.contig " .
135 # "WHERE name not like 'C%'");
136 
137 # $sth->execute();
138 # $sth->finish();
139 
140 # #
141 # # Contigs which were created from whole genome shotgun
142 # #
143 # $sth = $dbh->prepare
144 # ("INSERT INTO $target.seq_region " .
145 # "SELECT ctg.contig_id, cln.name, $cs_id, length " .
146 # "FROM $source.contig ctg, $source.clone cln " .
147 # "WHERE ctg.clone_id = cln.clone_id " .
148 # "AND ctg.name like 'C%'");
149 
150 # $sth->execute();
151 # $sth->finish();
152 
153 # return;
154 # }
155 
156 
157 
# Turns source clones into seq_regions of the target database.
#
# Arguments:
#   $target_cs_name - optional name of the coordinate system the new
#                     seq_regions belong to; defaults to "clone".
#
# For every selected clone one row is inserted into <target>.seq_region,
# named "<embl_acc>.<embl_version>", with a length equal to the largest
# embl_offset + length - 1 among the clone's contigs. The old clone_id
# and the newly generated seq_region id are stored as a pair in
# <target>.tmp_cln_map.
#
# Returns nothing.
sub clone_to_seq_region {
    my ($self, $target_cs_name) = @_;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    # The coordinate system id is looked up by name in the target database.
    $target_cs_name ||= "clone";
    my $cs_id = $self->get_coord_system_id($target_cs_name);

    $self->debug("MusMusculus Specific:Transforming clones into $target_cs_name seq_regions");

    # Clones whose accession starts with 'C' are left out: only actual
    # BACs with proper EMBL accessions should become clones, not the WGS
    # contigs.
    my $clone_sql = "SELECT cl.clone_id,
 CONCAT(cl.embl_acc, '.', cl.embl_version),
 MAX(ctg.embl_offset+ctg.length-1)
 FROM $source.clone cl, $source.contig ctg
 WHERE cl.clone_id = ctg.clone_id
 AND cl.embl_acc not like 'C%'
 GROUP BY ctg.clone_id";

    my $clones_sth = $dbh->prepare($clone_sql);
    $clones_sth->execute();

    my $region_sth = $dbh->prepare(
        "INSERT INTO $target.seq_region (name, coord_system_id, length) VALUES(?,?,?)");

    my $map_sth = $dbh->prepare(
        "INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");

    while (my $row = $clones_sth->fetchrow_arrayref()) {
        my ($old_clone_id, $accession, $region_length) = @$row;

        # The accession is deliberately passed in stringified form.
        $region_sth->execute("$accession", $cs_id, $region_length);

        # Remember which new seq_region id replaced the old clone id.
        $map_sth->execute($old_clone_id, $region_sth->{'mysql_insertid'});
    }

    $clones_sth->finish();
    $region_sth->finish();
    $map_sth->finish();

    return;
}
211 
212 
213 1;
SeqStoreConverter::BasicConverter
Definition: BasicConverter.pm:3