ensembl-hive  2.7.0
GallusGallus.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 use strict;
21 use warnings;
22 
24 
25 package SeqStoreConverter::GallusGallus;
26 
27 use vars qw(@ISA);
28 
30 
31 
#
# Populates the coord_system table of the target database with the
# chromosome, supercontig and contig coordinate systems, then records the
# mappings between them as 'assembly.mapping' rows in the meta table.
#
# Takes no arguments besides the invocant and returns nothing.
#
sub create_coord_systems {
    my $self = shift;

    $self->debug("GallusGallus Specific: creating coord systems");

    my $target  = $self->target();
    my $dbh     = $self->dbh();
    my $ass_def = $self->get_default_assembly();

    # One entry per coord system, in INSERT column order:
    # name, version, attrib, rank.  The contig level carries no version.
    my @coords = (
        [ "chromosome",  $ass_def, "default_version",                1 ],
        [ "supercontig", $ass_def, "default_version",                2 ],
        [ "contig",      undef,    "default_version,sequence_level", 3 ],
    );

    my @assembly_mappings = (
        "chromosome:$ass_def|contig",
        "supercontig:$ass_def|contig",
        "chromosome:$ass_def|contig|supercontig:$ass_def",
    );

    $self->debug("Building coord_system table");

    my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                            "(name, version, attrib, rank) VALUES (?,?,?,?)");

    # The generated ids are not needed here; other methods look them up
    # by name through get_coord_system_id().
    foreach my $cs (@coords) {
        $sth->execute(@$cs);
    }
    $sth->finish();

    $self->debug("Adding assembly.mapping entries to meta table");

    $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                         "VALUES ('assembly.mapping', ?)");

    foreach my $mapping (@assembly_mappings) {
        $sth->execute($mapping);
    }
    $sth->finish();

    return;
}
77 
78 
79 
#
# Creates the seq_region rows for every coordinate system used by the
# chicken assembly by delegating, in order, to contig_to_seq_region(),
# chromosome_to_seq_region() and supercontig_to_seq_region().
#
# Returns whatever supercontig_to_seq_region() returns, as before.
#
sub create_seq_regions {
    my $self = shift;

    # No clone-level seq_regions are created here, so the message names
    # only the three coordinate systems that are actually converted.
    $self->debug("GallusGallus Specific: creating contig, " .
                 "chromosome and supercontig seq_regions");

    $self->contig_to_seq_region();
    $self->chromosome_to_seq_region();
    return $self->supercontig_to_seq_region();
}
90 
#
# Overridden to trim contig names while copying contigs into seq_region.
#
# Every row of the source contig table is inserted into the target
# seq_region table under the coordinate system named by the optional
# argument $target_cs_name (defaults to 'contig').  The stored name is
# the contig name up to, but not including, its second dot, e.g.
# "AB.1.500" becomes "AB.1".  Names with fewer than two dots are kept
# unchanged.
#
sub contig_to_seq_region {
    my $self           = shift;
    my $target_cs_name = shift;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    $target_cs_name ||= 'contig';

    $self->debug("GallusGallus Specific: Transforming contigs into " .
                 "$target_cs_name seq_regions");

    my $cs_id = $self->get_coord_system_id($target_cs_name);

    # SUBSTRING_INDEX(name, '.', 2) yields everything left of the second
    # dot and returns the whole name when there are fewer than two dots.
    # The previous nested LOCATE/SUBSTRING arithmetic gave the same result
    # for names with two or more dots, but produced an empty name for
    # dot-less names and cut single-dot names at their only dot.
    my $sth = $dbh->prepare(
        "INSERT INTO $target.seq_region " .
        "SELECT contig_id, SUBSTRING_INDEX(name, '.', 2), $cs_id, length " .
        "FROM $source.contig");

    $sth->execute();
    $sth->finish();
}
118 
#
# Overridden so that leftover garbage in the chromosome table is not used.
#
# Copies each source chromosome that is referenced by at least one row of
# the source assembly table into the target seq_region table, under the
# coordinate system named by the optional argument $target_cs_name
# (defaults to "chromosome").  For every chromosome copied, the pair
# (old chromosome_id, new seq_region id) is written to the target
# tmp_chr_map table.  Returns nothing.
#
sub chromosome_to_seq_region {
    my ($self, $target_cs_name) = @_;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    $target_cs_name = "chromosome" unless $target_cs_name;
    my $cs_id = $self->get_coord_system_id($target_cs_name);

    $self->debug("GallusGallus Specific: Transforming chromosomes into $target_cs_name seq_regions");

    # The join against the assembly table filters out chromosomes that
    # are not part of the assembly; GROUP BY collapses the join back to
    # one row per chromosome.
    my $fetch_chromosomes = $dbh->prepare(
        "SELECT c.chromosome_id, c.name, c.length " .
        "FROM $source.chromosome c, $source.assembly a " .
        "WHERE c.chromosome_id = a.chromosome_id group by c.chromosome_id");

    my $store_seq_region = $dbh->prepare(
        "INSERT INTO $target.seq_region (name, coord_system_id, length) " .
        "VALUES (?,?,?)");

    my $store_id_mapping = $dbh->prepare(
        "INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

    $fetch_chromosomes->execute();

    while (my ($old_id, $chr_name, $chr_length)
               = $fetch_chromosomes->fetchrow_array()) {
        # Create the seq_region row first so that its generated id can
        # be paired with the old chromosome id.
        $store_seq_region->execute($chr_name, $cs_id, $chr_length);
        $store_id_mapping->execute($old_id,
                                   $store_seq_region->{'mysql_insertid'});
    }

    $_->finish()
        for ($fetch_chromosomes, $store_seq_region, $store_id_mapping);

    return;
}
166 
#
# Loads the assembly data by delegating to assembly_contig_chromosome()
# and then assembly_contig_supercontig().  The result of the second call
# is passed back to the caller.
#
sub create_assembly {
    my ($self) = @_;

    $self->debug("GallusGallus Specific: loading assembly data");

    $self->assembly_contig_chromosome();
    return $self->assembly_contig_supercontig();
}
174 
175 
176 
177 
178 1;
SeqStoreConverter::BasicConverter
Definition: BasicConverter.pm:3