3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
25 package SeqStoreConverter::AnophelesGambiae;
# Populates the target database's coord_system table with the chunk,
# chromosome and scaffold coordinate systems, then records the assembly
# mappings between them as 'assembly.mapping' rows in the meta table.
# NOTE(review): this listing omits several original lines (the unpacking of
# $self, the declaration that the coord-system list is assigned to, the
# per-row execute and the closing braces) - confirm against the full file.
32 sub create_coord_systems {
35 $self->debug(
"AnophelesGambiae Specific: creating scaffold, chunk and, " .
36 "chromosome coord systems");
38 my $target = $self->target();
39 my $dbh = $self->dbh();
41 my $ass_def = $self->get_default_assembly();
# Each entry has four fields, in the same order as the
# (name, version, attrib, rank) column list of the INSERT prepared below.
# Only the chromosome entry carries a version (the default assembly); the
# chunk entry is the one flagged 'sequence_level'.
44 ([
'chunk', undef,
'default_version,sequence_level', 3],
45 [
'chromosome', $ass_def,
'default_version', 1],
46 [
"scaffold" , undef,
"default_version", 2]);
# Mapping paths between the coordinate systems; each string is stored
# verbatim as a meta_value further down.
48 my @assembly_mappings = (
"chromosome:$ass_def|chunk",
49 "chromosome:$ass_def|scaffold",
50 "scaffold|chromosome:$ass_def|chunk");
52 $self->debug(
"Building coord_system table");
54 my $sth = $dbh->prepare(
"INSERT INTO $target.coord_system " .
55 "(name, version, attrib,rank) VALUES (?,?,?,?)");
# Remember the id generated for each inserted coord system, keyed by the
# entry's first field (its name).
# NOTE(review): the execute call that performs the insert and the
# declaration of %coord_system_ids are not visible here - presumably the
# four fields of $cs are bound positionally; confirm.
59 foreach my $cs (@coords) {
61 $coord_system_ids{$cs->[0]} = $sth->{
'mysql_insertid'};
# Reuse $sth for the meta table: one 'assembly.mapping' row per mapping.
65 $sth = $dbh->prepare(
"INSERT INTO $target.meta(meta_key, meta_value) " .
66 "VALUES ('assembly.mapping', ?)");
68 foreach my $mapping (@assembly_mappings) {
69 $sth->execute($mapping);
# Creates the seq_region entries by delegating to three converter methods
# in turn: contigs under the name 'chunk', supercontigs under the name
# 'scaffold', and chromosomes with no argument.
# NOTE(review): the unpacking of $self and the closing brace are not visible
# in this listing; the three helper methods are defined elsewhere.
78 sub create_seq_regions {
81 $self->debug(
"AnophelesGambiae Specific: creating seq_regions");
83 $self->contig_to_seq_region(
'chunk');
84 $self->supercontig_to_seq_region(
'scaffold');
85 $self->chromosome_to_seq_region();
# Loads the assembly table by calling the contig->chromosome and
# supercontig->chromosome assembly helpers in turn.
# NOTE(review): the enclosing sub header, the unpacking of $self and the
# closing brace are not visible in this listing - confirm which sub these
# statements belong to.
93 $self->debug(
"AnophelesGambiae Specific: loading assembly table");
95 $self->assembly_contig_chromosome();
96 $self->assembly_supercontig_chromosome();
# Rebuilds the prediction_exon and prediction_transcript tables in the
# target database from the source prediction_transcript table.
#
# Step 1: each source row is projected from contig to chromosomal
# coordinates via the source assembly table, rows are read ordered by
# transcript id and exon_rank, adjacent exons of the same transcript are
# merged, and each resulting exon is written through store_pexon().
# Step 2: a single INSERT ... SELECT writes one prediction_transcript row
# per transcript id, spanning the MIN start / MAX end of its projected rows.
#
# NOTE(review): this listing omits many original lines (the unpacking of
# $self, the declarations of $sql, $rank, $prev_id and %prev_exon, the
# execute calls, the else branches and the closing braces). The comments
# below describe only what is visible.
102 sub transfer_prediction_transcripts {
105 my $source = $self->source();
106 my $target = $self->target();
107 my $dbh = $self->dbh();
109 $self->debug(
"AnophelesGambiae Specific: building prediction_exon table");
# Coordinate projection used by the query below:
#   contig_ori = 1 : chr position = contig position + chr_start - contig_start
#   otherwise      : the interval is mirrored about the contig, i.e.
#                    start = chr_start + contig_end - pt.contig_end
#                    end   = chr_start + contig_end - pt.contig_start
# The resulting strand is contig_ori * pt.contig_strand.
112 # In Anopheles the prediction transcripts were computed in chromosomal
113 # coords, so convert them to chromosomal coords and merge any adjacent
118 "SELECT pt.prediction_transcript_id, tcm.new_id as seq_region_id, " .
119 " IF(a.contig_ori=1,(pt.contig_start+a.chr_start-a.contig_start),".
120 " (a.chr_start+a.contig_end-pt.contig_end)) as start, " .
121 " IF(a.contig_ori=1,(pt.contig_end+a.chr_start-a.contig_start)," .
122 " (a.chr_start+a.contig_end-pt.contig_start)) as end, " .
123 " a.contig_ori * pt.contig_strand as strand, " .
124 " pt.start_phase, pt.score, pt.p_value " .
125 "FROM $source.assembly a, $target.tmp_chr_map tcm, " .
126 " $source.prediction_transcript pt " .
127 "WHERE pt.contig_id = a.contig_id " .
128 "AND a.chromosome_id = tcm.old_id " .
129 "ORDER BY pt.prediction_transcript_id, exon_rank";
131 my $sth = $dbh->prepare($sql);
# Bounds of the exon currently held in %prev_exon, used for the adjacency
# tests in the loop.
134 my $prev_end = undef;
135 my $prev_start = undef;
# %prev_exon acts as a one-exon buffer: a row is only written once the next
# row shows it cannot be merged any further.
141 while(my $row = $sth->fetchrow_arrayref()) {
142 my ($pt_id, $sr_id, $sr_start, $sr_end, $sr_strand, $start_phase,
143 $score, $p_value) = @$row;
145 if(defined($prev_id) && ($prev_id == $pt_id)) {
146 #still in the same transcript
# Forward strand: this exon starts exactly one base after the buffered one.
# NOTE(review): no update of $prev_end is visible in this branch, and $rank
# is neither declared nor incremented in the visible lines - confirm
# against the full file.
148 if($sr_strand == 1 &&
149 defined($prev_end) && $prev_end == $sr_start-1) {
150 $self->debug(
"merged exon $rank in prediction_transcript $pt_id\n");
151 #adjacent exons forward strand - merge them
152 $prev_exon{
'seq_region_end'} = $sr_end;
# Reverse strand: this exon ends exactly one base before the buffered one
# starts, so the buffered exon is extended leftwards.
154 } elsif($sr_strand == -1 &&
155 defined($prev_start) && $prev_start == $sr_end+1) {
156 $self->debug(
"merged exon $rank in prediction_transcript $pt_id\n");
157 #adjacent exons negative strand - merge them
158 $prev_exon{
'seq_region_start'} = $sr_start;
159 $prev_start = $sr_start;
161 #non-adjacent exons in the same transcript - no merge
164 #store the previous exon
165 $self->store_pexon(\%prev_exon);
167 #make current exon the previous exon
168 %prev_exon = (
'prediction_transcript_id' => $pt_id,
169 'seq_region_id' => $sr_id,
170 'seq_region_start' => $sr_start,
171 'seq_region_end' => $sr_end,
172 'seq_region_strand' => $sr_strand,
173 'start_phase' => $start_phase,
175 'p_value' => $p_value,
# NOTE(review): presumably the start of the "different transcript" branch
# (its else line is not visible): flush the buffered exon, if there is one,
# then start buffering the current row.
180 $self->store_pexon(\%prev_exon)
if(%prev_exon);
186 $prev_start = $sr_start;
187 %prev_exon = (
'prediction_transcript_id' => $pt_id,
188 'seq_region_id' => $sr_id,
189 'seq_region_start' => $sr_start,
190 'seq_region_end' => $sr_end,
191 'seq_region_strand' => $sr_strand,
192 'start_phase' => $start_phase,
194 'p_value' => $p_value,
199 #store the very last exon in the table
200 $self->store_pexon(\%prev_exon)
if(%prev_exon);
# Step 2: one prediction_transcript row per transcript id, using the same
# projection as above but aggregated with MIN(start) / MAX(end).
# NOTE(review): the statement-issuing call, the end of the debug message and
# the select expression for analysis_id are not visible in this listing.
205 $self->debug(
"AnophelesGambiae Specific: building prediction_transcript " .
209 (
"INSERT INTO $target.prediction_transcript (prediction_transcript_id, " .
210 " seq_region_id, seq_region_start, seq_region_end, " .
211 " seq_region_strand, analysis_id ) " .
212 "SELECT pt.prediction_transcript_id, tcm.new_id as seq_region_id, " .
213 " MIN(IF(a.contig_ori=1,(pt.contig_start+a.chr_start-a.contig_start),".
214 " (a.chr_start+a.contig_end-pt.contig_end))) as start, " .
215 " MAX(IF(a.contig_ori=1,(pt.contig_end+a.chr_start-a.contig_start)," .
216 " (a.chr_start+a.contig_end-pt.contig_start))) as end, " .
217 " a.contig_ori * pt.contig_strand as strand, " .
219 "FROM $source.assembly a, $target.tmp_chr_map tcm, " .
220 " $source.prediction_transcript pt " .
221 "WHERE pt.contig_id = a.contig_id " .
222 "AND a.chromosome_id = tcm.old_id " .
223 "GROUP BY prediction_transcript_id");
230 # helper function to store prediction exon
# Inserts one row into the target prediction_exon table from the fields of
# the $pexon hash reference, then releases the statement handle.
# NOTE(review): the sub header and the unpacking of $self / $pexon are not
# visible in this listing. The INSERT has nine placeholders but only seven
# bound values are visible (those for exon_rank and score are missing) -
# confirm against the full file.
# NOTE(review): $source is not used in the visible lines.
237 my $target = $self->target();
238 my $source = $self->source();
239 my $dbh = $self->dbh();
# NOTE(review): the statement is prepared and finished on every call, i.e.
# once per stored exon.
241 my $store_sth = $dbh->prepare
242 (
"INSERT INTO $target.prediction_exon (prediction_transcript_id, " .
243 " exon_rank, seq_region_id, seq_region_start, seq_region_end, " .
244 " seq_region_strand, start_phase, score, p_value) " .
245 "VALUES (?,?,?,?,?,?,?,?,?)");
247 $store_sth->execute($pexon->{
'prediction_transcript_id'},
249 $pexon->{
'seq_region_id'},
250 $pexon->{
'seq_region_start'},
251 $pexon->{
'seq_region_end'},
252 $pexon->{
'seq_region_strand'},
253 $pexon->{
'start_phase'},
255 $pexon->{
'p_value'});
256 $store_sth->finish();