ensembl-hive  2.8.1
AnophelesGambiae.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 use strict;
21 use warnings;
22 
24 
25 package SeqStoreConverter::AnophelesGambiae;
26 
27 use vars qw(@ISA);
28 
30 
31 
#
# Populates the target database's coord_system table with the three
# coordinate systems used for Anopheles gambiae (chunk, chromosome and
# scaffold) and records the assembly mappings between them in the meta
# table under the 'assembly.mapping' key.
#
# Arguments : none (reads target(), dbh() and get_default_assembly()
#             from $self)
# Returns   : nothing
#
sub create_coord_systems {
  my $self = shift;

  $self->debug("AnophelesGambiae Specific: creating scaffold, chunk and, " .
               "chromosome coord systems");

  my $target  = $self->target();
  my $dbh     = $self->dbh();
  my $ass_def = $self->get_default_assembly();

  # Each entry is [name, version, attrib, rank], in the column order of
  # the INSERT below. Only the chromosome system carries a version.
  my @coords =
    (['chunk',      undef,    'default_version,sequence_level', 3],
     ['chromosome', $ass_def, 'default_version',                1],
     ["scaffold",   undef,    "default_version",                2]);

  my @assembly_mappings = ("chromosome:$ass_def|chunk",
                           "chromosome:$ass_def|scaffold",
                           "scaffold|chromosome:$ass_def|chunk");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                          "(name, version, attrib,rank) VALUES (?,?,?,?)");

  # The generated coord_system ids were previously collected into a hash
  # that nothing ever read; that dead bookkeeping has been dropped.
  foreach my $cs (@coords) {
    $sth->execute(@$cs);
  }
  $sth->finish();

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }

  $sth->finish();

  return;
}
76 
77 
#
# Creates the seq_region entries for Anopheles gambiae by converting
# contigs into the 'chunk' coordinate system, supercontigs into the
# 'scaffold' coordinate system, and finally the chromosomes.
#
# Returns whatever chromosome_to_seq_region() returns (it is the last
# call made).
#
sub create_seq_regions {
  my ($self) = @_;

  $self->debug("AnophelesGambiae Specific: creating seq_regions");

  $self->contig_to_seq_region('chunk');
  $self->supercontig_to_seq_region('scaffold');

  return $self->chromosome_to_seq_region();
}
87 
88 
89 
#
# Loads the assembly table for Anopheles gambiae: first the
# contig-to-chromosome assembly, then the supercontig-to-chromosome
# assembly.
#
# Returns   : nothing
#
sub create_assembly {
  my ($self) = @_;

  $self->debug("AnophelesGambiae Specific: loading assembly table");

  # Run the two assembly loaders in their original order.
  foreach my $loader (qw(assembly_contig_chromosome
                         assembly_supercontig_chromosome)) {
    $self->$loader();
  }

  return;
}
100 
101 
#
# Builds the prediction_exon and prediction_transcript tables of the
# target database from the source prediction_transcript table.
#
# The source rows (one per exon) are projected from contig coordinates
# onto chromosome coordinates through the source assembly table. Exons
# of the same transcript that end up directly adjacent on the chromosome
# are merged into a single exon before being written via store_pexon().
# The prediction_transcript table is then filled with one row per
# transcript spanning the min start / max end of its exons.
#
# Arguments : none (reads source(), target() and dbh() from $self)
# Returns   : nothing
#
sub transfer_prediction_transcripts {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $self->debug("AnophelesGambiae Specific: building prediction_exon table");

  #
  # In Anopheles the prediction transcripts were computed in chromosomal
  # coords, so convert them to chromosomal coords and merge any adjacent
  # exons
  #

  my $sql =
    "SELECT pt.prediction_transcript_id, tcm.new_id as seq_region_id, " .
    " IF(a.contig_ori=1,(pt.contig_start+a.chr_start-a.contig_start),".
    " (a.chr_start+a.contig_end-pt.contig_end)) as start, " .
    " IF(a.contig_ori=1,(pt.contig_end+a.chr_start-a.contig_start)," .
    " (a.chr_start+a.contig_end-pt.contig_start)) as end, " .
    " a.contig_ori * pt.contig_strand as strand, " .
    " pt.start_phase, pt.score, pt.p_value " .
    "FROM $source.assembly a, $target.tmp_chr_map tcm, " .
    " $source.prediction_transcript pt " .
    "WHERE pt.contig_id = a.contig_id " .
    "AND a.chromosome_id = tcm.old_id " .
    "ORDER BY pt.prediction_transcript_id, exon_rank";

  my $sth = $dbh->prepare($sql);
  $sth->execute();

  # Rank of the exon currently held in %prev_exon within its transcript.
  my $rank;

  # The exon that has been read (and possibly extended by merging) but
  # not yet written. Adjacency is always tested against this pending
  # exon. Previously separate $prev_start/$prev_end trackers were used
  # and they were not refreshed when a non-adjacent exon was seen, so
  # later exons were compared against a stale exon.
  my %prev_exon;

  while (my $row = $sth->fetchrow_arrayref()) {
    my ($pt_id, $sr_id, $sr_start, $sr_end, $sr_strand, $start_phase,
        $score, $p_value) = @$row;

    my $same_transcript =
      %prev_exon && $prev_exon{'prediction_transcript_id'} == $pt_id;

    if ($same_transcript) {
      my $prev_start = $prev_exon{'seq_region_start'};
      my $prev_end   = $prev_exon{'seq_region_end'};

      if ($sr_strand == 1 &&
          defined($prev_end) && $prev_end == $sr_start - 1) {
        # adjacent exons forward strand - extend the pending exon
        $self->debug("merged exon $rank in prediction_transcript $pt_id\n");
        $prev_exon{'seq_region_end'} = $sr_end;
        next;
      }

      if ($sr_strand == -1 &&
          defined($prev_start) && $prev_start == $sr_end + 1) {
        # adjacent exons negative strand - extend the pending exon
        $self->debug("merged exon $rank in prediction_transcript $pt_id\n");
        $prev_exon{'seq_region_start'} = $sr_start;
        next;
      }

      # non-adjacent exon in the same transcript - no merge
      $rank++;
    }
    else {
      # first exon of a new prediction transcript
      $rank = 1;
    }

    # Flush the pending exon (if any) and make the current row pending.
    $self->store_pexon(\%prev_exon) if (%prev_exon);

    %prev_exon = ('prediction_transcript_id' => $pt_id,
                  'seq_region_id'            => $sr_id,
                  'seq_region_start'         => $sr_start,
                  'seq_region_end'           => $sr_end,
                  'seq_region_strand'        => $sr_strand,
                  'start_phase'              => $start_phase,
                  'score'                    => $score,
                  'p_value'                  => $p_value,
                  'rank'                     => $rank);
  }

  # store the very last exon in the table
  $self->store_pexon(\%prev_exon) if (%prev_exon);

  $sth->finish();

  $self->debug("AnophelesGambiae Specific: building prediction_transcript " .
               "table");

  $dbh->do
    ("INSERT INTO $target.prediction_transcript (prediction_transcript_id, " .
     " seq_region_id, seq_region_start, seq_region_end, " .
     " seq_region_strand, analysis_id ) " .
     "SELECT pt.prediction_transcript_id, tcm.new_id as seq_region_id, " .
     " MIN(IF(a.contig_ori=1,(pt.contig_start+a.chr_start-a.contig_start),".
     " (a.chr_start+a.contig_end-pt.contig_end))) as start, " .
     " MAX(IF(a.contig_ori=1,(pt.contig_end+a.chr_start-a.contig_start)," .
     " (a.chr_start+a.contig_end-pt.contig_start))) as end, " .
     " a.contig_ori * pt.contig_strand as strand, " .
     " pt.analysis_id " .
     "FROM $source.assembly a, $target.tmp_chr_map tcm, " .
     " $source.prediction_transcript pt " .
     "WHERE pt.contig_id = a.contig_id " .
     "AND a.chromosome_id = tcm.old_id " .
     "GROUP BY prediction_transcript_id");

  return;
}
227 
228 
#
# Helper that writes a single prediction exon to the target database's
# prediction_exon table.
#
# Arguments : $pexon - hashref with the keys prediction_transcript_id,
#             rank, seq_region_id, seq_region_start, seq_region_end,
#             seq_region_strand, start_phase, score and p_value
# Returns   : nothing
#
sub store_pexon {
  my $self  = shift;
  my $pexon = shift;

  my $target = $self->target();
  my $dbh    = $self->dbh();

  # This helper is called once per exon with an identical statement, so
  # let DBI reuse the prepared handle instead of re-preparing each time.
  my $store_sth = $dbh->prepare_cached
    ("INSERT INTO $target.prediction_exon (prediction_transcript_id, " .
     " exon_rank, seq_region_id, seq_region_start, seq_region_end, " .
     " seq_region_strand, start_phase, score, p_value) " .
     "VALUES (?,?,?,?,?,?,?,?,?)");

  # Bind values in the column order of the INSERT above; note that the
  # hash key 'rank' feeds the exon_rank column.
  $store_sth->execute(@{$pexon}{qw(prediction_transcript_id
                                   rank
                                   seq_region_id
                                   seq_region_start
                                   seq_region_end
                                   seq_region_strand
                                   start_phase
                                   score
                                   p_value)});
  $store_sth->finish();

  return;
}
260 
261 
262 
263 1;
SeqStoreConverter::BasicConverter
Definition: BasicConverter.pm:3