3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
21 package XrefMapper::eukaryota;
32 Overrides the
default exonerate method and non
default methods which should be used
for
# Method name that the return statement below hands back as the default.
39 my $default_method =
'ExonerateGappedBest1';
# Keys are method names; each value is an array ref of the source names listed
# under that method (here the RefSeq mRNA and ncRNA sources, predicted ones
# included, are listed under ExonerateGappedBest5).
40 my %override_method_for_source = (
41 ExonerateGappedBest5 => [
'RefSeq_mRNA',
'RefSeq_mRNA_predicted',
'RefSeq_ncRNA',
'RefSeq_ncRNA_predicted' ],
# Returns the default method name followed by a reference to the override hash.
44 return $default_method, \%override_method_for_source;
48 =head2 gene_display_xref_sources
50 Overrides the list of sources to use
for assigning gene names
54 sub gene_display_xref_sources {
# Returns an array ref holding two references: one to @list and one to %ignore.
# Each %ignore entry assigned below is an SQL string that selects
# object_xref_id values.
# Announce on STDERR that this module supplies the gene-name source list.
57 print STDERR
"getting the list of external_dbs for assigning gene names from eukaryota.pm\n";
84 # Do not use EntrezGene labels that depend on predicted RefSeqs.
# The query selects DUMP_OUT object_xrefs whose dependent source name is like
# EntrezGene and whose master source name is like Refseq%predicted.
# NOTE(review): the WHERE clause uses the aliases xmas and xdep, but no
# xref xmas, xref xdep entry is visible in the FROM list here, unlike the same
# query in transcript_display_xref_sources - confirm the FROM list is complete.
86 $ignore{
'EntrezGene'} =<<IEG;
87 SELECT DISTINCT ox.object_xref_id
88 FROM object_xref ox, dependent_xref dx,
90 source smas, source sdep
91 WHERE ox.xref_id = dx.dependent_xref_id AND
92 dx.dependent_xref_id = xdep.xref_id AND
93 dx.master_xref_id = xmas.xref_id AND
94 xmas.source_id = smas.source_id AND
95 xdep.source_id = sdep.source_id AND
96 smas.name like
"Refseq%predicted" AND
97 sdep.name like
"EntrezGene" AND
98 ox.ox_status =
"DUMP_OUT" AND
99 ox.master_xref_id = dx.master_xref_id
102 # Do not use labels starting with LOC followed by one or more digits; only DUMP_OUT object_xrefs are selected.
104 $ignore{
'LOC_prefix'} =<<LOCP;
105 SELECT object_xref_id
106 FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
107 WHERE ox_status =
'DUMP_OUT' AND label REGEXP
'^LOC[[:digit:]]+'
110 return [\@list,\%ignore];
114 =head2 transcript_display_xref_sources
116 Overrides the list of sources to use
for assigning
transcript names
120 sub transcript_display_xref_sources {
# Returns an array ref holding two references: one to @list and one to %ignore.
# Each %ignore entry assigned below is an SQL string that selects
# object_xref_id values.
# Announce on STDERR that this module supplies the transcript-name source list.
123 print STDERR
"getting the list of external_dbs for assigning transcript names from eukaryota.pm\n";
129 Uniprot_gn_trans_name
149 # Do not use EntrezGene labels that depend on predicted RefSeqs.
# The query joins each dependent xref to its master xref and selects DUMP_OUT
# object_xrefs whose dependent source name is like EntrezGene and whose master
# source name is like Refseq%predicted.
151 $ignore{
'EntrezGene'} =<<IEG;
152 SELECT DISTINCT ox.object_xref_id
153 FROM object_xref ox, dependent_xref dx,
154 xref xmas, xref xdep,
155 source smas, source sdep
156 WHERE ox.xref_id = dx.dependent_xref_id AND
157 dx.dependent_xref_id = xdep.xref_id AND
158 dx.master_xref_id = xmas.xref_id AND
159 xmas.source_id = smas.source_id AND
160 xdep.source_id = sdep.source_id AND
161 smas.name like
"Refseq%predicted" AND
162 sdep.name like
"EntrezGene" AND
163 ox.ox_status =
"DUMP_OUT" AND
164 ox.master_xref_id = dx.master_xref_id
167 # Do not use labels starting with LOC followed by one or more digits; only DUMP_OUT object_xrefs are selected.
169 $ignore{
'LOC_prefix'} =<<LOCP;
170 SELECT object_xref_id
171 FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
172 WHERE ox_status =
'DUMP_OUT' AND label REGEXP
'^LOC[[:digit:]]+'
175 return [\@list,\%ignore];
179 =head2 gene_description_sources
181 Overrides the list of external_db entries to use
for assigning gene descriptions
185 sub gene_description_sources {
# The quoted strings below are entries of a comma-separated list of names.
# NOTE(review): presumably these are external_db names used for gene
# descriptions, as the POD for this sub states - confirm against the full list.
189 "PomBase_TRANSCRIPT",
195 "BROAD_G_moniliformis",
202 "BROAD_Magnaporthe_DB",
204 "PHYTOZOME_GMAX_GENE",
212 =head2 transcript_names_from_gene
214 Overrides the logic for assigning transcript names from gene names.
215 Avoids adding a '-\d+' suffix to any of them.
220 sub transcript_names_from_gene {
# Gives transcripts a display xref derived from the display xref of their gene.
# For each gene that has a display xref, each of its transcripts is linked
# through a new object_xref either to an existing matching xref or to a newly
# inserted one, and the transcript display_xref_id is set to that xref.
# Progress message, printed only when the mapper is verbose.
223 print
"Assigning transcript names from gene names\n" if ($self->verbose);
# Clear the display xref of every transcript before reassigning.
225 my $reset_sth = $self->core->dbc->prepare(
"UPDATE transcript SET display_xref_id = null");
226 $reset_sth->execute();
# Current maximum xref_id and object_xref_id; $xref_id and $ox_id are later
# passed as the ids of the rows inserted below.
229 my $xref_id_sth = $self->core->dbc->prepare(
"SELECT max(xref_id) FROM xref");
230 my $ox_id_sth = $self->core->dbc->prepare(
"SELECT max(object_xref_id) FROM object_xref");
# Deletes xrefs attached to Transcript object_xrefs whose display_label ends
# in -2 followed by two digits.
231 my $del_xref_sth = $self->core->dbc->prepare(
"DELETE x FROM xref x, object_xref ox WHERE x.xref_id = ox.xref_id AND ensembl_object_type = 'Transcript' AND display_label REGEXP '-2[0-9]{2}\$'");
# Looks for an existing xref with the same external db, label and description
# that is marked as MISC / via gene name, so that it can be reused.
232 my $reuse_xref_sth = $self->core->dbc->prepare(
"SELECT xref_id FROM xref x WHERE external_db_id = ? AND display_label = ? AND version = 0 AND description = ? AND info_type = 'MISC' AND info_text = 'via gene name'");
# Deletes object_xref rows whose xref row no longer exists.
233 my $del_ox_sth = $self->core->dbc->prepare(
"DELETE ox FROM object_xref ox LEFT JOIN xref x ON x.xref_id = ox.xref_id WHERE isnull(x.xref_id)");
# Inserts a new xref (version 0, info_type MISC, info_text via gene name);
# INSERT IGNORE is used, so a conflicting row does not raise an error.
234 my $ins_xref_sth = $self->core->dbc->prepare(
"INSERT IGNORE into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values(?, ?, ?, ?, 0, ?, 'MISC', 'via gene name')");
# Links a transcript to an xref through a new object_xref row.
235 my $ins_ox_sth = $self->core->dbc->prepare(
"INSERT into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id) values(?, ?, 'Transcript', ?)");
# Sets the display xref of a single transcript.
236 my $update_tran_sth = $self->core->dbc->prepare(
"UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?");
# All genes that have a display xref, with the db name, accession, label and
# description of that xref.
238 my $get_genes = $self->core->dbc->prepare(
"SELECT g.gene_id, e.db_name, x.dbprimary_acc, x.display_label, x.description FROM gene g, xref x, external_db e where g.display_xref_id = x.xref_id and e.external_db_id = x.external_db_id");
# Transcripts of one gene, ordered by seq_region_start then seq_region_end.
239 my $get_transcripts = $self->core->dbc->prepare(
"SELECT transcript_id FROM transcript WHERE gene_id = ? ORDER BY seq_region_start, seq_region_end");
# external_db_id of the external_db whose db_name matches (LIKE) the argument.
240 my $get_source_id = $self->core->dbc->prepare(
"SELECT external_db_id FROM external_db WHERE db_name like ?");
242 $get_genes->execute();
# Variables that the statement handles are bound to, plus working values.
243 my ($gene_id, $external_db, $external_db_id, $acc, $label, $description, $transcript_id, $xref_id, $ox_id, $ext, $reuse_xref_id);
244 $get_genes->bind_columns(\$gene_id, \$external_db, \$acc, \$label, \$description);
# Read the current maximum xref_id into $xref_id.
245 $xref_id_sth->execute();
246 $xref_id_sth->bind_columns(\$xref_id);
247 $xref_id_sth->fetch();
248 $ox_id_sth->execute();
249 $ox_id_sth->bind_columns(\$ox_id);
# NOTE(review): no fetch() on $ox_id_sth is visible after this bind, unlike
# $xref_id_sth above - confirm that $ox_id is populated before it is used.
# Remove the previously generated transcript xrefs matched by $del_xref_sth.
251 $del_xref_sth->execute();
# One iteration per gene that has a display xref.
252 while ($get_genes->fetch()) {
# Find the external_db whose name is the gene db name plus _trans_name.
255 $get_source_id->execute($external_db .
"_trans_name");
256 $get_source_id->bind_columns(\$external_db_id);
257 $get_source_id->fetch();
258 $get_transcripts->execute($gene_id);
259 $get_transcripts->bind_columns(\$transcript_id);
# One iteration per transcript of the current gene.
260 while ($get_transcripts->fetch) {
# Look for a reusable xref, either under the label with a dash and $ext
# appended or under the plain gene label.
# NOTE(review): the condition that chooses between these two execute calls is
# not visible here, and neither is the assignment of $ext - confirm both.
264 $reuse_xref_sth->execute($external_db_id, $label .
'-' . $ext, $description);
267 $reuse_xref_sth->execute($external_db_id, $label, $description);
269 $reuse_xref_sth->bind_columns(\$reuse_xref_id);
# A matching xref exists: link it to the transcript and make it the display xref.
270 if ($reuse_xref_sth->fetch()) {
271 $ins_ox_sth->execute($ox_id, $transcript_id, $reuse_xref_id);
272 $update_tran_sth->execute($reuse_xref_id, $transcript_id);
# Insert a new xref whose accession and display label are the same value
# (suffixed or plain label), then link it and make it the display xref.
275 $ins_xref_sth->execute($xref_id, $external_db_id, $label.
"-" . $ext, $label .
"-" . $ext, $description);
278 $ins_xref_sth->execute($xref_id, $external_db_id, $label, $label, $description);
280 $ins_ox_sth->execute($ox_id, $transcript_id, $xref_id);
281 $update_tran_sth->execute($xref_id, $transcript_id);
# Clean-up: remove object_xrefs left without an xref, then release the
# statement handles.
287 $del_xref_sth->finish();
288 $del_ox_sth->execute();
289 $del_ox_sth->finish();
290 $reuse_xref_sth->finish();
291 $xref_id_sth->finish();
292 $ox_id_sth->finish();
293 $get_genes->finish();
294 $get_source_id->finish();
295 $get_transcripts->finish();
296 $ins_xref_sth->finish();
297 $ins_ox_sth->finish();
298 $update_tran_sth->finish();