ensembl-hive  2.7.0
eukaryota.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
package XrefMapper::eukaryota;

# This mapper specialises XrefMapper::BasicMapper: every method that is not
# overridden in this file is inherited from that class.
our @ISA = qw(XrefMapper::BasicMapper);

28 
29 
30 =head2 set_methods
31 
32  Overrides the default exonerate method and specifies the non-default methods
33  to be used for particular sources.
34 
35 =cut
36 
sub set_methods {

    # Sources listed here are mapped with ExonerateGappedBest5 rather than
    # with the default method.
    my @best5_sources = (
        'RefSeq_mRNA',
        'RefSeq_mRNA_predicted',
        'RefSeq_ncRNA',
        'RefSeq_ncRNA_predicted',
    );

    # Keyed by method name; each value is an array ref of source names.
    my %sources_for_method = ( ExonerateGappedBest5 => \@best5_sources );

    # Method applied to every source that has no override above.
    my $fallback_method = 'ExonerateGappedBest1';

    # Returns: (default method name, hash ref of method name => [source names]).
    return ( $fallback_method, \%sources_for_method );
}
46 
47 
48 =head2 gene_display_xref_sources
49 
50  Overrides the list of sources to use for assigning gene names
51 
52 =cut
53 
sub gene_display_xref_sources {
    my ($self) = @_;

    print STDERR "getting the list of external_dbs for assigning gene names from eukaryota.pm\n";

    # Names of the sources that may supply a gene name, in the order given.
    my @name_sources = qw(
        TAIR_SYMBOL
        RFAM
        RNAMMER
        TRNASCAN_SE
        Uniprot_gn
        ENA_GENE
        BROAD_U_maydis
        BROAD_F_oxysporum
        BROAD_G_zeae
        BROAD_G_moniliformis
        BROAD_P_infestans
        phyra_jgi_v1.1
        physo1_jgi_v1.1
        phatr_jgi_v2
        phatr_jgi_v2_bd
        PGD_GENE
        Mycgr3_jgi_v2.0_gene
        BROAD_Magnaporthe_DB
        PHYTOZOME_GMAX_GENE
    );

    # Selects the object_xrefs of EntrezGene xrefs whose master xref comes
    # from a predicted RefSeq source: labels dependent on predicted RefSeqs
    # must not be used.
    my $entrezgene_on_predicted_refseq = <<'IEG';
SELECT DISTINCT ox.object_xref_id
 FROM object_xref ox, dependent_xref dx,
 xref xmas, xref xdep,
 source smas, source sdep
 WHERE ox.xref_id = dx.dependent_xref_id AND
 dx.dependent_xref_id = xdep.xref_id AND
 dx.master_xref_id = xmas.xref_id AND
 xmas.source_id = smas.source_id AND
 xdep.source_id = sdep.source_id AND
 smas.name like "Refseq%predicted" AND
 sdep.name like "EntrezGene" AND
 ox.ox_status = "DUMP_OUT" AND
 ox.master_xref_id = dx.master_xref_id
IEG

    # Selects the object_xrefs whose label starts with "LOC" followed by
    # digits: such labels must not be used either.
    my $loc_prefixed_labels = <<'LOCP';
SELECT object_xref_id
 FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
 WHERE ox_status = 'DUMP_OUT' AND label REGEXP '^LOC[[:digit:]]+'
LOCP

    my %ignore_queries = (
        EntrezGene => $entrezgene_on_predicted_refseq,
        LOC_prefix => $loc_prefixed_labels,
    );

    # Returns: array ref of [ source names, hash ref of ignore-query SQL ].
    return [ \@name_sources, \%ignore_queries ];
}
112 
113 
114 =head2 transcript_display_xref_sources
115 
116  Overrides the list of sources to use for assigning transcript names
117 
118 =cut
119 
sub transcript_display_xref_sources {
    my ($self) = @_;

    print STDERR "getting the list of external_dbs for assigning transcript names from eukaryota.pm\n";

    # Names of the sources that may supply a transcript name, in the order
    # given.
    my @name_sources = qw(
        RFAM
        RNAMMER
        TRNASCAN_SE
        Uniprot_gn_trans_name
        ENA_GENE
        BROAD_U_maydis
        BROAD_F_oxysporum
        BROAD_G_zeae
        BROAD_G_moniliformis
        BROAD_P_infestans
        phyra_jgi_v1.1
        physo1_jgi_v1.1
        phatr_jgi_v2
        phatr_jgi_v2_bd
        PGD_GENE
        Mycgr3_jgi_v2.0_gene
        BROAD_Magnaporthe_DB
        PHYTOZOME_GMAX_GENE
    );

    # Selects the object_xrefs of EntrezGene xrefs whose master xref comes
    # from a predicted RefSeq source: labels dependent on predicted RefSeqs
    # must not be used.
    my $entrezgene_on_predicted_refseq = <<'IEG';
SELECT DISTINCT ox.object_xref_id
 FROM object_xref ox, dependent_xref dx,
 xref xmas, xref xdep,
 source smas, source sdep
 WHERE ox.xref_id = dx.dependent_xref_id AND
 dx.dependent_xref_id = xdep.xref_id AND
 dx.master_xref_id = xmas.xref_id AND
 xmas.source_id = smas.source_id AND
 xdep.source_id = sdep.source_id AND
 smas.name like "Refseq%predicted" AND
 sdep.name like "EntrezGene" AND
 ox.ox_status = "DUMP_OUT" AND
 ox.master_xref_id = dx.master_xref_id
IEG

    # Selects the object_xrefs whose label starts with "LOC" followed by
    # digits: such labels must not be used either.
    my $loc_prefixed_labels = <<'LOCP';
SELECT object_xref_id
 FROM object_xref JOIN xref USING(xref_id) JOIN source USING(source_id)
 WHERE ox_status = 'DUMP_OUT' AND label REGEXP '^LOC[[:digit:]]+'
LOCP

    my %ignore_queries = (
        EntrezGene => $entrezgene_on_predicted_refseq,
        LOC_prefix => $loc_prefixed_labels,
    );

    # Returns: array ref of [ source names, hash ref of ignore-query SQL ].
    return [ \@name_sources, \%ignore_queries ];
}
177 
178 
179 =head2 gene_description_sources
180 
181  Overrides the list of external_db entries to use for assigning gene descriptions
182 
183 =cut
184 
sub gene_description_sources {

    # Names of the external_db entries that may supply a gene description.
    # The word list is returned directly, so a list-context caller receives
    # all names in the order written here.
    return qw(
        TAIR_LOCUS
        PomBase_GENE
        PomBase_TRANSCRIPT
        Uniprot/SWISSPROT
        Uniprot/SPTREMBL
        BROAD_U_maydis
        BROAD_F_oxysporum
        BROAD_G_zeae
        BROAD_G_moniliformis
        BROAD_P_infestans
        phyra_jgi_v1.1
        physo1_jgi_v1.1
        phatr_jgi_v2
        phatr_jgi_v2_bd
        PGD_GENE
        BROAD_Magnaporthe_DB
        PGSC_GENE
        PHYTOZOME_GMAX_GENE
        RFAM
        TRNASCAN_SE
        RNAMMER
    );
}
210 
211 
212 =head2 transcript_names_from_gene
213 
214  Overrides the logic that assigns transcript names from gene names.
215  No '-\d+' suffix is appended to any of them.
216 
217 =cut
218 
219 
# Gives every transcript of a named gene a display xref that carries the gene's
# own label, with no suffix appended.
#
# Steps, all run against the core database ($self->core->dbc):
#   1. clear transcript.display_xref_id for every transcript;
#   2. delete transcript xrefs whose display_label ends in '-2' plus two digits;
#   3. for each gene that has a display xref, look up the external_db called
#      "<gene xref db_name>_trans_name" and, for each transcript of the gene,
#      reuse a matching 'via gene name' xref or insert a new one, then link it
#      through object_xref and transcript.display_xref_id;
#   4. delete object_xref rows whose xref no longer exists.
#
# Genes for which no "<db_name>_trans_name" external_db exists are skipped and
# reported on STDERR; their transcripts keep a NULL display_xref_id.
sub transcript_names_from_gene {
    my $self = shift;

    print "Assigning transcript names from gene names\n" if ($self->verbose);

    my $dbc = $self->core->dbc;

    my $reset_sth = $dbc->prepare("UPDATE transcript SET display_xref_id = null");
    $reset_sth->execute();
    $reset_sth->finish;

    my $xref_id_sth = $dbc->prepare("SELECT max(xref_id) FROM xref");
    my $ox_id_sth   = $dbc->prepare("SELECT max(object_xref_id) FROM object_xref");
    my $del_xref_sth = $dbc->prepare(
        "DELETE x FROM xref x, object_xref ox WHERE x.xref_id = ox.xref_id AND ensembl_object_type = 'Transcript' AND display_label REGEXP '-2[0-9]{2}\$'"
    );

    # NOTE(review): "description = ?" cannot match when the gene xref has a
    # NULL description, so such xrefs are never found for reuse -- confirm
    # whether a NULL-safe comparison is wanted here.
    my $reuse_xref_sth = $dbc->prepare(
        "SELECT xref_id FROM xref x WHERE external_db_id = ? AND display_label = ? AND version = 0 AND description = ? AND info_type = 'MISC' AND info_text = 'via gene name'"
    );
    my $del_ox_sth = $dbc->prepare(
        "DELETE ox FROM object_xref ox LEFT JOIN xref x ON x.xref_id = ox.xref_id WHERE isnull(x.xref_id)"
    );
    my $ins_xref_sth = $dbc->prepare(
        "INSERT IGNORE into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values(?, ?, ?, ?, 0, ?, 'MISC', 'via gene name')"
    );
    my $ins_ox_sth = $dbc->prepare(
        "INSERT into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id) values(?, ?, 'Transcript', ?)"
    );
    my $update_tran_sth = $dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?");

    my $get_genes = $dbc->prepare(
        "SELECT g.gene_id, e.db_name, x.dbprimary_acc, x.display_label, x.description FROM gene g, xref x, external_db e where g.display_xref_id = x.xref_id and e.external_db_id = x.external_db_id"
    );
    my $get_transcripts = $dbc->prepare(
        "SELECT transcript_id FROM transcript WHERE gene_id = ? ORDER BY seq_region_start, seq_region_end"
    );
    my $get_source_id = $dbc->prepare("SELECT external_db_id FROM external_db WHERE db_name like ?");

    $get_genes->execute();
    my ($gene_id, $external_db, $acc, $label, $description);
    $get_genes->bind_columns(\$gene_id, \$external_db, \$acc, \$label, \$description);

    # New rows are numbered upwards from the current maxima of both tables.
    my ($xref_id, $ox_id);
    $xref_id_sth->execute();
    $xref_id_sth->bind_columns(\$xref_id);
    $xref_id_sth->fetch();
    $ox_id_sth->execute();
    $ox_id_sth->bind_columns(\$ox_id);
    $ox_id_sth->fetch();

    $del_xref_sth->execute();

    my ($external_db_id, $transcript_id, $reuse_xref_id);
    while ($get_genes->fetch()) {

        # Clear the id before each lookup: a fetch that returns no row leaves
        # the bound variable untouched, which would otherwise carry the
        # previous gene's external_db_id over to this gene.
        $external_db_id = undef;
        $get_source_id->execute($external_db . "_trans_name");
        $get_source_id->bind_columns(\$external_db_id);
        $get_source_id->fetch();

        if (!defined $external_db_id) {
            print STDERR "No external_db matching '" . $external_db . "_trans_name', "
                       . "not assigning transcript names for gene $gene_id\n";
            next;
        }

        $get_transcripts->execute($gene_id);
        $get_transcripts->bind_columns(\$transcript_id);
        while ($get_transcripts->fetch) {

            # Both counters advance for every transcript, whether or not a
            # new xref row ends up being inserted.
            $xref_id++;
            $ox_id++;

            # Every transcript of the gene takes the plain gene label.
            $reuse_xref_sth->execute($external_db_id, $label, $description);
            $reuse_xref_sth->bind_columns(\$reuse_xref_id);

            my $display_xref_id;
            if ($reuse_xref_sth->fetch()) {
                $display_xref_id = $reuse_xref_id;
            }
            else {
                $ins_xref_sth->execute($xref_id, $external_db_id, $label, $label, $description);
                $display_xref_id = $xref_id;
            }

            $ins_ox_sth->execute($ox_id, $transcript_id, $display_xref_id);
            $update_tran_sth->execute($display_xref_id, $transcript_id);
        }
    }

    $del_xref_sth->finish();

    # Drop object_xref rows left pointing at xrefs that no longer exist.
    $del_ox_sth->execute();
    $del_ox_sth->finish();

    $reuse_xref_sth->finish();
    $xref_id_sth->finish();
    $ox_id_sth->finish();
    $get_genes->finish();
    $get_source_id->finish();
    $get_transcripts->finish();
    $ins_xref_sth->finish();
    $ins_ox_sth->finish();
    $update_tran_sth->finish();
}
300 
301 1;
transcript
public transcript()
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8