ensembl-hive  2.7.0
wormbase.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::wormbase;
21 
23 
# Inherit from XrefMapper::BasicMapper. A lexically scoped "our" declaration
# replaces the obsolete "use vars" pragma; the effect on @ISA is the same.
our @ISA = qw(XrefMapper::BasicMapper);
27 
28 
29 # This module is activated by specifying "taxon=wormbase" in the mapping input file
30 # It contains some common config for worms maintained by WormBase (i.e. having genes
31 # with WBGene ids etc)
32 
sub set_methods {
  # Choose the mapping method to use for each source.
  #
  # Takes no arguments. Returns a two-element list:
  #   1. the name of the method used by default;
  #   2. a reference to a hash keyed by method name, whose values are
  #      array references of the source names that use that method
  #      instead of the default.

  my %sources_by_method = (
    ExonerateGappedBest5_subtran => [
      qw(
        RefSeq_mRNA
        RefSeq_mRNA_predicted
        RefSeq_ncRNA
        RefSeq_ncRNA_predicted
      )
    ],
  );

  return ('ExonerateGappedBest1', \%sources_by_method);
}
45 
46 
47 
sub set_display_xrefs {
  my ($self) = @_;

  # Set the display xref of every gene and transcript in the core database
  # from the WormBase direct xrefs already stored there.
  #
  # Strategy:
  #  - transcript display xref <- wormbase_transcript
  #  - gene display xref       <- wormbase_locus where the gene has one,
  #                               otherwise wormbase_gseqname (only when that
  #                               external_db is present)
  #
  # Does nothing (beyond an optional message) when wormbase_transcript or
  # wormbase_locus is missing from external_db. Otherwise ALL existing
  # gene/transcript display_xref_id values are reset to NULL before the new
  # ones are written. Returns nothing.

  print "Building Transcript and Gene display_xrefs using WormBase direct xrefs\n" if ($self->verbose);

  my $dbc = $self->core->dbc;

  my (%external_dbs, %gene_display_xrefs, %tran_display_xrefs);

  #
  # Get external_db ids for the sources we are interested in
  #
  my $edb_sth = $dbc->prepare("SELECT external_db_id, db_name from external_db WHERE db_name like 'wormbase%'");
  $edb_sth->execute;
  while ( my ($edb_id, $edb_name) = $edb_sth->fetchrow_array ) {
    $external_dbs{$edb_name} = $edb_id;
  }
  $edb_sth->finish;

  if (not exists $external_dbs{wormbase_transcript} or
      not exists $external_dbs{wormbase_locus}) {
    print "Could not find wormbase_transcript and wormbase_locus in external_db table, so doing nothing\n" if $self->verbose;
    return;
  }

  #
  # One prepared statement, executed once per source with the external_db_id
  # bound as a parameter, replaces three near-identical string-built queries.
  #
  # NOTE(review): the query does not filter on ox.ensembl_object_type, so it
  # relies on each of these sources being attached to a single object type
  # (genes or transcripts) - confirm.
  #
  my $xref_sth = $dbc->prepare("SELECT ensembl_id, x.xref_id " .
                               "FROM object_xref ox, xref x " .
                               "WHERE ox.xref_id = x.xref_id AND external_db_id = ?");

  # Record ensembl_id => xref_id for one external_db into the given hash.
  # Later calls overwrite entries written by earlier ones.
  my $collect_xrefs = sub {
    my ($edb_id, $display_xrefs) = @_;

    $xref_sth->execute($edb_id);
    while ( my ($ensembl_id, $xref_id) = $xref_sth->fetchrow_array ) {
      $display_xrefs->{$ensembl_id} = $xref_id;
    }
    return;
  };

  #
  # Start from the sequence name. This source is optional: previously a
  # missing wormbase_gseqname row put an undefined id into the SQL text and
  # produced a malformed statement.
  #
  if (exists $external_dbs{wormbase_gseqname}) {
    $collect_xrefs->($external_dbs{wormbase_gseqname}, \%gene_display_xrefs);
  }
  elsif ($self->verbose) {
    print "Could not find wormbase_gseqname in external_db table, so only genes with a locus name will get a display xref\n";
  }

  #
  # Some genes will have a locus name. Over-write display xrefs for those that do
  #
  $collect_xrefs->($external_dbs{wormbase_locus}, \%gene_display_xrefs);

  #
  # Get the wormbase_transcript xrefs for the transcripts
  #
  $collect_xrefs->($external_dbs{wormbase_transcript}, \%tran_display_xrefs);

  $xref_sth->finish;

  #
  # finally, update: clear every existing display xref, then write ours
  #
  my $reset_sth = $dbc->prepare("UPDATE gene SET display_xref_id = null");
  $reset_sth->execute();
  $reset_sth->finish;

  $reset_sth = $dbc->prepare("UPDATE transcript SET display_xref_id = null");
  $reset_sth->execute();
  $reset_sth->finish;

  my $update_gene_sth = $dbc->prepare("UPDATE gene g SET g.display_xref_id= ? WHERE g.gene_id=?");
  my $update_tran_sth = $dbc->prepare("UPDATE transcript t SET t.display_xref_id= ? WHERE t.transcript_id=?");

  foreach my $gene_id (keys %gene_display_xrefs) {
    $update_gene_sth->execute( $gene_display_xrefs{$gene_id}, $gene_id );
  }
  $update_gene_sth->finish;

  foreach my $transcript_id (keys %tran_display_xrefs) {
    $update_tran_sth->execute( $tran_display_xrefs{$transcript_id}, $transcript_id );
  }
  $update_tran_sth->finish;

  print "Updated display xrefs in core for genes and transcripts\n" if $self->verbose;

  return;
}
145 
146 
# Deliberate no-op override. The default behaviour propagates names from the
# gene to its transcripts, which would stamp over the transcript display ids
# that set_display_xrefs has carefully constructed. Returns nothing.
sub transcript_names_from_gene { return; }
153 
154 
sub gene_description_sources {
  # Returns the list of source names that gene descriptions may be taken
  # from. NOTE(review): the order is presumably a priority order used by the
  # caller - confirm against XrefMapper::BasicMapper.
  return qw(
    RFAM
    RNAMMER
    TRNASCAN_SE
    miRBase
    HGNC
    IMGT/GENE_DB
    Uniprot/SWISSPROT
    RefSeq_peptide
    Uniprot/SPTREMBL
  );
}
169 
170 
sub gene_description_filter_regexps {
  # Returns a list of regular-expression source strings (not compiled
  # patterns). Each one matches a generic, uninformative description prefix
  # such as "Uncharacterized protein". NOTE(review): how the caller applies
  # them (strip vs. reject) is not visible here - confirm against
  # XrefMapper::BasicMapper.
  return (
    q{^(Protein \S+\s*)+$},
    q{^Uncharacterized protein\s*\S+\s*},
    q{^Uncharacterized protein\s*},
    q{^Putative uncharacterized protein\s*\S+\s*},
    q{^Putative uncharacterized protein\s*},
    q{^Hypothetical protein\s*\S+\s*},
  );
}
182 
183 1;
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8