ensembl-hive  2.7.0
TestMappings.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::TestMappings;
21 use strict;
22 
23 use vars '@ISA';
24 @ISA = qw{ XrefMapper::BasicMapper };
25 
26 use warnings;
28 
29 use Cwd;
30 use DBI;
31 use File::Basename;
32 use IPC::Open3;
33 use POSIX;
34 
35 ##########################################
36 # Testing (may be moved to healthchecks)
37 ##########################################
38 
39 ##### Unlinked entries ##############
40 
41 # ERRORS
42 # dependent_xref and xref
43 # primary_xref and xref
44 # transcript_direct_xref and xref
45 # translation_direct_xref and xref
46 # gene_direct_xref and xref
47 # synonym and xref
48 
49 # identity_xref and object_xref
50 
51 # gene_transcript_translation and gene_stable_id
52 # gene_transcript_translation and transcript_stable_id
53 # gene_transcript_translation and translation_stable_id
54 
55 # WARNNGS
56 # gene_direct_xref and gene_stable_id
57 # transcript
58 # translation
59 
60 
61 ##### Numbers between xref and core (xref and object_xref) are similar
62 
63 ##### if human or mouse check the number of gene name changes.
64 
65 sub new {
66  my($class, $mapper) = @_;
67 
68  my $self ={};
69  bless $self,$class;
70  if (defined($mapper->previous_core)) {
71  $self->core($mapper->previous_core);
72  } else {
73  $self->core($mapper->core);
74  }
75  $self->xref($mapper->xref);
76  $self->mapper($mapper);
77  return $self;
78 }
79 
80 
81 sub mapper{
82  my ($self, $arg) = @_;
83 
84  (defined $arg) &&
85  ($self->{_mapper} = $arg );
86  return $self->{_mapper};
87 }
88 
89 
90 sub unlinked_entries{
91  my ($self) = @_;
92 
93  my $failed = 0;
94 
95  my $xref_id;
96  my $count;
97  my $dbi = $self->xref->dbc;
98 
99  my $sth_stat = $dbi->prepare("insert into process_status (status, date) values('tests_started',now())");
100  $sth_stat->execute();
101 
102  # dependent_xref and xref
103  my $count_sql = "select count(1) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null";
104  my $sql = "select distinct(d.master_xref_id) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null limit 10";
105  my $sth = $dbi->prepare($count_sql);
106  $sth->execute();
107  $sth->bind_columns(\$count);
108  $sth->fetch();
109  $sth->finish;
110 
111  if($count){
112  $failed = 1;
113  $sth = $dbi->prepare($sql);
114  $sth->execute();
115  $sth->bind_columns(\$xref_id);
116  print STDERR "SQL QUERY: $sql\n";
117  while($sth->fetch){
118  print STDERR "Problem with master xref $xref_id\n";
119  }
120  $sth->finish;
121  }
122 
123 
124 
125 
126  $count_sql = "select count(1) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null";
127 
128  $sql = "select distinct(d.dependent_xref_id) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null limit 10";
129 
130  $sth = $dbi->prepare($count_sql);
131  $sth->execute();
132  $sth->bind_columns(\$count);
133  $sth->fetch();
134  $sth->finish;
135 
136  if($count){
137  $failed = 1;
138  $sth = $dbi->prepare($sql);
139  $sth->execute();
140  $sth->bind_columns(\$xref_id);
141  print STDERR "SQL QUERY: $sql\n";
142  while($sth->fetch){
143  print STDERR "Problem with dependent xref $xref_id\n";
144  }
145  $sth->finish;
146  }
147 
148  $count_sql = "select count(1) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null";
149 
150  $sql = "select distinct(d.xref_id) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10";
151 
152  $sth = $dbi->prepare($count_sql);
153  $sth->execute();
154  $sth->bind_columns(\$count);
155  $sth->fetch();
156  $sth->finish;
157 
158  if($count){
159  $failed = 1;
160  $sth = $dbi->prepare($sql);
161  $sth->execute();
162  $sth->bind_columns(\$xref_id);
163  print STDERR "SQL QUERY: $sql\n";
164  while($sth->fetch){
165  print STDERR "Problem with primary xref $xref_id\n";
166  }
167  $sth->finish;
168  }
169 
170  foreach my $type (qw(transcript translation gene)){
171  $count_sql = "select count(1) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null";
172 
173  $sql = "select distinct(d.general_xref_id) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null limit 10";
174 
175  $sth = $dbi->prepare($count_sql);
176  $sth->execute();
177  $sth->bind_columns(\$count);
178  $sth->fetch();
179  $sth->finish;
180 
181  if($count){
182  $failed = 1;
183  $sth = $dbi->prepare($sql);
184  $sth->execute();
185  $sth->bind_columns(\$xref_id);
186  print STDERR "SQL QUERY: $sql\n";
187  while($sth->fetch){
188  print STDERR "Problem with ".$type."_direct_xref $xref_id\n";
189  }
190  $sth->finish;
191  }
192 
193  }
194 
195 
196  $count_sql = "select count(1) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null";
197 
198  $sql = "select distinct(d.xref_id) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10";
199 
200  $sth = $dbi->prepare($count_sql);
201  $sth->execute();
202  $sth->bind_columns(\$count);
203  $sth->fetch();
204  $sth->finish;
205 
206  if($count){
207  $failed = 1;
208  $sth = $dbi->prepare($sql);
209  $sth->execute();
210  $sth->bind_columns(\$xref_id);
211  print STDERR "SQL QUERY: $sql\n";
212  while($sth->fetch){
213  print STDERR "Problem with synonym $xref_id\n";
214  }
215  $sth->finish;
216  }
217 
218 
219  $count_sql = "select count(1) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null";
220 
221  $sql = "select distinct(d.object_xref_id) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10";
222 
223  $sth = $dbi->prepare($count_sql);
224  $sth->execute();
225  $sth->bind_columns(\$count);
226  $sth->fetch();
227  $sth->finish;
228 
229  if($count){
230  $failed = 1;
231  $sth = $dbi->prepare($sql);
232  $sth->execute();
233  $sth->bind_columns(\$xref_id);
234  print STDERR "SQL QUERY: $sql\n";
235  while($sth->fetch){
236  print STDERR "Problem with object_xref $xref_id\n";
237  }
238  $sth->finish;
239  }
240 
241  foreach my $type (qw(transcript translation gene)){
242  $count_sql = "select count(1) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null";
243 
244  $sql = "select distinct(d.".$type."_id) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null limit 10";
245 
246  $sth = $dbi->prepare($count_sql);
247  $sth->execute();
248  $sth->bind_columns(\$count);
249  $sth->fetch();
250  $sth->finish;
251 
252  if($count){
253  $failed = 1;
254  $sth = $dbi->prepare($sql);
255  $sth->execute();
256  $sth->bind_columns(\$xref_id);
257  print STDERR "SQL QUERY: $sql\n";
258  while($sth->fetch){
259  print STDERR "Problem with ".$type."_id $xref_id\n";
260  }
261  $sth->finish;
262  }
263 
264  }
265 
266 
267  if(!$failed){
268  $sth_stat = $dbi->prepare("insert into process_status (status, date) values('tests_finished',now())");
269  $sth_stat->execute();
270  }
271  else{
272  $sth_stat = $dbi->prepare("insert into process_status (status, date) values('tests_failed',now())");
273  $sth_stat->execute();
274  }
275  $sth_stat->finish;
276 
277  return $failed;
278 }
279 
280 
281 sub entry_number_check{
282  my ($self) = @_;
283 
284 
285 # No point doing xrefs object_xrefs are more important and gives a better indication of wether things went okay.
286 
287  my %old_object_xref_count;
288  my %new_object_xref_count;
289  my $dbi = $self->xref->dbc;
290 
291  my $sth = $dbi->prepare('select s.name, count(distinct x.xref_id, ensembl_id) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" and s.name not like "AFFY%" group by s.name');
292  $sth->execute();
293  my ($name, $count);
294  $sth->bind_columns(\$name,\$count);
295  while($sth->fetch()){
296  $new_object_xref_count{$name} = $count;
297  }
298  $sth->finish;
299 
300 
301  $sth = $self->core->dbc->prepare('select e.db_name, count(*) from xref x, object_xref ox, external_db e where ox.xref_id = x.xref_id and x.external_db_id = e.external_db_id and e.db_name not like "AFFY%" and (x.info_type is NULL or x.info_type != "PROJECTION") group by e.db_name');
302 
303  $sth->execute();
304  $sth->bind_columns(\$name,\$count);
305  while($sth->fetch()){
306  my $change = 0;
307  $old_object_xref_count{$name} = $count;
308  if(defined($new_object_xref_count{$name})){
309  $change = (($new_object_xref_count{$name} - $count)/$count) * 100;
310  if($change > 5){ # increase of 5%
311  print "WARNING: $name has increased by ".int($change)."\% was $count now ". $new_object_xref_count{$name} . "\n" if($self->mapper->verbose);
312  }
313  elsif($change < -5){ # decrease by 5%
314  print "WARNING: $name has decreased by ".int($change)." \% was $count now ". $new_object_xref_count{$name} . "\n" if($self->mapper->verbose);
315  }
316  }
317  else{
318  print "WARNING: xrefs $name are not in the new database but $count are in the old???\n" if($self->mapper->verbose);
319  }
320  }
321  $sth->finish;
322 
323  foreach my $key (keys %new_object_xref_count){
324  if(!defined($old_object_xref_count{$key})){
325  print "WARNING: $key has ".$new_object_xref_count{$key} ." xrefs in the new database but NONE in the old\n" if($self->mapper->verbose);
326  }
327  }
328 
329  return;
330 }
331 
332 
333 sub name_change_check{
334  my ($self) = @_;
335 
336  my %new_name; # $old_name{$gene_id} = HGNC_%name
337  my %id_to_stable_id;
338  my $dbi = $self->xref->dbc;
339 
340  my $official_name = $self->mapper->get_official_name;
341  if(!defined($official_name)){
342  return;
343  }
344 # print "Checking names\n";
345 
346  my $sql = 'select x.label, gsi.internal_id, gsi.stable_id from object_xref ox, xref x, gene_stable_id gsi, source s where x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and gsi.internal_id = ox.ensembl_id and x.source_id = s.source_id and s.name like "'.$official_name.'_%"';
347 
348  my $sth = $dbi->prepare($sql);
349  $sth->execute();
350  my ($name, $gene_id, $stable_id);
351  $sth->bind_columns(\$name,\$gene_id, \$stable_id);
352  my $count = 0;
353  while($sth->fetch()){
354  $new_name{$gene_id} = $name;
355  $id_to_stable_id{$gene_id} = $stable_id;
356  $count++;
357  }
358  $sth->finish;
359 # print $sql."\n";
360 # print $count." entries found in xref database\n";
361 
362 
363 
364  # Use synonyms as well.
365  my %alias;
366  $sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "'.$official_name.'_%" ';
367  $sth = $dbi->prepare($sql);
368  $sth->execute();
369  my ($syn);
370  $sth->bind_columns(\$name,\$syn);
371  $count = 0;
372  while($sth->fetch()){
373  $alias{$syn} = $name;
374  }
375  $sth->finish;
376 
377  $sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "EntrezGene"';
378  $sth = $dbi->prepare($sql);
379  $sth->execute();
380  $sth->bind_columns(\$name,\$syn);
381  while($sth->fetch()){
382  $alias{$syn} = $name;
383  }
384  $sth->finish;
385 
386 
387 
388  # NOTE ncRNA has higher priority
389  $sql = "select x.display_label, g.gene_id from gene g, xref x where g.display_xref_id = x.xref_id and biotype = 'protein_coding'";
390 
391  $sth = $self->core->dbc->prepare($sql);
392  $sth->execute();
393  $sth->bind_columns(\$name,\$gene_id);
394  $count =0;
395  my $total_count=0;
396  while($sth->fetch()){
397  if(defined($new_name{$gene_id})){
398  $total_count++;
399  }
400  if(defined($new_name{$gene_id}) and $new_name{$gene_id} ne $name){
401  if(!defined($alias{$name}) or $alias{$name} ne $new_name{$gene_id}){
402  print STDERR "WARN: gene_id ($gene_id) ".$id_to_stable_id{$gene_id}." new = ".$new_name{$gene_id}." old = $name\n";
403  $count++;
404  }
405  }
406  }
407  if($total_count){
408  print STDERR "$count entries with different names out of $total_count protein coding gene comparisons?\n";
409  }
410 }
411 
412 
413 sub direct_stable_id_check{
414  my ($self) = @_;
415 
416  my $dbi = $self->xref->dbc;
417 
418  foreach my $type (qw(gene transcript translation)){
419 
420  my $sql = "select s.name, count(*) from source s, xref x, ".$type."_direct_xref gdx left join ".$type."_stable_id gsi on gdx.ensembl_stable_id = gsi.stable_id where s.source_id = x.source_id and x.xref_id = gdx.general_xref_id and gsi.stable_id is null group by s.name";
421 
422  my $sth = $dbi->prepare($sql);
423  $sth->execute();
424  my ($name, $count);
425  $sth->bind_columns(\$name,\$count);
426  my $total_count=0;
427  while($sth->fetch()){
428  print STDERR "WARNING $name has $count invalid stable ids in ".$type."_direct_xrefs\n";
429  $total_count += $count;
430  }
431  $sth->finish;
432  if($total_count){
433  print STDERR "USEFUL SQL: $sql\n";
434  }
435  }
436 
437 }
438 1;
transcript
public transcript()
XrefMapper::db::dbc
public dbc()
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8
XrefMapper::BasicMapper::core
public XrefMapper::db core()