3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefMapper::TestMappings;
35 ##########################################
36 # Testing (may be moved to healthchecks)
37 ##########################################
39 ##### Unlinked entries ##############
42 # dependent_xref and xref
43 # primary_xref and xref
44 # transcript_direct_xref and xref
45 # translation_direct_xref and xref
46 # gene_direct_xref and xref
49 # identity_xref and object_xref
51 # gene_transcript_translation and gene_stable_id
52 # gene_transcript_translation and transcript_stable_id
53 # gene_transcript_translation and translation_stable_id
56 # gene_direct_xref and gene_stable_id
61 ##### Numbers between xref and core (xref and object_xref) are similar
63 ##### if human or mouse check the number of gene name changes.
66 my($class, $mapper) = @_;
70 if (defined($mapper->previous_core)) {
71 $self->
core($mapper->previous_core);
73 $self->core($mapper->core);
75 $self->xref($mapper->xref);
76 $self->mapper($mapper);
82 my ($self, $arg) = @_;
85 ($self->{_mapper} = $arg );
86 return $self->{_mapper};
97 my $dbi = $self->xref->
dbc;
99 my $sth_stat = $dbi->prepare(
"insert into process_status (status, date) values('tests_started',now())");
100 $sth_stat->execute();
102 # dependent_xref and xref
103 my $count_sql =
"select count(1) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null";
104 my $sql =
"select distinct(d.master_xref_id) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null limit 10";
105 my $sth = $dbi->prepare($count_sql);
107 $sth->bind_columns(\$count);
113 $sth = $dbi->prepare($sql);
115 $sth->bind_columns(\$xref_id);
116 print STDERR
"SQL QUERY: $sql\n";
118 print STDERR
"Problem with master xref $xref_id\n";
126 $count_sql =
"select count(1) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null";
128 $sql =
"select distinct(d.dependent_xref_id) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null limit 10";
130 $sth = $dbi->prepare($count_sql);
132 $sth->bind_columns(\$count);
138 $sth = $dbi->prepare($sql);
140 $sth->bind_columns(\$xref_id);
141 print STDERR
"SQL QUERY: $sql\n";
143 print STDERR
"Problem with dependent xref $xref_id\n";
148 $count_sql =
"select count(1) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null";
150 $sql =
"select distinct(d.xref_id) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10";
152 $sth = $dbi->prepare($count_sql);
154 $sth->bind_columns(\$count);
160 $sth = $dbi->prepare($sql);
162 $sth->bind_columns(\$xref_id);
163 print STDERR
"SQL QUERY: $sql\n";
165 print STDERR
"Problem with primary xref $xref_id\n";
170 foreach my $type (qw(
transcript translation gene)){
171 $count_sql =
"select count(1) from ".$type.
"_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null";
173 $sql =
"select distinct(d.general_xref_id) from ".$type.
"_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null limit 10";
175 $sth = $dbi->prepare($count_sql);
177 $sth->bind_columns(\$count);
183 $sth = $dbi->prepare($sql);
185 $sth->bind_columns(\$xref_id);
186 print STDERR
"SQL QUERY: $sql\n";
188 print STDERR
"Problem with ".$type.
"_direct_xref $xref_id\n";
196 $count_sql =
"select count(1) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null";
198 $sql =
"select distinct(d.xref_id) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10";
200 $sth = $dbi->prepare($count_sql);
202 $sth->bind_columns(\$count);
208 $sth = $dbi->prepare($sql);
210 $sth->bind_columns(\$xref_id);
211 print STDERR
"SQL QUERY: $sql\n";
213 print STDERR
"Problem with synonym $xref_id\n";
219 $count_sql =
"select count(1) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null";
221 $sql =
"select distinct(d.object_xref_id) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10";
223 $sth = $dbi->prepare($count_sql);
225 $sth->bind_columns(\$count);
231 $sth = $dbi->prepare($sql);
233 $sth->bind_columns(\$xref_id);
234 print STDERR
"SQL QUERY: $sql\n";
236 print STDERR
"Problem with object_xref $xref_id\n";
241 foreach my $type (qw(
transcript translation gene)){
242 $count_sql =
"select count(1) from gene_transcript_translation d left join ".$type.
"_stable_id x on d.".$type.
"_id = x.internal_id where x.internal_id is null and d.".$type.
"_id is not null";
244 $sql =
"select distinct(d.".$type.
"_id) from gene_transcript_translation d left join ".$type.
"_stable_id x on d.".$type.
"_id = x.internal_id where x.internal_id is null and d.".$type.
"_id is not null limit 10";
246 $sth = $dbi->prepare($count_sql);
248 $sth->bind_columns(\$count);
254 $sth = $dbi->prepare($sql);
256 $sth->bind_columns(\$xref_id);
257 print STDERR
"SQL QUERY: $sql\n";
259 print STDERR
"Problem with ".$type.
"_id $xref_id\n";
268 $sth_stat = $dbi->prepare(
"insert into process_status (status, date) values('tests_finished',now())");
269 $sth_stat->execute();
272 $sth_stat = $dbi->prepare(
"insert into process_status (status, date) values('tests_failed',now())");
273 $sth_stat->execute();
281 sub entry_number_check{
285 # No point doing xrefs object_xrefs are more important and gives a better indication of wether things went okay.
287 my %old_object_xref_count;
288 my %new_object_xref_count;
289 my $dbi = $self->xref->dbc;
291 my $sth = $dbi->prepare(
'select s.name, count(distinct x.xref_id, ensembl_id) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" and s.name not like "AFFY%" group by s.name');
294 $sth->bind_columns(\$name,\$count);
295 while($sth->fetch()){
296 $new_object_xref_count{$name} = $count;
301 $sth = $self->core->dbc->prepare(
'select e.db_name, count(*) from xref x, object_xref ox, external_db e where ox.xref_id = x.xref_id and x.external_db_id = e.external_db_id and e.db_name not like "AFFY%" and (x.info_type is NULL or x.info_type != "PROJECTION") group by e.db_name');
304 $sth->bind_columns(\$name,\$count);
305 while($sth->fetch()){
307 $old_object_xref_count{$name} = $count;
308 if(defined($new_object_xref_count{$name})){
309 $change = (($new_object_xref_count{$name} - $count)/$count) * 100;
310 if($change > 5){ # increase of 5%
311 print
"WARNING: $name has increased by ".int($change).
"\% was $count now ". $new_object_xref_count{$name} .
"\n" if($self->mapper->verbose);
313 elsif($change < -5){ # decrease by 5%
314 print
"WARNING: $name has decreased by ".int($change).
" \% was $count now ". $new_object_xref_count{$name} .
"\n" if($self->mapper->verbose);
318 print
"WARNING: xrefs $name are not in the new database but $count are in the old???\n" if($self->mapper->verbose);
323 foreach my $key (keys %new_object_xref_count){
324 if(!defined($old_object_xref_count{$key})){
325 print
"WARNING: $key has ".$new_object_xref_count{$key} .
" xrefs in the new database but NONE in the old\n" if($self->mapper->verbose);
333 sub name_change_check{
336 my %new_name; # $old_name{$gene_id} = HGNC_%name
338 my $dbi = $self->xref->dbc;
340 my $official_name = $self->mapper->get_official_name;
341 if(!defined($official_name)){
344 # print "Checking names\n";
346 my $sql =
'select x.label, gsi.internal_id, gsi.stable_id from object_xref ox, xref x, gene_stable_id gsi, source s where x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and gsi.internal_id = ox.ensembl_id and x.source_id = s.source_id and s.name like "'.$official_name.
'_%"';
348 my $sth = $dbi->prepare($sql);
350 my ($name, $gene_id, $stable_id);
351 $sth->bind_columns(\$name,\$gene_id, \$stable_id);
353 while($sth->fetch()){
354 $new_name{$gene_id} = $name;
355 $id_to_stable_id{$gene_id} = $stable_id;
360 # print $count." entries found in xref database\n";
364 # Use synonyms as well.
366 $sql =
'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "'.$official_name.
'_%" ';
367 $sth = $dbi->prepare($sql);
370 $sth->bind_columns(\$name,\$syn);
372 while($sth->fetch()){
373 $alias{$syn} = $name;
377 $sql =
'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "EntrezGene"';
378 $sth = $dbi->prepare($sql);
380 $sth->bind_columns(\$name,\$syn);
381 while($sth->fetch()){
382 $alias{$syn} = $name;
388 # NOTE ncRNA has higher priority
389 $sql =
"select x.display_label, g.gene_id from gene g, xref x where g.display_xref_id = x.xref_id and biotype = 'protein_coding'";
391 $sth = $self->core->dbc->prepare($sql);
393 $sth->bind_columns(\$name,\$gene_id);
396 while($sth->fetch()){
397 if(defined($new_name{$gene_id})){
400 if(defined($new_name{$gene_id}) and $new_name{$gene_id} ne $name){
401 if(!defined($alias{$name}) or $alias{$name} ne $new_name{$gene_id}){
402 print STDERR
"WARN: gene_id ($gene_id) ".$id_to_stable_id{$gene_id}.
" new = ".$new_name{$gene_id}.
" old = $name\n";
408 print STDERR
"$count entries with different names out of $total_count protein coding gene comparisons?\n";
413 sub direct_stable_id_check{
416 my $dbi = $self->xref->dbc;
418 foreach my $type (qw(gene
transcript translation)){
420 my $sql =
"select s.name, count(*) from source s, xref x, ".$type.
"_direct_xref gdx left join ".$type.
"_stable_id gsi on gdx.ensembl_stable_id = gsi.stable_id where s.source_id = x.source_id and x.xref_id = gdx.general_xref_id and gsi.stable_id is null group by s.name";
422 my $sth = $dbi->prepare($sql);
425 $sth->bind_columns(\$name,\$count);
427 while($sth->fetch()){
428 print STDERR
"WARNING $name has $count invalid stable ids in ".$type.
"_direct_xrefs\n";
429 $total_count += $count;
433 print STDERR
"USEFUL SQL: $sql\n";