ensembl-hive  2.8.1
XrefLoader.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::XrefLoader;
21 use strict;
22 
23 use vars '@ISA';
24 @ISA = qw{ XrefMapper::BasicMapper };
25 
26 use warnings;
28 
29 use Cwd;
30 use DBI;
31 use File::Basename;
32 use IPC::Open3;
33 
34 sub new {
35  my($class, $mapper) = @_;
36 
37  my $self ={};
38  bless $self,$class;
39  $self->core($mapper->core);
40  $self->xref($mapper->xref);
41  $self->mapper($mapper);
42  return $self;
43 }
44 
45 
46 sub mapper{
47  my ($self, $arg) = @_;
48 
49  (defined $arg) &&
50  ($self->{_mapper} = $arg );
51  return $self->{_mapper};
52 }
53 
54 sub update{
55  my ($self, $arg) = @_;
56  # remove xref, object_xref, identity_xref, depenedent_xref, unmapped_object, external_synonym, projections.
57 
58 
59  my $verbose = $self->mapper->verbose;
60  my $core_dbi = $self->core->dbc;
61  my $xref_dbi = $self->xref->dbc;
62 
63  #####################################
64  # first remove all the projections. #
65  #####################################
66  print "Deleting all PROJECTIONs from this database\n" if $verbose;
67 
68  my $sql = "TRUNCATE table ontology_xref";
69  my $sth = $core_dbi->prepare($sql);
70  $sth->execute();
71  print "\tDeleted all ontology_xref rows\n" if $verbose;
72 
73  $sql = "UPDATE gene g INNER JOIN xref x ON g.display_xref_id=x.xref_id SET g.display_xref_id=NULL,g.description=NULL WHERE x.info_type='PROJECTION'";
74  $sth = $core_dbi->prepare($sql);
75  my $affected_rows = $sth->execute();
76  print "\tSet display_xref_id and description to NULL in $affected_rows gene row(s) relating to PROJECTED xrefs\n" if $verbose;
77 
78  $sql = "DELETE es FROM xref x, external_synonym es WHERE x.xref_id = es.xref_id and x.info_type = 'PROJECTION'";
79  $sth = $core_dbi->prepare($sql);
80  $affected_rows = $sth->execute();
81  print "\tDeleted $affected_rows PROJECTED external_synonym row(s)\n" if $verbose;
82 
83  $sql = "DELETE dx FROM dependent_xref dx, xref x, external_db ed WHERE x.xref_id = dx.dependent_xref_id AND x.external_db_id = ed.external_db_id and ed.db_name = 'GO'";
84  $sth = $core_dbi->prepare($sql);
85  $affected_rows = $sth->execute();
86  print "\tDeleted $affected_rows GO dependent_xref row(s)\n" if $verbose;
87 
88  $sql = "DELETE ox FROM object_xref ox, xref x, external_db e WHERE x.xref_id = ox.xref_id AND x.external_db_id =e.external_db_id and e.db_name = 'GO'";
89  $sth = $core_dbi->prepare($sql);
90  $sth->execute();
91 
92  $sql = "DELETE dx FROM dependent_xref dx, xref x WHERE x.xref_id = dx.dependent_xref_id AND x.info_type = 'PROJECTION'";
93  $sth = $core_dbi->prepare($sql);
94  $affected_rows = $sth->execute();
95  print "\tDeleted $affected_rows PROJECTED dependent_xref row(s)\n" if $verbose;
96 
97  $sql = "DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.info_type = 'PROJECTION'";
98  $sth = $core_dbi->prepare($sql);
99  $affected_rows = $sth->execute();
100  print "\tDeleted $affected_rows PROJECTED object_xref row(s)\n" if $verbose;
101 
102  $sql = "DELETE xref FROM xref WHERE xref.info_type = 'PROJECTION'";
103  $sth = $core_dbi->prepare($sql);
104  $affected_rows = $sth->execute();
105  print "\tDeleted $affected_rows PROJECTED xref row(s)\n" if $verbose;
106 
107  $sth->finish;
108 
109  #########################################
110  # Get source_id to external_db_id #
111  #########################################
112 
113  my %name_to_external_db_id;
114  $sql = "select external_db_id, db_name from external_db";
115  $sth = $core_dbi->prepare($sql);
116  $sth->execute();
117  my ($id, $name);
118  $sth->bind_columns(\$id, \$name);
119  while($sth->fetch()){
120  $name_to_external_db_id{$name} = $id;
121  }
122  $sth->finish;
123 
124  my %source_id_to_external_db_id;
125 
126  $sql = 'select s.source_id, s.name from source s, xref x where x.source_id = s.source_id group by s.source_id'; # only get those of interest
127  $sth = $xref_dbi->prepare($sql);
128  $sth->execute();
129  $sth->bind_columns(\$id, \$name);
130  while($sth->fetch()){
131  if(defined($name_to_external_db_id{$name})){
132  $source_id_to_external_db_id{$id} = $name_to_external_db_id{$name};
133  }
134  elsif( $name =~ /notransfer$/){
135  }
136  else{
137  die "ERROR: Could not find $name in external_db table please add this too continue";
138  }
139  }
140  $sth->finish;
141 
142 
143  $sth = $xref_dbi->prepare("update xref set dumped = null where dumped != 'NO_DUMP_ANOTHER_PRIORITY'"); # just incase this is being ran again
144  $sth->execute;
145  $sth->finish;
146 
147 
148 
149 
150  ######################################
151  # For each external_db to be updated #
152  # Delete the existing ones #
153  ######################################
154  my ($count);
155  $sth = $xref_dbi->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id group by s.name');
156  $sth->execute();
157  $sth->bind_columns(\$name,\$count);
158 
159  my $synonym_sth = $core_dbi->prepare('DELETE external_synonym FROM external_synonym, xref WHERE external_synonym.xref_id = xref.xref_id AND xref.external_db_id = ?');
160  my $identity_sth = $core_dbi->prepare('DELETE identity_xref FROM identity_xref, object_xref, xref WHERE identity_xref.object_xref_id = object_xref.object_xref_id AND object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?');
161  my $object_sth = $core_dbi->prepare('DELETE object_xref FROM object_xref, xref WHERE object_xref.xref_id = xref.xref_id AND xref.external_db_id = ?');
162  my $master_sth = $core_dbi->prepare('DELETE ox, d FROM xref mx, xref x, dependent_xref d LEFT JOIN object_xref ox ON ox.object_xref_id = d.object_xref_id WHERE mx.xref_id = d.master_xref_id AND dependent_xref_id = x.xref_id AND mx.external_db_id = ?');
163  my $dependent_sth = $core_dbi->prepare('DELETE d FROM dependent_xref d, xref x WHERE d.dependent_xref_id = x.xref_id and x.external_db_id = ?');
164  my $xref_sth = $core_dbi->prepare('DELETE FROM xref WHERE xref.external_db_id = ?');
165  my $unmapped_sth = $core_dbi->prepare('DELETE FROM unmapped_object WHERE type="xref" and external_db_id = ?');
166 
167  my $gene_reset_sth = $core_dbi->prepare('UPDATE gene g INNER JOIN xref x ON g.display_xref_id=x.xref_id SET g.display_xref_id=NULL,g.description=NULL WHERE x.external_db_id=?');
168 
169 
170  my $transaction_start_sth = $core_dbi->prepare('start transaction');
171  my $transaction_end_sth = $core_dbi->prepare('commit');
172 
173 #
174 # ?? Is it faster to delete them all in one go with a external_db_id in (....) ???
175 # alternative load ottt etc that are not obtained from xrefs into xref table and then delete tables fully??
176 #
177 
178 
179 
180 # my $test =1; # Can take a while so make optional when testing
181 # if(!$test){
182  $transaction_start_sth->execute();
183  while($sth->fetch()){
184  if(!defined($name_to_external_db_id{$name})){
185  next; #must end in notransfer
186  }
187 
188  my $ex_id = $name_to_external_db_id{$name};
189 
190  print "Setting display_xref_id and description to NULL if it refers to xrefs that are to be deleted\n" if ($verbose);
191  $affected_rows = $gene_reset_sth->execute($ex_id);
192  print "\tSet display_xref_id=NULL and description=NULL for $affected_rows gene row(s)\n" if ($verbose);
193 
194  print "Deleting data for $name from core before updating from new xref database\n" if ($verbose);
195  $affected_rows = $synonym_sth->execute($ex_id);
196  print "\tDeleted $affected_rows external_synonym row(s)\n" if $verbose;
197  $affected_rows = $identity_sth->execute($ex_id);
198  print "\tDeleted $affected_rows identity_xref row(s)\n" if $verbose;
199  $affected_rows = $object_sth->execute($ex_id);
200  print "\tDeleted $affected_rows object_xref row(s)\n" if $verbose;
201  $affected_rows = $master_sth->execute($ex_id);
202  print "\tDeleted $affected_rows xref, object_xref and dependent_xref row(s)\n" if $verbose;
203  $affected_rows = $dependent_sth->execute($ex_id);
204  print "\tDeleted $affected_rows dependent_xref row(s)\n" if $verbose;
205  $affected_rows = $xref_sth->execute($ex_id);
206  print "\tDeleted $affected_rows xref row(s)\n" if $verbose;
207  $affected_rows = $unmapped_sth->execute($ex_id);
208  print "\tDeleted $affected_rows unmapped_object row(s)\n" if $verbose;
209  }
210  $sth->finish;
211  $transaction_end_sth->execute();
212  $synonym_sth->finish;
213  $identity_sth->finish;
214  $object_sth->finish;
215  $dependent_sth->finish;
216  $xref_sth->finish;
217  $unmapped_sth->finish;
218 
219 
220 
221  ##########################################
222  # Get the offsets for object_xref, xref #
223  ##########################################
224 
225  $sth = $core_dbi->prepare('select MAX(xref_id) from xref');
226  my $xref_offset;
227  $sth->execute;
228  $sth->bind_columns(\$xref_offset);
229  $sth->fetch();
230  $sth->finish;
231  $xref_offset = 0 if(!defined($xref_offset));
232 
233  $self->add_meta_pair("xref_offset", $xref_offset);
234 
235  $sth = $core_dbi->prepare('select MAX(object_xref_id) from object_xref');
236  my $object_xref_offset;
237  $sth->execute;
238  $sth->bind_columns(\$object_xref_offset);
239  $sth->fetch();
240  $sth->finish;
241  $object_xref_offset = 0 if(!defined($object_xref_offset));
242 
243  $self->add_meta_pair("object_xref_offset", $object_xref_offset);
244 
245 
246  ####################
247  # Get analysis id's
248  ####################
249 
250  my %analysis_ids = $self->get_analysis();
251  my $checksum_analysis_id; #do not populate until we know we need this
252 
253 
254  print "xref offset is $xref_offset, object_xref offset is $object_xref_offset\n" if ($verbose);
255 
256  #####################################
257  # Now add the new ones #
258  #####################################
259 
260  ###########################
261  # SQL to get data from xref
262  ###########################
263 
264  my $seq_sql =(<<DIRS);
265 SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text,
266  ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type,
267  i.query_identity, i.target_identity, i.hit_start, i.hit_end,
268  i.translation_start, i.translation_end, i.cigar_line, i.score, i.evalue
269  FROM xref x, object_xref ox, identity_xref i
270  WHERE ox.ox_status = "DUMP_OUT" AND
271  i.object_xref_id = ox.object_xref_id AND
272  ox.xref_id = x.xref_id AND
273  x.source_id = ? AND
274  x.info_type = ? order by x.xref_id
275 DIRS
276 
277  my $seq_sth = $xref_dbi->prepare($seq_sql);
278 
279 
280  ###########################
281  # SQL to get data from xref without identity xref
282  ###########################
283 
284 
285  my $dir_sql =(<<DIRS);
286 SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text,
287  ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type
288  FROM xref x, object_xref ox
289  WHERE ox.ox_status = "DUMP_OUT" AND
290  ox.xref_id = x.xref_id AND
291  x.source_id = ? AND
292  x.info_type = ? order by x.xref_id
293 DIRS
294 
295  my $dir_sth = $xref_dbi->prepare($dir_sql);
296 
297 # $dependent_sth = $xref_dbi->prepare('select x.xref_id, x.accession, x.label, x.version, x.description, x.info_text, ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, d.master_xref_id from xref x, object_xref ox, dependent_xref d where ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and d.object_xref_id = ox.object_xref_id and x.source_id = ? and x.info_type = ? order by x.xref_id, ox.ensembl_id');
298 
299  my $dep_sql =(<<DSQL);
300 SELECT x.xref_id, x.accession, x.label, x.version, x.description, x.info_text,
301  ox.object_xref_id, ox.ensembl_id, ox.ensembl_object_type, ox.master_xref_id
302  FROM xref x, object_xref ox, xref mx, source s
303  WHERE ox.ox_status = "DUMP_OUT" and
304  ox.xref_id = x.xref_id and
305  ox.master_xref_id = mx.xref_id and
306  mx.source_id = s.source_id and
307  x.source_id = ? and
308  x.info_type = ?
309  ORDER BY x.xref_id, ox.ensembl_id, s.ordered
310 DSQL
311 
312  $dependent_sth = $xref_dbi->prepare($dep_sql);
313 
314 
315  # SQL to add data to core
316  #########################
317 
318  my $add_identity_xref_sth = $core_dbi->prepare('insert ignore into identity_xref (object_xref_id, xref_identity, ensembl_identity, xref_start, xref_end, ensembl_start, ensembl_end, cigar_line, score, evalue) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
319  my $add_dependent_xref_sth = $core_dbi->prepare('insert ignore into dependent_xref (object_xref_id, master_xref_id, dependent_xref_id) values (?, ?, ?)');
320  my $add_syn_sth = $core_dbi->prepare('insert ignore into external_synonym (xref_id, synonym) values (?, ?)');
321  my $add_release_info_sth = $core_dbi->prepare('update external_db set db_release = ? where external_db_id = ?');
322 
323  $sth = $xref_dbi->prepare('select s.name, s.source_id, count(*), x.info_type, s.priority_description, s.source_release from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" group by s.name, s.source_id, x.info_type');
324  $sth->execute();
325  my ($type, $source_id, $where_from, $release_info);
326  $sth->bind_columns(\$name,\$source_id, \$count, \$type, \$where_from, \$release_info);
327 
328  $transaction_start_sth->execute();
329 
330  while($sth->fetch()){
331 
332  next if(!defined($name_to_external_db_id{$name}));
333 
334  if(defined($where_from) and $where_from ne ""){
335  $where_from = "Generated via $where_from";
336  }
337  my $ex_id = $name_to_external_db_id{$name};
338 
339  print "updating ($source_id) $name in core (for $type xrefs)\n" if ($verbose);
340 
341  my @xref_list=(); # process at end. Add synonyms and set dumped = 1;
342 
343  # dump SEQUENCE_MATCH, DEPENDENT, DIRECT, COORDINATE_OVERLAP, INFERRED_PAIR, (MISC?? same as direct come from official naming)
344 
345  ### If DIRECT , xref, object_xref, (order by xref_id) # maybe linked to more than one?
346  ### if INFERRED_PAIR xref, object_xref
347  ### if MISC xref, object_xref
348 
349 
350  if($type eq "DIRECT" or $type eq "INFERRED_PAIR" or $type eq "MISC"){
351  my $count = 0;
352  $seq_sth->execute($source_id, $type);
353  my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type);
354  my ( $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue);
355  $seq_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type,
356  \$query_identity, \$target_identity, \$hit_start, \$hit_end, \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
357  my $last_xref = 0;
358  while($seq_sth->fetch){
359  if($last_xref != $xref_id){
360  push @xref_list, $xref_id;
361  $count++;
362  $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi);
363  $last_xref = $xref_id;
364  }
365  $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, $core_dbi);
366  $add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end,
367  $translation_start, $translation_end, $cigar_line, $score, $evalue) if $translation_start;
368  }
369  print "DIRECT $count\n" if ($verbose);
370  }
371  ### IF CHECKSUM, xref, object_xref
372  # 1:m mapping between object & xref
373  elsif($type eq 'CHECKSUM') {
374  #If we had a checksum then get the analysis. Avoids unecessary analysis entries
375  if(! defined $checksum_analysis_id) {
376  $checksum_analysis_id = $self->get_single_analysis('xrefchecksum');
377  }
378  my $count = 0;
379  $dir_sth->execute($source_id, $type);
380  my $last_xref = 0;
381  while(my $row = $dir_sth->fetchrow_arrayref()) {
382  my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type) = @{$row};
383  if($last_xref != $xref_id) {
384  push @xref_list, $xref_id;
385  $count++;
386  $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi);
387  $last_xref = $xref_id;
388  }
389  $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $checksum_analysis_id, $core_dbi);
390  }
391  print "CHECKSUM $count\n" if ($verbose);
392  }
393 
394  ### If DEPENDENT, xref, object_xref , dependent_xref (order by xref_id) # maybe linked to more than one?
395 
396  elsif($type eq "DEPENDENT"){
397  my $count = 0;
398  my $ox_count = 0;
399  my @master_problems;
400  my $err_master_count=0;
401  $dependent_sth->execute($source_id, $type);
402  my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type, $master_xref_id);
403  $dependent_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type, \$master_xref_id);
404  my $last_xref = 0;
405  my $last_ensembl = 0;
406  while($dependent_sth->fetch){
407  if($last_xref != $xref_id){
408  push @xref_list, $xref_id;
409  $count++;
410  $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label || $acc, $version, $desc, $type, $info || $where_from, $core_dbi);
411  }
412  if($last_xref != $xref_id or $last_ensembl != $ensembl_id){
413  $object_xref_id = $self->add_object_xref($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, $core_dbi);
414  if (defined($master_xref_id)){ # need to sort this out for FlyBase since there are EMBL direct entries from the GFF and dependent xrefs from Uniprot
415  $add_dependent_xref_sth->execute(($object_xref_id+$object_xref_offset), ($master_xref_id+$xref_offset), ($xref_id+$xref_offset) );
416  }
417  else{
418  if($err_master_count < 10){
419  push @master_problems, $acc;
420  }
421  $err_master_count++;
422  }
423  $ox_count++;
424  }
425  $last_xref = $xref_id;
426  $last_ensembl = $ensembl_id;
427  }
428  if(@master_problems){
429  print "WARNING:: for $name $err_master_count problem master xrefs\nExamples are :-\t";
430  print join ", ",@master_problems;
431  print "\n";
432  }
433  print "DEP $count xrefs, $ox_count object_xrefs\n" if ($verbose);
434  }
435  ### If SEQUENCE_MATCH xref, object_xref, identity_xref (order by xref_id) # maybe linked to more than one?
436 
437  elsif($type eq "SEQUENCE_MATCH"){
438  my $count = 0;
439  $seq_sth->execute($source_id, $type);
440  my ($xref_id, $acc, $label, $version, $desc, $info, $object_xref_id, $ensembl_id, $ensembl_type);
441  my ( $query_identity, $target_identity, $hit_start, $hit_end, $translation_start, $translation_end, $cigar_line, $score, $evalue);
442  $seq_sth->bind_columns(\$xref_id, \$acc, \$label, \$version, \$desc, \$info, \$object_xref_id, \$ensembl_id, \$ensembl_type,
443  \$query_identity, \$target_identity, \$hit_start, \$hit_end, \$translation_start, \$translation_end, \$cigar_line, \$score, \$evalue);
444  my $last_xref = 0;
445  while($seq_sth->fetch){
446  if($last_xref != $xref_id){
447  push @xref_list, $xref_id;
448  $count++;
449  $xref_id = $self->add_xref($xref_offset, $xref_id, $ex_id, $acc, $label, $version, $desc, $type, $info || $where_from, $core_dbi);
450  $last_xref = $xref_id;
451  }
452  $object_xref_id = $self->add_object_xref ($object_xref_offset, $object_xref_id, $ensembl_id, $ensembl_type, ($xref_id+$xref_offset), $analysis_ids{$ensembl_type}, $core_dbi);
453  $add_identity_xref_sth->execute( ($object_xref_id+$object_xref_offset), $query_identity, $target_identity, $hit_start, $hit_end,
454  $translation_start, $translation_end, $cigar_line, $score, $evalue);
455  }
456  print "SEQ $count\n" if ($verbose);
457  }
458  else{
459  print "PROBLEM:: what type is $type\n";
460  }
461 
462 
463  # Transfer data for synonym and set xref database xrefs to dumped.
464  if(@xref_list){
465  my $syn_count = 0;
466  my $syn_sql = "select xref_id, synonym from synonym where xref_id in(".join(", ",@xref_list).")";
467  my $syn_sth = $xref_dbi->prepare($syn_sql);
468  $syn_sth->execute();
469 
470  my ($xref_id, $syn);
471  $syn_sth->bind_columns(\$xref_id, \$syn);
472  while($syn_sth->fetch()){
473  $add_syn_sth->execute(($xref_id+$xref_offset), $syn);
474  $syn_count++;
475  }
476  $syn_sth->finish;
477 
478  print "\tadded $syn_count synonyms\n" if($syn_count);
479  my $xref_dumped_sth = $xref_dbi->prepare("update xref set dumped = 'MAPPED' where xref_id in (".join(", ",@xref_list).")");
480  $xref_dumped_sth->execute() || die "Could not set dumped status";
481  $xref_dumped_sth->finish;
482  }
483 
484  # Update the core databases release in for source form the xref database
485  if(defined($release_info) and $release_info ne "1"){
486  $add_release_info_sth->execute($release_info, $ex_id) || die "Failed to add release info **$release_info** for external source $ex_id\n";
487  }
488  }
489  $sth->finish;
490  $seq_sth->finish;
491  $dir_sth->finish;
492  $transaction_end_sth->execute();
493 
494 
495  #######################################
496  # Remember to do unmapped entries
497  ########################################
498 
499 
500  ##########
501  # DIRECT #
502  ##########
503 
504  $sql =(<<DIR);
505  UPDATE source s,xref x
506  LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
507  SET dumped = 'UNMAPPED_NO_STABLE_ID'
508  WHERE x.source_id = s.source_id
509  AND x.dumped is null
510  AND ox.ox_status != 'FAILED_PRIORITY'
511  AND x.info_type = 'DIRECT'
512 DIR
513 
514  my $direct_unmapped_sth = $xref_dbi->prepare($sql);
515  $direct_unmapped_sth->execute();
516 
517  ########
518  # MISC #
519  ########
520 
521  $sql =(<<MIS);
522  UPDATE xref x, source s
523  SET dumped = 'UNMAPPED_NO_MAPPING'
524  WHERE x.source_id = s.source_id
525  AND x.dumped is null
526  AND x.info_type = 'MISC'
527 MIS
528 
529  my $misc_unmapped_sth = $xref_dbi->prepare($sql);
530  $misc_unmapped_sth->execute();
531 
532  #############
533  # DEPENDENT #
534  #############
535 
536  $sql = (<<DEP);
537  UPDATE xref mx, source s, xref x
538  LEFT JOIN dependent_xref dx ON dx.dependent_xref_id = x.xref_id
539  LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
540  SET x.dumped = 'UNMAPPED_MASTER_FAILED'
541  WHERE x.source_id = s.source_id
542  AND dx.master_xref_id = mx.xref_id
543  AND x.dumped is null
544  AND ox.ox_status != 'FAILED_PRIORITY'
545  AND x.info_type = 'DEPENDENT'
546 DEP
547 
548  my $dep_unmapped_sth = $xref_dbi->prepare($sql);
549  $dep_unmapped_sth->execute();
550 
551  ##################
552  # SEQUENCE_MATCH #
553  ##################
554 
555  $sql = (<<SEQ);
556  UPDATE source s, primary_xref px, xref x
557  LEFT JOIN object_xref ox ON ox.xref_id = x.xref_id
558  LEFT JOIN identity_xref ix ON ix.object_xref_id = ox.object_xref_id
559  SET x.dumped = 'UNMAPPED_NO_MAPPING'
560  WHERE x.source_id = s.source_id
561  AND px.xref_id = x.xref_id
562  AND x.dumped is null
563  AND x.info_type = 'SEQUENCE_MATCH'
564 SEQ
565 
566  my $seq_unmapped_sth = $xref_dbi->prepare($sql);
567  $seq_unmapped_sth->execute();
568 
569  ###########################
570  # WEL (What ever is left).#
571  ###########################
572 
573  # These are those defined as dependent but the master never existed and the xref and their descriptions etc are loaded first
574  # with the dependencys added later so did not know they had no masters at time of loading.
575  # (e.g. EntrezGene, WikiGene, MIN_GENE, MIM_MORBID)
576 
577  $sql = (<<WEL);
578  UPDATE source s, xref x
579  SET dumped = 'UNMAPPED_NO_MASTER'
580  WHERE x.source_id = s.source_id
581  AND x.dumped is null
582  AND x.info_type = 'DEPENDENT'
583 WEL
584 
585 
586  my $wel_unmapped_sth = $xref_dbi->prepare($sql);
587  $wel_unmapped_sth->execute();
588 
589  my $sth_stat = $xref_dbi->prepare("insert into process_status (status, date) values('core_loaded',now())");
590  $sth_stat->execute();
591  $sth_stat->finish;
592 
593 
594 
595 }
596 
597 
598 sub get_analysis{
599  my $self = shift;
600  my %typeToLogicName = ( 'Gene' => 'xrefexoneratedna',
601  'Transcript' => 'xrefexoneratedna',
602  'Translation' => 'xrefexonerateprotein');
603  my %analysis_id;
604  foreach my $key (qw(Gene Transcript Translation)){
605  my $logic_name = $typeToLogicName{$key};
606  $analysis_id{$key} = $self->get_single_analysis($logic_name);
607  }
608  return %analysis_id;
609 }
610 
611 sub get_single_analysis {
612  my ($self, $logic_name) = @_;
613  my $h = $self->core->dbc()->sql_helper();
614  my $analysis_ids = $h->execute_simple(
615  -SQL => 'SELECT analysis_id FROM analysis WHERE logic_name=?',
616  -PARAMS => [$logic_name]
617  );
618  my $analysis_id;
619 
620  if(@{$analysis_ids}) {
621  $analysis_id = $analysis_ids->[0];
622  }
623  else {
624  print "No analysis with logic_name $logic_name found, creating ...\n" if ($self->verbose);
625  # TODO - other fields in analysis table
626  $self->core()->dbc()->sql_helper()->execute_update(
627  -SQL => 'INSERT INTO analysis (logic_name, created) VALUES (?,NOW())',
628  -PARAMS => [$logic_name],
629  -CALLBACK => sub {
630  my ($sth) = @_;
631  $analysis_id = $sth->{'mysql_insertid'};
632  return;
633  }
634  );
635  }
636 
637  return $analysis_id;
638 }
639 
640 
641 sub add_xref {
642  my ($self, $offset, $xref_id, $external_db_id, $dbprimary_acc, $display_label, $version, $description, $info_type, $info_text, $dbc) = @_;
643  my $select_sth = $dbc->prepare("select xref_id from xref where dbprimary_acc = ? and external_db_id = ? and info_type = ? and info_text = ? and version = ?");
644  my $insert_sth = $dbc->prepare("insert into xref (xref_id, external_db_id, dbprimary_acc, display_label, version, description, info_type, info_text) values (?, ?, ?, ?, ?, ?, ?, ?)");
645  my $new_xref_id;
646  $select_sth->execute($dbprimary_acc, $external_db_id, $info_type, $info_text, $version);
647  $select_sth->bind_columns(\$new_xref_id);
648  $select_sth->fetch();
649  if (!$new_xref_id) {
650  $insert_sth->execute(($xref_id+$offset), $external_db_id, $dbprimary_acc, $display_label, $version, $description, $info_type, $info_text);
651  return $xref_id;
652  } else {
653  return $new_xref_id - $offset;
654  }
655 }
656 
657 sub add_object_xref {
658  my ($self, $offset, $object_xref_id, $ensembl_id, $ensembl_object_type, $xref_id, $analysis_id, $dbc) = @_;
659  my $select_sth = $dbc->prepare("select object_xref_id from object_xref where xref_id = ? and ensembl_object_type = ? and ensembl_id = ? and analysis_id = ?");
660  my $insert_sth = $dbc->prepare("insert ignore into object_xref (object_xref_id, ensembl_id, ensembl_object_type, xref_id, analysis_id) values (?, ?, ?, ?, ?)");
661  my $new_object_xref_id;
662  $select_sth->execute($xref_id, $ensembl_object_type, $ensembl_id, $analysis_id);
663  $select_sth->bind_columns(\$new_object_xref_id);
664  $select_sth->fetch();
665  if (!$new_object_xref_id) {
666  $insert_sth->execute(($object_xref_id+$offset), $ensembl_id, $ensembl_object_type, $xref_id, $analysis_id);
667  return $object_xref_id;
668  } else {
669  return $new_object_xref_id - $offset;
670  }
671 }
672 
673 
674 
675 1;
XrefMapper::db::dbc
public dbc()
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8
XrefMapper::BasicMapper::core
public XrefMapper::db core()