ensembl-hive  2.8.1
BaseParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::BaseParser;
21 
22 use strict;
23 use warnings;
24 
28 use Carp;
29 use DBI;
30 use Getopt::Long;
31 
32 my $base_dir = File::Spec->curdir();
33 
34 my %xref_dependent_mapped;
35 
36 
37 my $verbose;
38 
39 
40 ###################################################
41 # Create new object.
42 # set global $verbose
43 # Store the dbi form the database for easy access
44 ###################################################
45 sub new
46 {
47  my ($proto, $database, $is_verbose) = @_;
48  my $dbh;
49  if(!defined $database){
50  croak 'No database specified';
51  } elsif ( $database->isa('XrefParser::Database') ) {
52  $dbh = $database->dbi;
53  } elsif ( $database->isa('DBI::db') ) {
54  $dbh = $database;
55  } else {
56  croak sprintf 'I do not recognise your %s class. It must be XrefParser::Database or a DBI $dbh instance'."\n",ref $database;
57  }
58  $verbose = $is_verbose;
59 
60  my $class = ref $proto || $proto;
61  my $self = bless {}, $class;
62  $self->dbi($dbh);
63  return $self;
64 }
65 
66 
67 ##################################
68 # Getter/Setter for the dbi object
69 ##################################
70 sub dbi {
71  my ($self, $arg) = @_;
72 
73  (defined $arg) &&
74  ($self->{_dbi} = $arg );
75  return $self->{_dbi};
76 }
77 
78 
79 #######################################################################
80 # Given a file name, returns a IO::Handle object. If the file is
81 # gzipped, the handle will be to an unseekable stream coming out of a
82 # zcat pipe. If the given file name doesn't correspond to an existing
83 # file, the routine will try to add '.gz' to the file name or to remove
84 # any .'Z' or '.gz' and try again, printing a warning to stderr if the
85 # alternative name does match an existing file.
86 # Possible error states:
87 # - throws if no file name has been given, or if neither the provided
88 # name nor the alternative one points to an existing file
89 # - returns undef if an uncompressed file could not be opened for
90 # reading, or if the command 'zcat' is not in the search path
91 # - at least on Perl installations with multi-threading enabled,
92 # returns a VALID FILE HANDLE if zcat can be called but it cannot
93 # open the target file for reading - zcat will only report an error
94 # upon the first attempt to read from the handle
95 #######################################################################
97 {
98  my ($self, $file_name) = @_;
99 
100  my $io =undef;
101 
102  if(!(defined $file_name) or $file_name eq ''){
103  confess "No file name";
104  }
105  my $alt_file_name = $file_name;
106  $alt_file_name =~ s/\.(gz|Z)$//x;
107 
108  if ( $alt_file_name eq $file_name ) {
109  $alt_file_name .= '.gz';
110  }
111 
112  if ( !-e $file_name ) {
113  if ( ! -e $alt_file_name ) {
114  confess "Could not find either '$file_name' or '$alt_file_name'";
115  }
116  carp( "File '$file_name' does not exist, "
117  . "will try '$alt_file_name'" );
118  $file_name = $alt_file_name;
119  }
120 
121  if ( $file_name =~ /\.(gz|Z)$/x ) {
122  # Read from zcat pipe
123  $io = IO::File->new("zcat $file_name |")
124  or carp("Can not call 'zcat' to open file '$file_name'");
125  } else {
126  # Read file normally
127  $io = IO::File->new($file_name)
128  or carp("Can not open file '$file_name'");
129  }
130 
131  if ( !defined $io ) { return }
132 
133  if ($verbose) {
134  print "Reading from '$file_name'...\n" || croak 'Could not print out message';
135  }
136 
137  return $io;
138 }
139 
140 
141 #############################################
142 # Get source ID for a particular source name
143 #
144 # Arg[1] source name
145 # Arg[2] priority description
146 #
147 # Returns source_id, or throws if not found
148 #############################################
149 sub get_source_id_for_source_name {
150  my ($self, $source_name,$priority_desc, $dbi) = @_;
151  $dbi = $self->dbi unless defined $dbi;
152 
153  my $low_name = lc $source_name;
154  my $sql = "SELECT source_id FROM source WHERE LOWER(name)='$low_name'";
155  if(defined $priority_desc){
156  $low_name = lc $priority_desc;
157  $sql .= " AND LOWER(priority_description)='$low_name'";
158  $source_name .= " ($priority_desc)";
159  }
160  my $sth = $dbi->prepare($sql);
161  $sth->execute();
162  my @row = $sth->fetchrow_array();
163  my $source_id;
164  if (@row) {
165  $source_id = $row[0];
166  } else {
167  my $msg = "No source_id for source_name='${source_name}'";
168  if ( defined $priority_desc ) {
169  $msg .= "priority_desc='${priority_desc}'";
170  }
171  confess $msg;
172  }
173  $sth->finish();
174  return $source_id;
175 }
176 
177 
178 
179 ############################################################
180 # Get a set of source IDs matching a source name pattern
181 #
182 # Adds % to each end of the source name and doe a like query
183 # to find all the matching source names source_ids.
184 #
185 # Returns an empty list if none found.
186 ############################################################
187 sub get_source_ids_for_source_name_pattern {
188 
189  my ($self, $source_name, $dbi) = @_;
190  $dbi = $self->dbi unless defined $dbi;
191 
192  my $big_name = uc $source_name;
193  my $sql = "SELECT source_id FROM source WHERE upper(name) LIKE '%${big_name}%'";
194 
195  my $sth = $dbi->prepare($sql);
196  my @sources;
197  $sth->execute();
198  while(my @row = $sth->fetchrow_array()){
199  push @sources,$row[0];
200  }
201  $sth->finish;
202 
203  return @sources;
204 
205 }
206 
207 
208 ###############################
209 # From a source_id get the name
210 ###############################
211 sub get_source_name_for_source_id {
212  my ($self, $source_id, $dbi) = @_;
213  $dbi = $self->dbi unless defined $dbi;
214  my $source_name;
215 
216  my $sql = "SELECT name FROM source WHERE source_id= '$source_id'";
217  my $sth = $dbi->prepare($sql);
218  $sth->execute();
219  my @row = $sth->fetchrow_array();
220  if (@row) {
221  $source_name = $row[0];
222  } else {
223  carp "There is no entity with source-id $source_id in the source-table of the \n";
224  carp "xref-database. The source-id and the name of the source-id is hard-coded in populate_metadata.sql\n" ;
225  carp "and in the parser\n";
226  carp "Couldn't get source name for source ID $source_id\n";
227  $source_name = '-1';
228  }
229  $sth->finish;
230  return $source_name;
231 }
232 
233 
234 
235 ####################################################
236 # Get a hash to go from accession of a dependent xref
237 # to master_xref_id for all of source names given
238 #####################################################
239 sub get_valid_xrefs_for_dependencies{
240  my ($self, $dependent_name, $dbi, @reverse_ordered_source_list) = @_;
241  $dbi = $self->dbi unless defined $dbi;
242 
243  my %dependent_2_xref;
244 
245  my $sql = 'select source_id from source where LOWER(name) =?';
246  my $sth = $dbi->prepare($sql);
247  my @dependent_sources;
248  $sth->execute(lc $dependent_name);
249  while(my @row = $sth->fetchrow_array()){
250  push @dependent_sources,$row[0];
251  }
252 
253  my @sources;
254  foreach my $name (@reverse_ordered_source_list){
255  $sth->execute(lc $name);
256  while(my @row = $sth->fetchrow_array()){
257  push @sources,$row[0];
258  }
259  }
260  $sth->finish;
261 
262  my $dep_sql = (<<'DSS');
263  SELECT d.master_xref_id, x2.accession
264  FROM dependent_xref d, xref x1, xref x2
265  WHERE x1.xref_id = d.master_xref_id AND
266  x1.source_id = ? AND
267  x2.xref_id = d.dependent_xref_id AND
268  x2.source_id = ?
269 DSS
270 
271  $sth = $dbi->prepare($dep_sql);
272  foreach my $d (@dependent_sources){
273  foreach my $s (@sources){
274  $sth->execute($s,$d);
275  while(my @row = $sth->fetchrow_array()){
276  $dependent_2_xref{$row[1]} = $row[0];
277  }
278  }
279  }
280  $sth->finish;
281  return \%dependent_2_xref;
282 }
283 
284 
285 
286 ####################################################
287 # Get a hash to go from accession of a direct xref
288 # to master_xref_id for all of source names given
289 #####################################################
290 sub get_valid_xrefs_for_direct_xrefs{
291  my ($self, $direct_name, $separator, $dbi) = @_;
292  $dbi = $self->dbi unless defined $dbi;
293 
294  my %direct_2_xref;
295 
296  my $sql = 'select source_id from source where name like ?';
297  my $sth = $dbi->prepare($sql);
298  my @direct_sources;
299  $sth->execute("${direct_name}%");
300  while(my @row = $sth->fetchrow_array()){
301  push @direct_sources,$row[0];
302  }
303  $sth->finish;
304 
305  my $gen_sql =(<<"GDS");
306 SELECT d.general_xref_id, d.ensembl_stable_id, 'TYPE', d.linkage_xref, x1.accession
307  FROM TABLE_direct_xref d, xref x1
308  WHERE x1.xref_id = d.general_xref_id AND
309  x1.source_id=?
310 GDS
311 
312  my @sth;
313  my $i=0;
314  foreach my $type (qw(Gene Transcript Translation)){
315  my $t_sql = $gen_sql;
316  my $table = lc $type;
317  $t_sql =~ s/TABLE/$table/xsm;
318  $t_sql =~ s/TYPE/$type/xsm;
319 
320  $sth[$i++] = $dbi->prepare($t_sql);
321  }
322 
323  foreach my $d (@direct_sources){
324  for my $ii (0..2) {
325  $sth[$ii]->execute($d);
326  while(my ($gen_xref_id, $stable_id, $type, $link, $acc) = $sth[$ii]->fetchrow_array()){
327  $direct_2_xref{$acc} = $gen_xref_id.$separator.$stable_id.$separator.$type.$separator.$link;
328  }
329  $sth[$ii]->finish();
330  }
331  }
332 
333  return \%direct_2_xref;
334 }
335 
336 
337 #############################################
338 # Get a hash of label to acc for a particular
339 # source name and species_id
340 #############################################
341 sub label_to_acc{
342 
343  my ($self,$source_name,$species_id, $dbi) =@_;
344  $dbi = $self->dbi unless defined $dbi;
345 
346  # First cache synonyms so we can quickly add them later
347  my %synonyms;
348  my $syn_sth = $dbi->prepare('SELECT xref_id, synonym FROM synonym');
349  $syn_sth->execute();
350 
351  my ($xref_id, $synonym);
352  $syn_sth->bind_columns(\$xref_id, \$synonym);
353  while ($syn_sth->fetch()) {
354 
355  push @{$synonyms{$xref_id}}, $synonym;
356 
357  }
358  $syn_sth->finish;
359 
360  my %valid_codes;
361  my @sources;
362 
363  my $big_name = uc $source_name;
364  my $sql = "select source_id from source where upper(name) like '%${big_name}%'";
365  my $sth = $dbi->prepare($sql);
366  $sth->execute();
367  while(my @row = $sth->fetchrow_array()){
368  push @sources,$row[0];
369  }
370  $sth->finish;
371 
372  foreach my $source (@sources){
373  $sql = "select label, xref_id from xref where species_id = $species_id and source_id = $source";
374  $sth = $dbi->prepare($sql);
375  $sth->execute();
376  while(my @row = $sth->fetchrow_array()){
377  $valid_codes{$row[0]} =$row[1];
378  # add any synonyms for this xref as well
379  foreach my $syn (@{$synonyms{$row[1]}}) {
380  $valid_codes{$syn} = $row[1];
381  }
382  }
383  }
384  $sth->finish;
385  return \%valid_codes;
386 }
387 
388 
389 ####################################################
390 # get_valid_codes
391 #
392 # hash of accessions to array of xref dbIDs.
393 # This is an array becouse more than one entry can
394 # exist. i.e. for uniprot and refseq we have direct
395 # and sequence match sets and we need to give both.
396 #
397 # This list is a cache of acceptable IDs in order to
398 # reduce querying when attaching external accessions
399 # to xrefs.
400 ####################################################
401 sub get_valid_codes{
402 
403  my ($self,$source_name,$species_id, $dbi) =@_;
404 
405  my %valid_codes;
406  my @sources;
407  $dbi = $self->dbi unless defined $dbi;
408 
409  my $big_name = uc $source_name;
410  my $sql = "select source_id from source where upper(name) like '%$big_name%'";
411  my $sth = $dbi->prepare($sql);
412  $sth->execute();
413  while(my @row = $sth->fetchrow_array()){
414  push @sources,$row[0];
415  }
416  $sth->finish;
417 
418  foreach my $source (@sources){
419  $sql = "select accession, xref_id from xref where species_id = $species_id and source_id = $source";
420  $sth = $dbi->prepare($sql);
421  $sth->execute();
422  while(my @row = $sth->fetchrow_array()){
423  push @{$valid_codes{$row[0]}}, $row[1];
424  }
425  }
426  $sth->finish();
427  return \%valid_codes;
428 }
429 
430 ##############################
431 # Upload xrefs to the database
432 ##############################
433 sub upload_xref_object_graphs {
434  my ($self, $rxrefs, $dbi) = @_;
435 
436  my $count = scalar @{$rxrefs};
437  if($verbose) {
438  print "count = $count\n" || croak 'Could not print out count';
439  }
440 
441  if ($count) {
442 
443  #################
444  # upload new ones
445  ##################
446  if ($verbose) {
447  print "Uploading xrefs\n"
448  || croak 'Could not print string';
449  }
450 
451 
452  #################################################################################
453  # Start of sql needed to add xrefs, primary_xrefs, synonym, dependent_xrefs etc..
454  #################################################################################
455  $dbi = $self->dbi unless defined $dbi;
456  my $xref_sth = $dbi->prepare('INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type) VALUES(?,?,?,?,?,?,?)');
457  my $pri_insert_sth = $dbi->prepare('INSERT INTO primary_xref VALUES(?,?,?,?)');
458  my $pri_update_sth = $dbi->prepare('UPDATE primary_xref SET sequence=? WHERE xref_id=?');
459  my $syn_sth = $dbi->prepare('INSERT IGNORE INTO synonym ( xref_id, synonym ) VALUES(?,?)');
460  my $xref_update_label_sth = $dbi->prepare('UPDATE xref SET label=? WHERE xref_id=?');
461  my $xref_update_descr_sth = $dbi->prepare('UPDATE xref SET description=? WHERE xref_id=?');
462  my $pair_sth = $dbi->prepare('INSERT INTO pairs VALUES(?,?,?)');
463  my $xref_id_sth = $dbi->prepare("SELECT xref_id FROM xref WHERE accession = ? AND source_id = ? AND species_id = ?");
464  my $primary_xref_id_sth = $dbi->prepare('SELECT xref_id FROM primary_xref WHERE xref_id=?');
465 
466 
467 
468  # disable error handling here as we'll do it ourselves
469  # reenabled it, as errorcodes are really unhelpful
470  $xref_sth->{RaiseError} = 0;
471  $xref_sth->{PrintError} = 0;
472 
473  #################################################################################
474  # End of sql needed to add xrefs, primary_xrefs, synonym, dependent_xrefs etc..
475  #################################################################################
476 
477 
478  foreach my $xref (@{$rxrefs}) {
479  my ($xref_id, $direct_xref_id);
480  if(!(defined $xref->{ACCESSION} )){
481  print "Your xref does not have an accession-number,so it can't be stored in the database\n"
482  || croak 'Could not write message';
483  return;
484  }
485 
486  ########################################
487  # Create entry in xref table and note ID
488  ########################################
489  if(! $xref_sth->execute($xref->{ACCESSION},
490  $xref->{VERSION} || 0,
491  $xref->{LABEL}|| $xref->{ACCESSION},
492  $xref->{DESCRIPTION},
493  $xref->{SOURCE_ID},
494  $xref->{SPECIES_ID},
495  $xref->{INFO_TYPE} || 'MISC')){
496  #
497  # if we failed to add the xref it must already exist so go find the xref_id for this
498  #
499  if(!(defined $xref->{SOURCE_ID})){
500  print "your xref: $xref->{ACCESSION} does not have a source-id\n";
501  return;
502  }
503  $xref_id_sth->execute(
504  $xref->{ACCESSION},
505  $xref->{SOURCE_ID},
506  $xref->{SPECIES_ID} );
507  $xref_id = ($xref_id_sth->fetchrow_array())[0];
508  if(defined $xref->{LABEL} ) {
509  $xref_update_label_sth->execute($xref->{LABEL},$xref_id) ;
510  }
511  if(defined $xref->{DESCRIPTION} ){
512  $xref_update_descr_sth->execute($xref->{DESCRIPTION},$xref_id);
513  }
514  }
515  else{
516  $xref_id_sth->execute(
517  $xref->{ACCESSION},
518  $xref->{SOURCE_ID},
519  $xref->{SPECIES_ID} );
520  $xref_id = ($xref_id_sth->fetchrow_array())[0];
521  }
522 
523  foreach my $direct_xref (@{$xref->{DIRECT_XREFS}}) {
524  $xref_sth->execute( $xref->{ACCESSION},
525  $xref->{VERSION} || 0,
526  $xref->{LABEL} || $xref->{ACCESSION},
527  $xref->{DESCRIPTION},
528  $direct_xref->{SOURCE_ID},
529  $xref->{SPECIES_ID},
530  $direct_xref->{LINKAGE_TYPE});
531  $xref_id_sth->execute(
532  $xref->{ACCESSION},
533  $direct_xref->{SOURCE_ID},
534  $xref->{SPECIES_ID} );
535  $direct_xref_id = ($xref_id_sth->fetchrow_array())[0];
536  $self->add_direct_xref($direct_xref_id, $direct_xref->{STABLE_ID}, $direct_xref->{ENSEMBL_TYPE},$direct_xref->{LINKAGE_TYPE}, $dbi);
537  }
538 
539  ################
540  # Error checking
541  ################
542  if(!((defined $xref_id) and $xref_id)){
543  print STDERR "xref_id is not set for :\n".
544  "$xref->{ACCESSION}\n$xref->{LABEL}\n".
545  "$xref->{DESCRIPTION}\n$xref->{SOURCE_ID}\n".
546  "$xref->{SPECIES_ID}\n";
547  }
548 
549 
550  #############################################################################
551  # create entry in primary_xref table with sequence; if this is a "cumulative"
552  # entry it may already exist, and require an UPDATE rather than an INSERT
553  #############################################################################
554  if(defined $xref->{SEQUENCE} ){
555  $primary_xref_id_sth->execute($xref_id) or croak( $dbi->errstr() );
556  my @row = $primary_xref_id_sth->fetchrow_array();
557  my $exists = $row[0];
558  if ( $exists ) {
559  $pri_update_sth->execute( $xref->{SEQUENCE}, $xref_id )
560  or croak( $dbi->errstr() );
561  } else {
562  $pri_insert_sth->execute( $xref_id, $xref->{SEQUENCE},
563  $xref->{SEQUENCE_TYPE},
564  $xref->{STATUS} )
565  or croak( $dbi->errstr() );
566  }
567  }
568 
569  ##########################################################
570  # if there are synonyms, add entries in the synonym table
571  ##########################################################
572  foreach my $syn ( @{ $xref->{SYNONYMS} } ) {
573  $syn_sth->execute( $xref_id, $syn )
574  or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" );
575  }
576 
577  #######################################################################
578  # if there are dependent xrefs, add xrefs and dependent xrefs for them
579  #######################################################################
580  DEPENDENT_XREF:
581  foreach my $depref (@{$xref->{DEPENDENT_XREFS}}) {
582  my %dep = %{$depref};
583 
584  #####################################
585  # Insert the xref and get its xref_id
586  #####################################
587  # print "inserting $dep{ACCESSION},$dep{VERSION},$dep{LABEL},$dep{DESCRIPTION},$dep{SOURCE_ID},${\$xref->{SPECIES_ID}}\n";
588  my $dep_xref_id = $self->add_xref({
589  'acc' => $dep{ACCESSION},
590  'source_id' => $dep{SOURCE_ID},
591  'species_id' => $xref->{SPECIES_ID},
592  'label' => $dep{LABEL},
593  'desc' => $dep{DESCRIPTION},
594  'version' => $dep{VERSION},
595  'info_type' => 'DEPENDENT',
596  'dbi' => $dbi,
597  });
598  if( ! $dep_xref_id ) {
599  next DEPENDENT_XREF;
600  }
601 
602  #
603  # Add the linkage_annotation and source id it came from
604  #
605  $self->add_dependent_xref_maponly( $dep_xref_id,
606  $dep{LINKAGE_SOURCE_ID},
607  $xref_id,
608  $dep{LINKAGE_ANNOTATION});
609 
610  #########################################################
611  # if there are synonyms, add entries in the synonym table
612  #########################################################
613  foreach my $syn ( @{ $dep{SYNONYMS} } ) {
614  $syn_sth->execute( $dep_xref_id, $syn )
615  or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" );
616  } # foreach syn
617 
618  } # foreach dep
619 
620  #################################################
621  # Add the pair data. refseq dna/pep pairs usually
622  #################################################
623  if(defined $xref_id and defined $xref->{PAIR} ){
624  $pair_sth->execute($xref->{SOURCE_ID},$xref->{ACCESSION},$xref->{PAIR});
625  }
626 
627 
628  ###########################
629  # tidy up statement handles
630  ###########################
631  if(defined $xref_sth) {$xref_sth->finish()};
632  if(defined $pri_insert_sth) {$pri_insert_sth->finish()} ;
633  if(defined $pri_update_sth) {$pri_update_sth->finish()};
634  if(defined $syn_sth) { $syn_sth->finish()};
635  if(defined $xref_update_label_sth) { $xref_update_label_sth->finish()};
636  if(defined $xref_update_descr_sth) { $xref_update_descr_sth->finish()};
637  if(defined $pair_sth) { $pair_sth->finish()};
638  if(defined $xref_id_sth) { $xref_id_sth->finish()};
639  if(defined $primary_xref_id_sth) { $primary_xref_id_sth->finish()};
640 
641  } # foreach xref
642 
643  }
644  return 1;
645 }
646 
647 ######################################################################################
648 # Add direct xref to the table XXX_direct_xref. (XXX -> Gene.Transcript or Translation
649 # Xref has to exist already, this module just adds ot yo the direct_xref table.
650 # $direct_xref is a reference to an array of hash objects.
651 ######################################################################################
652 sub upload_direct_xrefs{
653  my ($self, $direct_xref, $dbi) = @_;
654  $dbi = $self->dbi unless defined $dbi;
655  for my $dr(@{$direct_xref}) {
656 
657  ################################################
658  # Find the xref_id for this accession and source
659  ################################################
660  my $general_xref_id = $self->get_xref($dr->{ACCESSION},$dr->{SOURCE_ID},$dr->{SPECIES_ID}, $dbi);
661 
662  #######################################################
663  # If found add the direct xref else write error message
664  #######################################################
665  if ($general_xref_id){
666  $self->add_direct_xref($general_xref_id, $dr->{ENSEMBL_STABLE_ID},$dr->{ENSEMBL_TYPE},$dr->{LINKAGE_XREF}, $dbi);
667  }
668  else{
669  print {*STDERR} 'Problem Could not find accession '.$dr->{ACCESSION}.' for source '.$dr->{SOURCE}.
670  ' so not able to add direct xref to '.$dr->{ENSEMBL_STABLE_ID}."\n";
671  }
672  }
673  return;
674 }
675 
676 
677 
678 
679 ###############################################
680 # Insert into the meta table the key and value.
681 ###############################################
682 sub add_meta_pair {
683 
684  my ($self, $key, $value, $dbi) = @_;
685  $dbi = $self->dbi unless defined $dbi;
686 
687  my $sth = $dbi->prepare('insert into meta (meta_key, meta_value, date) values("'.$key.'", "'.$value.'", now())');
688  $sth->execute;
689  $sth->finish;
690  return;
691 }
692 
693 
694 
695 #################################################
696 # Create a hash of all the source names for xrefs
697 #################################################
698 sub get_xref_sources {
699 
700  my $self = shift;
701  my $dbi = shift;
702  $dbi = $self->dbi unless defined $dbi;
703  my %sourcename_to_sourceid;
704 
705  my $sth = $dbi->prepare('SELECT name,source_id FROM source');
706  $sth->execute() or croak( $dbi->errstr() );
707  while(my @row = $sth->fetchrow_array()) {
708  my $source_name = $row[0];
709  my $source_id = $row[1];
710  $sourcename_to_sourceid{$source_name} = $source_id;
711  }
712  $sth->finish;
713 
714  return %sourcename_to_sourceid;
715 }
716 
717 ########################################################################
718 # Create and return a hash that that goes from species_id to taxonomy_id
719 ########################################################################
720 sub species_id2taxonomy {
721 
722  my $self = shift;
723  my $dbi = shift;
724  $dbi = $self->dbi unless defined $dbi;
725 
726  my %species_id2taxonomy;
727 
728  my $sth = $dbi->prepare('SELECT species_id, taxonomy_id FROM species');
729  $sth->execute() or croak( $dbi->errstr() );
730  while(my @row = $sth->fetchrow_array()) {
731  my $species_id = $row[0];
732  my $taxonomy_id = $row[1];
733  if(defined $species_id2taxonomy{$species_id} ){
734  push @{$species_id2taxonomy{$species_id}}, $taxonomy_id;
735  }
736  else{
737  $species_id2taxonomy{$species_id} = [$taxonomy_id];
738  }
739  }
740  $sth->finish();
741  return %species_id2taxonomy;
742 }
743 
744 
745 
746 #########################################################################
747 # Create and return a hash that that goes from species_id to species name
748 #########################################################################
749 sub species_id2name {
750  my $self = shift;
751  my $dbi = shift;
752  $dbi = $self->dbi unless defined $dbi;
753 
754  my %species_id2name;
755 
756  my $sth = $dbi->prepare('SELECT species_id, name FROM species');
757  $sth->execute() or croak( $dbi->errstr() );
758  while ( my @row = $sth->fetchrow_array() ) {
759  my $species_id = $row[0];
760  my $name = $row[1];
761  $species_id2name{$species_id} = [ $name ];
762  }
763  $sth->finish();
764 
765  ##############################################
766  # Also populate the hash with all the aliases.
767  ##############################################
768  $sth = $dbi->prepare('SELECT species_id, aliases FROM species');
769  $sth->execute() or croak( $dbi->errstr() );
770  while ( my @row = $sth->fetchrow_array() ) {
771  my $species_id = $row[0];
772  foreach my $name ( split /,\s*/xms, $row[1] ) {
773  $species_id2name{$species_id} ||= [];
774  push @{$species_id2name{$species_id}}, $name;
775  }
776  }
777  $sth->finish();
778 
779  return %species_id2name;
780 } ## end sub species_id2name
781 
782 
783 ###########################################################################
784 # If there was an error, an xref with the same acc & source already exists.
785 # If so, find its ID, otherwise get ID of xref just inserted
786 ###########################################################################
787 sub get_xref_id {
788  my ($self, $arg_ref) = @_;
789  my $sth = $arg_ref->{sth} || croak 'Need a statement handle for get_xref_id';
790  my $acc = $arg_ref->{acc} || croak 'Need an accession for get_xref_id';
791  my $source = $arg_ref->{source_id} || croak 'Need an source_id for get_xref_id';
792  my $species = $arg_ref->{species_id} || confess 'Need an species_id for get_xref_id';
793  my $error = $arg_ref->{error};
794  my $dbi = $arg_ref->{dbi};
795 
796  my $id = $self->get_xref($acc, $source, $species, $dbi);
797 
798  return $id;
799 }
800 
801 ##################################################################
802 # If primary xref already exists for a partiuclar xref_id return 1
803 # else return 0;
804 ##################################################################
805 sub primary_xref_id_exists {
806 
807  my ($self, $xref_id, $dbi) = @_;
808  $dbi = $self->dbi unless defined $dbi;
809 
810  my $exists = 0;
811 
812  my $sth = $dbi->prepare('SELECT xref_id FROM primary_xref WHERE xref_id=?');
813  $sth->execute($xref_id) or croak( $dbi->errstr() );
814  my @row = $sth->fetchrow_array();
815  my $result = $row[0];
816  if (defined $result) {$exists = 1; }
817  $sth->finish();
818 
819  return $exists;
820 
821 }
822 
823 ############################################
824 # Get the tax id for a particular species id
825 ############################################
826 sub get_taxonomy_from_species_id{
827  my ($self,$species_id, $dbi) = @_;
828  my %hash;
829 
830  $dbi = $self->dbi unless defined $dbi;
831  my $sth = $dbi->prepare("SELECT taxonomy_id FROM species WHERE species_id = $species_id");
832  $sth->execute() or croak( $dbi->errstr() );
833  while(my @row = $sth->fetchrow_array()) {
834  $hash{$row[0]} = 1;
835  }
836  $sth->finish;
837  return \%hash;
838 }
839 
840 
841 #################################################
842 # xref_ids for a given stable id and linkage_xref
843 #################################################
844 sub get_direct_xref{
845  my ($self,$stable_id,$type,$link, $dbi) = @_;
846  $dbi = $self->dbi unless defined $dbi;
847 
848  $type = lc $type;
849 
850  my $sql = "select general_xref_id from ${type}_direct_xref d where ensembl_stable_id = ? and linkage_xref ";
851  my @sql_params = ( $stable_id );
852  if ( defined $link ) {
853  $sql .= '= ?';
854  push @sql_params, $link;
855  }
856  else {
857  $sql .= 'is null';
858  }
859  my $direct_sth = $dbi->prepare($sql);
860 
861  $direct_sth->execute( @sql_params ) || croak( $dbi->errstr() );
862  if ( wantarray () ) {
863  # Generic behaviour
864 
865  my @results;
866 
867  my $all_rows = $direct_sth->fetchall_arrayref();
868  foreach my $row_ref ( @{ $all_rows } ) {
869  push @results, $row_ref->[0];
870  }
871 
872  return @results;
873  }
874  else {
875  # Backwards-compatible behaviour. FIXME: can we get rid of it?
876  # There seem to be no parsers present relying on the old behaviour
877  # any more
878  if ( my @row = $direct_sth->fetchrow_array() ) {
879  return $row[0];
880  }
881  }
882  $direct_sth->finish();
883  return;
884 }
885 
886 
887 ###################################################################
888 # return the xref_id for a particular accession, source and species
889 # if not found return undef;
890 ###################################################################
891 sub get_xref{
892  my ($self,$acc,$source, $species_id, $dbi) = @_;
893  $dbi = $self->dbi unless defined $dbi;
894 
895  #
896  # If the statement handle does nt exist create it.
897  #
898  my $sql = 'select xref_id from xref where accession = ? and source_id = ? and species_id = ?';
899  my $get_xref_sth = $dbi->prepare($sql);
900 
901  #
902  # Find the xref_id using the sql above
903  #
904  $get_xref_sth->execute( $acc, $source, $species_id ) or croak( $dbi->errstr() );
905  if(my @row = $get_xref_sth->fetchrow_array()) {
906  return $row[0];
907  }
908  $get_xref_sth->finish();
909  return;
910 }
911 
912 ###################################################################
913 # return the object_xref_id for a particular xref_id, ensembl_id and ensembl_object_type
914 # if not found return undef;
915 ###################################################################
916 sub get_object_xref {
917  my ($self, $xref_id, $ensembl_id, $object_type, $dbi) = @_;
918  $dbi = $self->dbi unless defined $dbi;
919 
920  my $sql = 'select object_xref_id from object_xref where xref_id = ? and ensembl_object_type = ? and ensembl_id = ?';
921  my $get_object_xref_sth = $dbi->prepare($sql);
922 
923  #
924  # Find the object_xref_id using the sql above
925  #
926  $get_object_xref_sth->execute( $xref_id, $ensembl_id, $object_type) or croak( $dbi->errstr() );
927  if(my @row = $get_object_xref_sth->fetchrow_array()) {
928  return $row[0];
929  }
930  $get_object_xref_sth->finish();
931  return;
932 }
933 
934 
935 ###########################################################
936 # Create an xref..
937 # If it already exists it return that xrefs xref_id
938 # else creates it and return the new xre_id
939 ###########################################################
940 sub add_xref {
941  my ( $self, $arg_ref) = @_;
942 
943  my $acc = $arg_ref->{acc} || croak 'add_xref needs aa acc';
944  my $source_id = $arg_ref->{source_id} || croak 'add_xref needs a source_id';
945  my $species_id = $arg_ref->{species_id} || croak 'add_xref needs a species_id';
946  my $label = $arg_ref->{label} // $acc;
947  my $description = $arg_ref->{desc};
948  my $version = $arg_ref->{version} // 0;
949  my $info_type = $arg_ref->{info_type} // 'MISC';
950  my $info_text = $arg_ref->{info_text} // q{};
951  my $dbi = $arg_ref->{dbi};
952 
953  $dbi = $self->dbi unless defined $dbi;
954 
955  ##################################################################
956  # See if it already exists. It so return the xref_id for this one.
957  ##################################################################
958  my $xref_id = $self->get_xref($acc,$source_id, $species_id, $dbi);
959  if(defined $xref_id){
960  return $xref_id;
961  }
962 
963  my $add_xref_sth =
964  $dbi->prepare( 'INSERT INTO xref '
965  . '(accession,version,label,description,source_id,species_id, info_type, info_text) '
966  . 'VALUES(?,?,?,?,?,?,?,?)' );
967 
968  ######################################################################
969  # If the description is more than 255 characters, chop it off and add
970  # an indication that it has been truncated to the end of it.
971  ######################################################################
972  if (defined $description && ((length $description) > 255 ) ) {
973  my $truncmsg = ' /.../';
974  substr $description, 255 - (length $truncmsg),
975  length $truncmsg, $truncmsg;
976  }
977 
978 
979  ####################################
980  # Add the xref and croak if it fails
981  ####################################
982  $add_xref_sth->execute( $acc, $version || 0, $label,
983  $description, $source_id, $species_id, $info_type, $info_text
984  ) or croak("$acc\t$label\t\t$source_id\t$species_id\n");
985 
986  $add_xref_sth->finish();
987  return $add_xref_sth->{'mysql_insertid'};
988 } ## end sub add_xref
989 
990 
991 ###########################################################
992 # Create an object_xref..
993 # If it already exists it return the object_xref_id
994 # else creates it and returns the new object_xref_id
995 ###########################################################
996 
997 sub add_object_xref {
998  my ($self, $arg_ref) = @_;
999 
1000  my $xref_id = $arg_ref->{xref_id} || croak 'add_object_xref needs an xref_id';
1001  my $ensembl_id = $arg_ref->{ensembl_id} || croak 'add_object_xref needs a ensembl_id';
1002  my $object_type = $arg_ref->{object_type} || croak 'add_object_xref needs an object_type';
1003  my $dbi = $arg_ref->{dbi};
1004 
1005  $dbi = $self->dbi unless defined $dbi;
1006 
1007  ##################################################################
1008  # See if it already exists. It so return the xref_id for this one.
1009  ##################################################################
1010 
1011  my $object_xref_id = $self->get_object_xref($xref_id, $ensembl_id, $object_type, $dbi);
1012  if(defined $object_xref_id){
1013  return $object_xref_id;
1014  }
1015 
1016  my $add_object_xref_sth =
1017  $dbi->prepare( 'INSERT INTO object_xref'
1018  . '(ensembl_id, ensembl_object_type, xref_id) '
1019  . 'VALUES(?,?,?)' );
1020 
1021  ####################################
1022  # Add the object_xref and croak if it fails
1023  ####################################
1024  $add_object_xref_sth->execute($ensembl_id, $object_type, $xref_id
1025  ) or croak("$ensembl_id\t$object_type\t\t$xref_id\n");
1026 
1027  $add_object_xref_sth->finish();
1028  return $add_object_xref_sth->{'mysql_insertid'};
1029 }
1030 
1031 ###########################################################
1032 # Create an identity_xref
1033 ###########################################################
1034 
1035 sub add_identity_xref {
1036  my ($self, $arg_ref) = @_;
1037 
1038  my $object_xref_id = $arg_ref->{object_xref_id} || croak 'add_identity_xref needs an object_xref_id';
1039  my $score = $arg_ref->{score} || croak 'add_identity_xref needs a score';
1040  my $target_identity = $arg_ref->{target_identity} || croak 'add_identity_xref needs a target_identity';
1041  my $query_identity = $arg_ref->{query_identity} || croak 'add_identity_xref needs a query_identity';
1042  my $dbi = $arg_ref->{dbi};
1043 
1044  $dbi = $self->dbi unless defined $dbi;
1045 
1046  my $add_identity_xref_sth =
1047  $dbi->prepare( 'INSERT INTO identity_xref'
1048  . '(object_xref_id, score, query_identity, target_identity) '
1049  . 'VALUES(?,?,?,?)' );
1050 
1051  ####################################
1052  # Add the object_xref and croak if it fails
1053  ####################################
1054  $add_identity_xref_sth->execute($object_xref_id, $score, $query_identity, $target_identity
1055  ) or croak("$object_xref_id\t$score\t\t$query_identity\t$target_identity\n");
1056  $add_identity_xref_sth->finish();
1057  return;
1058 }
1059 
1060 
1061 ###################################################################
1062 # Create new xref if needed and add as a direct xref to a stable_id
1063 # Note that a corresponding method for dependent xrefs is called add_dependent_xref()
1064 ###################################################################
1065 sub add_to_direct_xrefs{
1066  my ($self, $arg_ref) = @_;
1067 
1068  my $stable_id = $arg_ref->{stable_id} || croak ('Need a direct_xref on which this xref linked too' );
1069  my $type = $arg_ref->{type} || croak ('Need a table type on which to add');
1070  my $acc = $arg_ref->{acc} || croak ('Need an accession of this direct xref' );
1071  my $source_id = $arg_ref->{source_id} || croak ('Need a source_id for this direct xref' );
1072  my $species_id = $arg_ref->{species_id} || croak ('Need a species_id for this direct xref' );
1073  my $version = $arg_ref->{version} // 0;
1074  my $label = $arg_ref->{label} // $acc;
1075  my $description = $arg_ref->{desc};
1076  my $linkage = $arg_ref->{linkage};
1077  my $info_text = $arg_ref->{info_text} // q{};
1078  my $dbi = $arg_ref->{dbi};
1079 
1080  $dbi = $self->dbi unless defined $dbi;
1081 
1082  my $sql = (<<'AXX');
1083  INSERT INTO xref (accession,version,label,description,source_id,species_id, info_type, info_text)
1084  VALUES (?,?,?,?,?,?,?,?)
1085 AXX
1086  my $add_xref_sth = $dbi->prepare($sql);
1087 
1088  ###############################################################
1089  # If the acc already has an xrefs find it else cretae a new one
1090  ###############################################################
1091  my $direct_id = $self->get_xref($acc, $source_id, $species_id, $dbi);
1092  if(!(defined $direct_id)){
1093  $add_xref_sth->execute(
1094  $acc, $version || 0, $label,
1095  $description, $source_id, $species_id, 'DIRECT', $info_text
1096  ) or croak("$acc\t$label\t\t$source_id\t$species_id\n");
1097  }
1098  $add_xref_sth->finish();
1099 
1100  $direct_id = $self->get_xref($acc, $source_id, $species_id, $dbi);
1101 
1102  #########################
1103  # Now add the direct info
1104  #########################
1105  $self->add_direct_xref($direct_id, $stable_id, $type, $linkage, $dbi);
1106  return;
1107 }
1108 
1109 
1110 ##################################################################
1111 # Add a single record to the direct_xref table.
1112 # Note that an xref must already have been added to the xref table
1113 # Note that a corresponding method for dependent xrefs is called add_dependent_xref_maponly()
1114 ##################################################################
1115 sub add_direct_xref {
1116  my ($self, $general_xref_id, $ensembl_stable_id, $ensembl_type, $linkage_type, $dbi, $update_info_type) = @_;
1117 
1118  $dbi = $self->dbi unless defined $dbi;
1119 
1120  # Check if such a mapping exists yet. Make sure get_direct_xref() is
1121  # invoked in list context, otherwise it will fall back to legacy
1122  # behaviour of returning a single xref_id even when multiple ones
1123  # match.
1124  # Note: get_direct_xref() does not currently cache its output,
1125  # consider changing this should performance become an issue
1126  my @existing_xref_ids = $self->get_direct_xref($ensembl_stable_id,
1127  $ensembl_type,
1128  $linkage_type,
1129  $dbi);
1130  if ( scalar grep { $_ == $general_xref_id } @existing_xref_ids ) {
1131  return;
1132  }
1133 
1134  $ensembl_type = lc($ensembl_type);
1135  my $sql = "INSERT INTO " . $ensembl_type . "_direct_xref VALUES (?,?,?)";
1136  my $add_direct_xref_sth = $dbi->prepare($sql);
1137 
1138  $add_direct_xref_sth->execute($general_xref_id, $ensembl_stable_id, $linkage_type);
1139  $add_direct_xref_sth->finish();
1140 
1141  if ( $update_info_type ) {
1142  $self->_update_xref_info_type( $general_xref_id, 'DIRECT', $dbi );
1143  }
1144 
1145  return;
1146 }
1147 
1148 
1149 ##########################################################
1150 # Create/Add xref and add it as a dependency of the master
1151 # Note that a corresponding method for direct xrefs is called add_to_direct_xrefs()
1152 ##########################################################
1153 sub add_dependent_xref{
1154  my ($self, $arg_ref) = @_;
1155 
1156  my $master_xref = $arg_ref->{master_xref_id} || croak( 'Need a master_xref_id on which this xref depends on' );
1157  my $acc = $arg_ref->{acc} || croak( 'Need an accession of this dependent xref' );
1158  my $source_id = $arg_ref->{source_id} || croak( 'Need a source_id for this dependent xref' );
1159  my $species_id = $arg_ref->{species_id} || croak( 'Need a species_id for this dependent xref' );
1160  my $version = $arg_ref->{version} // 0;
1161  my $label = $arg_ref->{label} // $acc;
1162  my $description = $arg_ref->{desc};
1163  my $linkage = $arg_ref->{linkage};
1164  my $info_text = $arg_ref->{info_text} // q{};
1165  my $dbi = $arg_ref->{dbi};
1166 
1167  $dbi = $self->dbi unless defined $dbi;
1168 
1169  my $sql = (<<'IXR');
1170 INSERT INTO xref
1171  (accession,version,label,description,source_id,species_id, info_type, info_text)
1172  VALUES (?,?,?,?,?,?,?,?)
1173 IXR
1174  my $add_xref_sth = $dbi->prepare($sql);
1175 
1176  ####################################################
1177  # Does the xref already exist. If so get its xref_id
1178  # else create it and get the new xref_id
1179  ####################################################
1180  my $dependent_id = $self->get_xref($acc, $source_id, $species_id, $dbi);
1181  if(!(defined $dependent_id)){
1182  $add_xref_sth->execute(
1183  $acc, $version, $label,
1184  $description, $source_id, $species_id, 'DEPENDENT', $info_text
1185  ) or croak("$acc\t$label\t\t$source_id\t$species_id\n");
1186  }
1187  $add_xref_sth->finish();
1188 
1189  ################################################
1190  # Croak if we have failed to create/get the xref
1191  ################################################
1192  $dependent_id = $self->get_xref($acc, $source_id, $species_id, $dbi);
1193  if ( !$dependent_id ) {
1194  croak("$acc\t$label\t\t$source_id\t$species_id\n");
1195  }
1196 
1197  ################################
1198  # Now add the dependency mapping
1199  ################################
1200  $self->add_dependent_xref_maponly( $dependent_id, $source_id,
1201  $master_xref, $linkage,
1202  $dbi );
1203 
1204  return $dependent_id;
1205 }
1206 
1207 
1208 ##################################################################
1209 # Add a single record to the dependent_xref table.
1210 # Note that an xref must already have been added to the xref table
1211 # Note that a corresponding method for direct xrefs is called add_direct_xref()
1212 ##################################################################
1213 
1214 sub add_dependent_xref_maponly {
1215  my ( $self, $dependent_id, $dependent_source_id, $master_id, $master_source_id, $dbi, $update_info_type ) = @_;
1216 
1217  $dbi //= $self->dbi;
1218 
1219  my $sql = (<<'ADX');
1220 INSERT INTO dependent_xref
1221  (master_xref_id,dependent_xref_id,linkage_annotation,linkage_source_id)
1222  VALUES (?,?,?,?)
1223 ADX
1224  my $add_dependent_xref_sth = $dbi->prepare($sql);
1225 
1226  # If the dependency cannot be found in %xref_dependent_mapped,
1227  # i.e. has not been set yet, add it
1228  if ( ( ! defined $xref_dependent_mapped{"$master_id|$dependent_id"} )
1229  || $xref_dependent_mapped{"$master_id|$dependent_id"} ne $master_source_id ) {
1230 
1231  $add_dependent_xref_sth->execute( $master_id, $dependent_id,
1232  $master_source_id, $dependent_source_id )
1233  || croak("$master_id\t$dependent_id\t$master_source_id\t$dependent_source_id");
1234 
1235  $xref_dependent_mapped{"$master_id|$dependent_id"} = $master_source_id;
1236  }
1237 
1238  $add_dependent_xref_sth->finish();
1239 
1240  if ( $update_info_type ) {
1241  $self->_update_xref_info_type( $dependent_id, 'DEPENDENT', $dbi );
1242  }
1243 
1244  return;
1245 }
1246 
1247 
1248 ##################################################################
1249 # Add synonyms for a particular accession for one or more sources.
1250 # This is for priority xrefs where we have more than one source
1251 # but want to write synonyms for each with the same accession
1252 ##################################################################
1253 sub add_to_syn_for_mult_sources{
1254  my ($self, $acc, $sources, $syn, $species_id, $dbi) = @_;
1255 
1256  $dbi = $self->dbi unless defined $dbi;
1257  my $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym ( xref_id, synonym ) VALUES(?,?)');
1258 
1259  foreach my $source_id (@{$sources}){
1260  my $xref_id = $self->get_xref($acc, $source_id, $species_id, $dbi);
1261  if(defined $xref_id){
1262  $add_synonym_sth->execute( $xref_id, $syn )
1263  or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" );
1264  }
1265  }
1266  $add_synonym_sth->finish();
1267  return;
1268 }
1269 
1270 
1271 ##########################################################
1272 # Add synomyn for an xref given by accession and source_id
1273 ##########################################################
1274 sub add_to_syn{
1275  my ($self, $acc, $source_id, $syn, $species_id, $dbi) = @_;
1276 
1277  $dbi = $self->dbi unless defined $dbi;
1278  my $add_synonym_sth = $dbi->prepare('INSERT IGNORE INTO synonym ( xref_id, synonym ) VALUES(?,?)');
1279  my $xref_id = $self->get_xref($acc, $source_id, $species_id, $dbi);
1280  if(defined $xref_id){
1281  $add_synonym_sth->execute( $xref_id, $syn )
1282  or croak( $dbi->errstr() . "\n $xref_id\n $syn\n" );
1283  }
1284  else {
1285  carp ( "Could not find acc $acc in "
1286  . "xref table source = $source_id of species $species_id\n" );
1287  }
1288  $add_synonym_sth->finish();
1289  return;
1290 }
1291 
1292 
1293 ##########################################
1294 # Add synomyn for an xref given by xref_id
1295 ##########################################
1296 sub add_synonym{
1297  my ($self, $xref_id, $syn, $dbi) = @_;
1298 
1299  $dbi = $self->dbi unless defined $dbi;
1300  my $add_synonym_sth = $dbi->prepare_cached('INSERT IGNORE INTO synonym ( xref_id, synonym ) VALUES(?,?)');
1301 
1302  $add_synonym_sth->execute( $xref_id, $syn )
1303  or croak( $dbi->errstr()."\n $xref_id\n $syn\n\n" );
1304 
1305  $add_synonym_sth->finish();
1306  return;
1307 }
1308 
1309 ########################################################
1310 # Create a hash that uses the label as a key
1311 # and the acc as the value. Also add synonyms for these
1312 # as keys.
1313 #######################################################
1314 sub get_label_to_acc{
1315  my ($self, $name, $species_id, $prio_desc, $dbi) = @_;
1316  my %hash1=();
1317 
1318  $dbi = $self->dbi unless defined $dbi;
1319 
1320  my $sql =(<<"GLA");
1321 SELECT xref.accession, xref.label
1322  FROM xref, source
1323  WHERE source.name LIKE '$name%' AND
1324  xref.source_id = source.source_id
1325 GLA
1326  if(defined $prio_desc){
1327  $sql .= " and source.priority_description like '$prio_desc'";
1328  }
1329  if(defined $species_id){
1330  $sql .= " and xref.species_id = $species_id";
1331  }
1332  my $sub_sth = $dbi->prepare($sql);
1333 
1334  $sub_sth->execute();
1335  while(my @row = $sub_sth->fetchrow_array()) {
1336  $hash1{$row[1]} = $row[0];
1337  }
1338 
1339 
1340  ####################
1341  # Remember synonyms
1342  ####################
1343 
1344  $sql =(<<"GLS");
1345 SELECT xref.accession, synonym.synonym
1346  FROM xref, source, synonym
1347  WHERE synonym.xref_id = xref.xref_id AND
1348  source.name like '$name%' AND
1349  xref.source_id = source.source_id
1350 GLS
1351 
1352  if(defined $prio_desc){
1353  $sql .= " AND source.priority_description LIKE '$prio_desc'";
1354  }
1355  if(defined $species_id){
1356  $sql .= " AND xref.species_id = $species_id";
1357  }
1358  $sub_sth = $dbi->prepare($sql);
1359 
1360  $sub_sth->execute();
1361  while(my @row = $sub_sth->fetchrow_array()) {
1362  $hash1{$row[1]} = $row[0];
1363  }
1364  $sub_sth->finish();
1365 
1366  return \%hash1;
1367 }
1368 
1369 ########################################################
1370 # Create a hash that uses the accession as a key
1371 # and the label as the value.
1372 #######################################################
1373 sub get_acc_to_label{
1374  my ($self, $name, $species_id, $prio_desc, $dbi) = @_;
1375  my %hash1=();
1376 
1377  $dbi = $self->dbi unless defined $dbi;
1378 
1379  my $sql =(<<"GLA");
1380 SELECT xref.accession, xref.label
1381  FROM xref, source
1382  WHERE source.name LIKE '$name%' AND
1383  xref.source_id = source.source_id
1384 GLA
1385  if(defined $prio_desc){
1386  $sql .= " and source.priority_description like '$prio_desc'";
1387  }
1388  if(defined $species_id){
1389  $sql .= " and xref.species_id = $species_id";
1390  }
1391  my $sub_sth = $dbi->prepare($sql);
1392 
1393  $sub_sth->execute();
1394  while(my @row = $sub_sth->fetchrow_array()) {
1395  $hash1{$row[0]} = $row[1];
1396  }
1397  $sub_sth->finish();
1398 
1399  return \%hash1;
1400 }
1401 
1402 
1403 ########################################################
1404 # Create a hash that uses the label as a key
1405 # and the desc as the value. Also add synonyms for these
1406 # as keys.
1407 #######################################################
1408 sub get_label_to_desc{
1409  my ($self, $name, $species_id, $prio_desc, $dbi) = @_;
1410  my %hash1=();
1411 
1412  $dbi = $self->dbi unless defined $dbi;
1413 
1414  my $sql =(<<"GDH");
1415  SELECT xref.description, xref.label
1416  FROM xref, source
1417  WHERE source.name LIKE '$name%' AND
1418  xref.source_id = source.source_id
1419 GDH
1420  if(defined $prio_desc){
1421  $sql .= " and source.priority_description like '$prio_desc'";
1422  }
1423  if(defined $species_id){
1424  $sql .= " and xref.species_id = $species_id";
1425  }
1426  my $sub_sth = $dbi->prepare($sql);
1427 
1428  $sub_sth->execute();
1429  while(my @row = $sub_sth->fetchrow_array()) {
1430  $hash1{$row[1]} = $row[0];
1431  }
1432 
1433  ###########################
1434  # Also include the synonyms
1435  ###########################
1436 
1437  my $syn_sql =(<<"GDS");
1438  SELECT xref.description, synonym.synonym
1439  FROM xref, source, synonym
1440  WHERE synonym.xref_id = xref.xref_id AND
1441  source.name like '$name%' AND
1442  xref.source_id = source.source_id
1443 GDS
1444 
1445  if(defined $prio_desc){
1446  $syn_sql .= " AND source.priority_description LIKE '$prio_desc'";
1447  }
1448  if(defined $species_id){
1449  $syn_sql .= " AND xref.species_id = $species_id";
1450  }
1451  $sub_sth = $dbi->prepare($syn_sql);
1452 
1453  $sub_sth->execute();
1454  while(my @row = $sub_sth->fetchrow_array()) {
1455  $hash1{$row[1]} = $row[0];
1456  }
1457  $sub_sth->finish();
1458 
1459  return \%hash1;
1460 }
1461 
1462 
1463 ########################################
1464 # Set release for a particular source_id.
1465 ########################################
1466 sub set_release{
1467  my ($self, $source_id, $s_release, $dbi ) = @_;
1468 
1469  $dbi = $self->dbi unless defined $dbi;
1470 
1471  my $sth =
1472  $dbi->prepare('UPDATE source SET source_release=? WHERE source_id=?');
1473 
1474  if($verbose) { print "Setting release to '$s_release' for source ID '$source_id'\n"; }
1475 
1476  $sth->execute( $s_release, $source_id );
1477  $sth->finish();
1478  return;
1479 }
1480 
1481 
1482 #############################################################################
1483 # create a hash of all the dependent mapping that exist for a given source_id
1484 # Of the format {master_xref_id|dependent_xref_id}
1485 #############################################################################
1486 sub get_dependent_mappings {
1487  my $self = shift;
1488  my $source_id = shift;
1489  my $dbi = shift;
1490 
1491  $dbi = $self->dbi unless defined $dbi;
1492 
1493  my $sql =(<<"GDM");
1494  SELECT d.master_xref_id, d.dependent_xref_id, d.linkage_annotation
1495  FROM dependent_xref d, xref x
1496  WHERE x.xref_id = d.dependent_xref_id AND
1497  x.source_id = $source_id
1498 GDM
1499  my $sth = $dbi->prepare($sql);
1500  $sth->execute();
1501  my $master_xref;
1502  my $dependent_xref;
1503  my $linkage;
1504  $sth->bind_columns(\$master_xref, \$dependent_xref, \$linkage);
1505  while($sth->fetch){
1506  $xref_dependent_mapped{"$master_xref|$dependent_xref"} = $linkage;
1507  }
1508  $sth->finish;
1509  return;
1510 }
1511 
1512 
1513 ##########################################################
1514 # Create a has that uses the accession and labels for keys
1515 # and an array of the synonyms as the vaules
1516 ##########################################################
1517 sub get_ext_synonyms{
1518  my $self = shift;
1519  my $source_name = shift;
1520  my $dbi = shift;
1521  $dbi = $self->dbi unless defined $dbi;
1522  my %ext_syns;
1523  my %seen; # can be in more than once fro each type of external source.
1524  my $separator = qw{:};
1525 
1526  my $sql =(<<"GES");
1527  SELECT x.accession, x.label, sy.synonym
1528  FROM xref x, source so, synonym sy
1529  WHERE x.xref_id = sy.xref_id AND
1530  so.source_id = x.source_id AND
1531  so.name like '$source_name'
1532 GES
1533  my $sth = $dbi->prepare($sql);
1534 
1535  $sth->execute;
1536  my ($acc, $label, $syn);
1537  $sth->bind_columns(\$acc, \$label, \$syn);
1538 
1539  my $count = 0;
1540  while($sth->fetch){
1541  if(!(defined $seen{$acc.$separator.$syn})){
1542  push @{$ext_syns{$acc}}, $syn;
1543  push @{$ext_syns{$label}}, $syn;
1544  $count++;
1545  }
1546  $seen{$acc.$separator.$syn} = 1;
1547  }
1548  $sth->finish;
1549 
1550  return \%ext_syns;
1551 
1552 }
1553 
1554 
1555 ######################################################################
1556 # Store data needed to beable to revert to same stage as after parsing
1557 ######################################################################
1558 sub parsing_finished_store_data {
1559  my $self = shift;
1560  my $dbi = shift;
1561  $dbi = $self->dbi unless defined $dbi;
1562 
1563  # Store max id for
1564 
1565  # gene/transcript/translation_direct_xref general_xref_id #Does this change??
1566 
1567  # xref xref_id
1568  # dependent_xref object_xref_id is all null
1569  # object_xref object_xref_id
1570  # identity_xref object_xref_id
1571 
1572  my %table_and_key =
1573  ( 'xref' => 'xref_id', 'object_xref' => 'object_xref_id' );
1574 
1575  foreach my $table ( keys %table_and_key ) {
1576  my $sth = $dbi->prepare(
1577  'select MAX(' . $table_and_key{$table} . ") from $table" );
1578  $sth->execute;
1579  my $max_val;
1580  $sth->bind_columns( \$max_val );
1581  $sth->fetch;
1582  $sth->finish;
1583  $self->add_meta_pair( 'PARSED_' . $table_and_key{$table},
1584  $max_val || 1, $dbi );
1585  }
1586  return;
1587 } ## end sub parsing_finished_store_data
1588 
1589 
1590 sub get_meta_value {
1591  my ($self, $key, $dbi) = @_;
1592  $dbi = $self->dbi unless defined $dbi;
1593 
1594  my $sth = $dbi->prepare('select meta_value from meta where meta_key like "'.$key.'" order by meta_id');
1595  $sth->execute();
1596  my $value;
1597  $sth->bind_columns(\$value);
1598  while($sth->fetch){ # get the last one
1599  }
1600  $sth->finish;
1601 
1602  return $value;
1603 }
1604 
1605 
1606 ######################################
1607 # Update info_type of an existing xref
1608 ######################################
1609 sub _update_xref_info_type {
1610  my ($self, $xref_id, $info_type, $dbi) = @_;
1611 
1612  $dbi //= $self->dbi;
1613 
1614  my $sth = $dbi->prepare('UPDATE xref SET info_type=? where xref_id=?');
1615  if ( ! $sth->execute( $info_type, $xref_id ) ) {
1616  croak $dbi->errstr() . "\n $xref_id\n $info_type\n\n";
1617  }
1618 
1619  $sth->finish();
1620  return;
1621 }
1622 
1623 
1624 1;
1625 
accession
public accession()
XrefParser::FetchFiles
Definition: FetchFiles.pm:16
get_filehandle
public get_filehandle()
XrefParser::Database::dbi
public dbi()
XrefParser::Database
Definition: Database.pm:8
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68