ensembl-hive  2.8.1
ChecksumMapper.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::ChecksumMapper;
21 
22 use strict;
23 use warnings;
24 
25 use Bio::EnsEMBL::Utils::Exception qw(throw);
27 
28 use base qw(XrefMapper::BasicMapper);
29 
30 my $DEFAULT_METHOD = 'XrefMapper::Methods::MySQLChecksum';
31 
# Constructor.
#   $class  - class to bless the new object into
#   $mapper - an existing mapper; its core() and xref() values are copied
#             onto the new object and the mapper itself is remembered
# Returns the new object.
sub new {
  my ($class, $mapper) = @_;
  my $self = bless({}, $class);

  # Copy each handle across through the accessor of the same name.
  foreach my $accessor (qw(core xref)) {
    $self->$accessor($mapper->$accessor);
  }
  $self->mapper($mapper);

  return $self;
}
40 
# Returns the SQL helper obtained from the xref object's database
# connection (xref -> dbc -> sql_helper).
sub _xref_helper {
  my ($self) = @_;
  my $connection = $self->xref()->dbc();
  return $connection->sql_helper();
}
45 
# Returns the fixed logic name of this mapper.
sub logic_name {
  return 'xrefchecksum';
}
50 
# Getter/setter for the wrapped mapper object.
#   $mapper - optional; stored when defined
# Returns the currently stored mapper.
sub mapper {
  my ($self, $mapper) = @_;
  if (defined $mapper) {
    $self->{mapper} = $mapper;
  }
  return $self->{mapper};
}
56 
# Getter/setter for the 'method' attribute.
#   $method - optional; stored when defined
# Returns the currently stored value.
sub method {
  my ($self, $method) = @_;
  if (defined $method) {
    $self->{method} = $method;
  }
  return $self->{method};
}
62 
# Reports the verbosity setting of the wrapped mapper.
sub verbose {
  my ($self) = @_;
  my $wrapped = $self->mapper();
  return $wrapped->verbose();
}
67 
# Target file used by the checksum method. This class defines none, so the
# call returns nothing; subclasses override it where a target is required.
sub target {
  return;
}
72 
# Runs the checksum mapping from start to finish.
#   $db_url     - optional; handed to _map_checksums() and to the method's run()
#   $species_id - optional; handed to upload()
# Records a 'started' status, runs the mapping method and uploads its results
# only when _map_checksums() reports a true value, then records 'finished'.
# Returns nothing.
sub process {
  my ($self, $db_url, $species_id) = @_;

  $self->_update_status('checksum_xrefs_started');

  my $source_id   = $self->source_id();
  my $target      = $self->target();
  my $object_type = $self->object_type();

  my $has_checksums = $self->_map_checksums($db_url);
  if ($has_checksums) {
    my $runner  = $self->get_method();
    my $results = $runner->run($target, $source_id, $object_type, $db_url);
    $self->log_progress('Starting upload');
    $self->upload($results, $species_id);
  }

  $self->_update_status('checksum_xrefs_finished');
  return;
}
91 
# Writes mapping results into the xref and object_xref tables.
#   $results    - array ref of hashes such as
#                 { id => 1, upi => 'UPI00000A', object_type => 'Translation' };
#                 each hash gains an xref_id key as a side effect
#   $species_id - optional; falls back to $self->species_id()
# Inside one transaction the records of earlier runs are deleted, one xref
# row is inserted per distinct UPI, and one object_xref row is inserted per
# result. Returns nothing; returns early when no species id can be found.
sub upload {
  my ($self, $results, $species_id) = @_;

  my $insert_xref = <<'SQL';
INSERT INTO xref (source_id, accession, label, version, species_id, info_type)
values (?,?,?,?,?,?)
SQL
  my $insert_object_xref = <<'SQL';
INSERT INTO object_xref (ensembl_id, ensembl_object_type, xref_id, linkage_type, ox_status)
values (?,?,?,?,?)
SQL

  my $h = $self->_xref_helper();
  my $source_id = $self->source_id();
  if (!defined $species_id) {
    $species_id = $self->species_id();
  }
  return unless defined $species_id;

  $h->transaction(-CALLBACK => sub {

    $self->log_progress('Deleting records from previous possible upload runs');
    foreach my $table (qw(object_xref xref)) {
      $self->_delete_entries($table);
    }

    # Insert one xref per distinct UPI; repeated UPIs reuse the stored id.
    my $write_xrefs = sub {
      my ($sth) = @_;
      my %xref_id_for;
      foreach my $entry (@{$results}) {
        my $upi = $entry->{upi};
        if (!exists $xref_id_for{$upi}) {
          $sth->execute($source_id, $upi, $upi, 1, $species_id, 'CHECKSUM');
          # The new row's id is read from the statement handle.
          $xref_id_for{$upi} = $sth->{'mysql_insertid'};
        }
        $entry->{xref_id} = $xref_id_for{$upi};
      }
      return;
    };

    # Link every result to the xref id assigned to it above.
    my $write_object_xrefs = sub {
      my ($sth) = @_;
      foreach my $entry (@{$results}) {
        $sth->execute($entry->{id}, $entry->{object_type}, $entry->{xref_id}, 'CHECKSUM', 'DUMP_OUT');
      }
      return;
    };

    $self->log_progress('Starting xref insertion');
    $h->batch(-SQL => $insert_xref, -CALLBACK => $write_xrefs);

    $self->log_progress('Starting object_xref insertion');
    $h->batch(-SQL => $insert_object_xref, -CALLBACK => $write_object_xrefs);
  });

  $self->log_progress('Finished insertions');

  return;
}
151 
# Deletes the rows belonging to this mapper's source from one table.
#   $table - 'xref' or 'object_xref'; any other value raises an exception
# The delete is keyed on $self->source_id(); the number of deleted rows is
# reported through log_progress(). Returns nothing.
sub _delete_entries {
  my ($self, $table) = @_;
  $self->log_progress('Deleting entries from %s', $table);

  my %delete_sql_for = (
    xref => <<'SQL',
DELETE x
FROM xref x
WHERE x.source_id = ?
SQL
    object_xref => <<'SQL',
DELETE ox
FROM xref x,
 object_xref ox
WHERE x.source_id = ?
AND ox.xref_id = x.xref_id
SQL
  );

  my $sql = $delete_sql_for{$table};
  if (!$sql) {
    throw("Cannot find delete SQL for the table $table");
  }

  my $source_id = $self->source_id();
  my $helper    = $self->_xref_helper();
  my $count     = $helper->execute_update(-SQL => $sql, -PARAMS => [$source_id]);

  # Pick the singular noun only for exactly one deleted row.
  my $type = 'entries';
  if ($count == 1) {
    $type = 'entry';
  }
  $self->log_progress('Deleted %s %s from %s', $count, $type, $table);
  return;
}
178 
# Looks up the source_id in the source table for the row whose name equals
# $self->external_db_name().
sub source_id {
  my ($self) = @_;
  my $helper = $self->_xref_helper();
  my @bind   = ($self->external_db_name());
  return $helper->execute_single_result(
    -SQL    => 'select source_id from source where name=?',
    -PARAMS => \@bind
  );
}
186 
# Returns the species id, resolving it on first use.
# When the parent class has no value stored, the id is looked up from the
# core object's species name through get_id_from_species_name() and then
# stored via the parent class accessor.
sub species_id {
  my ($self) = @_;

  my $species_id = $self->SUPER::species_id();
  return $species_id if defined $species_id;

  $species_id = $self->get_id_from_species_name($self->core()->species());
  $self->SUPER::species_id($species_id);
  return $species_id;
}
196 
# Loads the checksum method class and returns a new instance of it.
# The class is taken from $DEFAULT_METHOD and is constructed with this
# mapper passed as -MAPPER. Throws when the class cannot be loaded.
sub get_method {
  my ($self) = @_;
  my $method_class = $DEFAULT_METHOD;

  # Turn Some::Class into Some/Class.pm so require can run inside a block
  # eval instead of compiling a string of code.
  (my $module_file = "${method_class}.pm") =~ s{::}{/}g;

  # Test the eval's own return value rather than $@ alone: $@ can be reset
  # while the eval unwinds, which would hide a failed load.
  my $loaded = eval { require $module_file; 1 };
  if (!$loaded) {
    my $error = $@ || 'unknown error';
    throw("Cannot require the class ${method_class}. Make sure your PERL5LIB is correct: $error");
  }

  return $method_class->new( -MAPPER => $self );
}
206 
207 ############# INTERNAL METHODS
208 
# Records a processing status.
#   $status - status string to record
# When an xref object is available the status is inserted into the
# process_status table; otherwise it is reported through log_progress()
# together with the current local time. Returns nothing.
sub _update_status {
  my ($self, $status) = @_;

  if (!$self->xref()) {
    my $time = localtime();
    $self->log_progress(q{Status Update '%s' @ %s}."\n", $status, $time);
    return;
  }

  $self->_xref_helper()->execute_update(
    -SQL    => q{insert into process_status (status, date) values(?,now())},
    -PARAMS => [$status]
  );
  return;
}
222 
# Counts the checksum_xref rows available for a source.
#   $db_url - optional database URL. When given, the count is taken from a
#             new connection built from the URL and source id 1 is used;
#             otherwise the mapper's xref connection and $self->source_id()
#             are used.
# Returns the row count. Throws when $db_url is given but cannot be parsed.
sub _map_checksums {
  my ($self, $db_url) = @_;
  my $source_id = $self->source_id();
  my $dbc = $self->mapper->xref->dbc;

  if (defined $db_url) {
    $source_id = 1;

    # Only user, password, host, port and database name are needed; the
    # leading captures (whole connection part, driver) are discarded.
    my (undef, undef, $user, $pass, $host, $port, $dbname) =
      $db_url =~ m{^((\w*)://(?:(\w+)(?:\:([^/\@]*))?\@)?(?:([\w\-\.]+)(?:\:(\d*))?)?/([\w\-\.]*))(?:/(\w+)(?:\?(\w+)=([\w\[\]\{\}]*))?)?((?:;(\w+)=(\w+))*)$};

    # The database-name group always takes part in a successful match, so
    # an undefined value means the URL did not match at all. The URL is
    # kept out of the message because it may contain a password.
    if (!defined $dbname) {
      throw('Cannot parse the database URL given for the checksum lookup');
    }

    # NOTE(review): the connection class is assumed to be
    # Bio::EnsEMBL::DBSQL::DBConnection - confirm against the project.
    require Bio::EnsEMBL::DBSQL::DBConnection;
    $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(
      -dbname => $dbname,
      -user   => $user,
      -pass   => $pass,
      -host   => $host,
      -port   => $port);
  }

  # Bind the source id instead of concatenating it into the statement.
  my $count = $dbc->sql_helper()->execute_single_result(
    -SQL    => 'select count(*) from checksum_xref where source_id = ?',
    -PARAMS => [$source_id]);
  return $count;
}
241 
# Prints a progress message to STDERR when verbose mode is on.
#   $fmt    - sprintf format string
#   @params - values substituted into $fmt
# The formatted message is prefixed with "CHKSM==> " and followed by a
# newline. Nothing is printed when $self->verbose is false.
sub log_progress {
  my ( $self, $fmt, @params ) = @_;
  if ( !$self->verbose ) {
    return;
  }
  my $message = sprintf( $fmt, @params );
  return printf( STDERR "CHKSM==> %s\n", $message );
}
247 
248 1;
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8
accession
public accession()
Bio::EnsEMBL::Utils::SqlHelper::execute_single_result
public Scalar execute_single_result()
Bio::EnsEMBL::DBSQL::DBConnection
Definition: DBConnection.pm:42
Bio::EnsEMBL::DBSQL::DBConnection::sql_helper
public Bio::EnsEMBL::Utils::SqlHelper sql_helper()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68