ensembl-hive  2.7.0
BaseObject.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::IdMapping::BaseObject - base object for IdMapping objects
34 
35 =head1 SYNOPSIS
36 
37  # this object isn't instantiated directly but rather extended
40 
41 =head1 DESCRIPTION
42 
43 This is the base object for some of the objects used in the IdMapping
44 application. An object that extends BaseObject will have a ConfParser,
45 Logger and Cache object. BaseObject also implements some useful utility
46 functions related to file and db access.
47 
48 This isn't very clean OO design but it's efficient and easy to use...
49 
50 =head1 METHODS
51 
52  new
53  get_filehandle
54  file_exists
55  fetch_value_from_db
56  dump_table_to_file
57  upload_file_into_table
58  logger
59  conf
60  cache
61 
62 =cut
63 
64 
65 package Bio::EnsEMBL::IdMapping::BaseObject;
66 
67 use strict;
68 use warnings;
69 no warnings 'uninitialized';
70 
71 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
72 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
73 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
74 
75 
=head2 new

  Arg [LOGGER] : Bio::EnsEMBL::Utils::Logger $logger - a logger object
  Arg [CONF]   : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
  Arg [CACHE]  : Bio::EnsEMBL::IdMapping::Cache $cache - a cache object
  Example      : my $object = Bio::EnsEMBL::IdMapping::BaseObjectSubclass->new(
                   -LOGGER => $logger,
                   -CONF   => $conf,
                   -CACHE  => $cache
                 );
  Description  : Constructor. Checks that a logger, a configuration and a
                 cache object of the expected classes were passed in, then
                 stores them on the new object through the logger(), conf()
                 and cache() accessors.
  Return type  : implementing subclass type
  Exceptions   : thrown on wrong or missing arguments
  Caller       : general
  Status       : At Risk
               : under development

=cut

sub new {
  my $caller = shift;

  # allow the constructor to be called on an instance as well as on a class
  my $class = ref($caller) || $caller;

  my ($logger, $conf, $cache) = rearrange(['LOGGER', 'CONF', 'CACHE'], @_);

  unless ($logger and ref($logger) and
          $logger->isa('Bio::EnsEMBL::Utils::Logger')) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }

  unless ($conf and ref($conf) and
          $conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }

  # this message used to talk about "configuration" (copied from the check
  # above), which pointed users at the wrong argument
  unless ($cache and ref($cache) and
          $cache->isa('Bio::EnsEMBL::IdMapping::Cache')) {
    throw("You must provide a cache as a Bio::EnsEMBL::IdMapping::Cache object.");
  }

  my $self = {};
  bless ($self, $class);

  # initialise
  $self->logger($logger);
  $self->conf($conf);
  $self->cache($cache);

  return $self;
}
126 
127 
=head2 get_filehandle

  Arg[1]      : String $filename - filename for filehandle
  Arg[2]      : (optional) String $path_append - subdirectory name to append
                to basedir
  Arg[3]      : (optional) String $mode - filehandle mode (<|>|>>); a false
                value selects '>'
  Example     : my $fh = $object->get_filehandle('mapping_stats.txt', 'stats',
                  '>');
                print $fh "Stats:\n";
  Description : Opens a file for reading or writing and returns the
                filehandle. The filename is qualified with the 'basedir'
                configuration parameter and, if given, the subdirectory name.
  Return type : filehandle
  Exceptions  : thrown on missing filename or if the file can't be opened
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub get_filehandle {
  my ($self, $filename, $path_append, $mode) = @_;

  unless (defined($filename)) {
    throw("Need a filename for this filehandle.");
  }

  # start from the configured base directory, optionally descending into a
  # subdirectory
  my $dir = $self->conf->param('basedir');
  if (defined($path_append)) {
    $dir = path_append($dir, $path_append);
  }

  # default to (over)writing
  $mode = '>' unless ($mode);

  my $fh;
  unless (open($fh, $mode, "$dir/$filename")) {
    throw("Unable to open $dir/$filename: $!");
  }

  return $fh;
}
165 
166 
=head2 file_exists

  Arg[1]      : String $filename - filename to test
  Arg[2]      : (optional) String $path_append - subdirectory name to append
                to basedir
  Example     : unless ($object->file_exists('gene_mappings.ser', 'mapping')) {
                  $object->do_gene_mapping;
                }
  Description : Tests if a file exists and has non-zero size. The filename is
                qualified with the 'basedir' configuration parameter and, if
                given, the subdirectory name.
  Return type : Boolean - the result of Perl's -s file test (the file size if
                the file exists and is not empty, a false value otherwise)
  Exceptions  : none
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub file_exists {
  my ($self, $filename, $path_append) = @_;

  my $dir = $self->conf->param('basedir');
  if (defined($path_append)) {
    $dir = path_append($dir, $path_append);
  }

  # -s is false for both missing and empty files
  my $size = -s "$dir/$filename";

  return $size;
}
193 
194 
=head2 fetch_value_from_db

  Arg[1]      : DBI::db $dbh - a DBI database handle
  Arg[2]      : String $sql - SQL statement to execute
  Example     : my $num_genes = $object->fetch_value_from_db($dbh,
                  'SELECT count(*) FROM gene');
  Description : Executes an SQL statement on a db handle and returns the first
                column of the first row returned. Useful for queries returning
                a single value, like table counts. Any further rows are
                discarded and the statement handle is finished before
                returning.
  Return type : Return type of SQL statement (undef if no row was returned)
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub fetch_value_from_db {
  my $self = shift;
  my $dbh = shift;
  my $sql = shift;

  # check ref() first so that an unblessed reference triggers our own
  # exception rather than Perl's "can't call method on unblessed reference"
  unless ($dbh and ref($dbh) and $dbh->isa('DBI::db')) {
    throw("Need a db handle.");
  }
  throw("Need an SQL query to execute.") unless ($sql);

  my $sth = $dbh->prepare($sql);
  $sth->execute;
  my ($retval) = $sth->fetchrow_array;

  # only the first row is read, so release the handle explicitly instead of
  # leaving it active with unfetched rows
  $sth->finish;

  return $retval;
}
226 
227 
=head2 dump_table_to_file

  Arg[1]      : String $dbtype - db type (source|target)
  Arg[2]      : String $table - name of table to dump
  Arg[3]      : String $filename - name of dump file
  Arg[4]      : (optional) Boolean $check_existing - turn on test for
                existing dump
  Example     : my $rows_dumped = $object->dump_table_to_file('source',
                  'stable_id_event', 'stable_id_event_existing.txt');
  Description : Dumps the contents of a db table to a tab-delimited file, one
                row per line, with NULL values written as '\N'. The dump file
                will be written to a subdirectory called 'tables' under the
                basedir from your configuration. If $check_existing is set
                and a non-empty dump file already exists, nothing is dumped.
  Return type : Int - the number of rows dumped (0 if an existing dump was
                kept)
  Exceptions  : thrown on wrong or missing arguments, or if the dump file
                can't be opened or closed
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub dump_table_to_file {
  my $self = shift;
  my $dbtype = shift;
  my $table = shift;
  my $filename = shift;
  my $check_existing = shift;

  # argument check
  unless (($dbtype eq 'source') or ($dbtype eq 'target')) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.") unless ($filename);

  # conditionally check if table was already dumped
  if ($check_existing and $self->file_exists($filename, 'tables')) {
    $self->logger->info("$filename exists, won't dump again.\n");
    return 0;
  }

  my $fh = $self->get_filehandle($filename, 'tables');

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;
  my $sth = $dbh->prepare("SELECT * FROM $table");
  $sth->execute;

  my $i = 0;

  while (my @row = $sth->fetchrow_array) {
    $i++;

    # use '\N' for NULL values
    print $fh join("\t", map { defined($_) ? $_ : '\N' } @row);
    print $fh "\n";
  }

  $sth->finish;

  # buffered write errors (e.g. a full disk) only surface when the handle is
  # closed, so close explicitly and report failure instead of silently
  # leaving a truncated dump behind
  close($fh) or throw("Unable to close dump file $filename: $!");

  return $i;
}
292 
293 
=head2 upload_file_into_table

  Arg[1]      : String $dbtype - db type (source|target)
  Arg[2]      : String $table - name of table to upload the data to
  Arg[3]      : String $filename - name of dump file
  Arg[4]      : (optional) Boolean $no_check_empty - don't check if table is
                empty
  Example     : my $rows_uploaded = $object->upload_file_into_table('target',
                  'stable_id_event', 'stable_id_event_new.txt');
  Description : Uploads a tab-delimited data file into a db table. The data
                file will be taken from a subdirectory 'tables' under your
                configured basedir. If the db table isn't empty and
                $no_check_empty isn't set, no data is uploaded (and a warning
                is issued).

                A table name of the form '<object>_stable_id...' is treated
                as stable ID data for table '<object>': the file is loaded
                into a temporary table and the stable_id, version,
                created_date and modified_date columns of '<object>' are
                updated from it, matching on '<object>_id'.

                Nothing is uploaded if the 'dry_run' configuration parameter
                is set.
  Return type : Int - the number of rows loaded from the file, 0 if no data
                was uploaded; nothing (undef) on a dry run
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general
  Status      : At Risk
              : under development

=cut

sub upload_file_into_table {
  my $self           = shift;
  my $dbtype         = shift;
  my $table          = shift;
  my $filename       = shift;
  my $no_check_empty = shift;

  # argument check
  unless ( ( $dbtype eq 'source' ) or ( $dbtype eq 'target' ) ) {
    throw("Missing or unknown db type: $dbtype.");
  }
  throw("Need a table name.") unless ($table);
  throw("Need a filename.")   unless ($filename);

  # sanity check for dry run
  if ( $self->conf->param('dry_run') ) {
    $self->logger->warning(
                        "dry_run - skipping db upload for $filename.\n");
    return;
  }

  my $file =
    join( '/', $self->conf->param('basedir'), 'tables', $filename );
  my $r = 0;

  # nothing to do for a missing or empty data file
  unless ( -s $file ) {
    $self->logger->warning( "No data found in file $filename.\n", 1 );
    return $r;
  }

  $self->logger->debug( "$file -> $table\n", 1 );

  my $dba = $self->cache->get_DBAdaptor($dbtype);
  my $dbh = $dba->dbc->db_handle;

  my $idtable = 0;
  if ( $table =~ /^([^_]+)_stable_id/ ) {
    # This is a stable_id table we're working with; from here on $table
    # refers to the main object table (e.g. 'gene' for 'gene_stable_id').
    $idtable = 1;
    $table   = $1;
  }

  # check table is empty
  unless ($no_check_empty) {
    my $sql;
    if ($idtable) {
      $sql =
        qq(SELECT count(*) FROM $table WHERE stable_id IS NOT NULL);
    }
    else {
      $sql = qq(SELECT count(*) FROM $table);
    }
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my ($c) = $sth->fetchrow_array;
    $sth->finish;

    if ( $c > 0 ) {
      if ($idtable) {
        $self->logger->warning(
                           "Table $table contains $c stable IDs.\n",
                           1 );
      }
      else {
        $self->logger->warning(
                      "Table $table not empty: found $c entries.\n",
                      1 );
      }
      $self->logger->info( "Data not uploaded!\n", 1 );
      return $r;
    }
  } ## end unless ($no_check_empty)

  # now upload the data, remembering what LOAD DATA reports as loaded
  my $rows_loaded;
  if ($idtable) {
    # Create a temporary table, upload the data into it, and then
    # update the main table. The process id keeps the name unique
    # between concurrently running processes.
    my $tmp_table = "stable_id_$$";

    $dbh->do(
      qq( CREATE TABLE $tmp_table ( object_id INTEGER UNSIGNED,
                                    stable_id VARCHAR(255),
                                    version SMALLINT UNSIGNED,
                                    created_date DATETIME,
                                    modified_date DATETIME,
                                    PRIMARY KEY(object_id) ) )
    );

    $rows_loaded = $dbh->do(
      qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $tmp_table));

    $dbh->do(
      qq(
        UPDATE $table, $tmp_table
        SET $table.stable_id=$tmp_table.stable_id,
            $table.version=$tmp_table.version,
            $table.created_date=$tmp_table.created_date,
            $table.modified_date=$tmp_table.modified_date
        WHERE $table.${table}_id = $tmp_table.object_id )
    );

    $dbh->do(qq(DROP TABLE $tmp_table));
  } ## end if ($idtable)
  else {
    $rows_loaded =
      $dbh->do(qq(LOAD DATA LOCAL INFILE '$file' INTO TABLE $table));
  }
  $dbh->do(qq(OPTIMIZE TABLE $table));

  # $r used to stay 0 whatever was uploaded. DBI's do() returns '0E0' for
  # zero rows, -1 if the count is unknown and undef on failure; turn
  # anything that isn't a positive count into a plain 0.
  if ( defined($rows_loaded) and $rows_loaded > 0 ) {
    $r = $rows_loaded + 0;
  }

  return $r;
} ## end sub upload_file_into_table
424 
425 
=head2 logger

  Arg[1]      : (optional) Bio::EnsEMBL::Utils::Logger - the logger to set
  Example     : $object->logger->info("Starting ID mapping.\n");
  Description : Getter/setter for the logger object. If an argument is
                passed it replaces the stored logger before the value is
                returned.
  Return type : Bio::EnsEMBL::Utils::Logger
  Exceptions  : none
  Caller      : constructor
  Status      : At Risk
              : under development

=cut

sub logger {
  my ($self, @new_value) = @_;

  # any argument, even an undefined one, overwrites the stored value
  if (@new_value) {
    $self->{'_logger'} = $new_value[0];
  }

  return $self->{'_logger'};
}
444 
445 
=head2 conf

  Arg[1]      : (optional) Bio::EnsEMBL::Utils::ConfParser - the configuration
                to set
  Example     : my $basedir = $object->conf->param('basedir');
  Description : Getter/setter for the configuration object. If an argument is
                passed it replaces the stored configuration before the value
                is returned.
  Return type : Bio::EnsEMBL::Utils::ConfParser
  Exceptions  : none
  Caller      : constructor
  Status      : At Risk
              : under development

=cut

sub conf {
  my ($self, @new_value) = @_;

  # any argument, even an undefined one, overwrites the stored value
  if (@new_value) {
    $self->{'_conf'} = $new_value[0];
  }

  return $self->{'_conf'};
}
465 
466 
=head2 cache

  Arg[1]      : (optional) Bio::EnsEMBL::IdMapping::Cache - the cache to set
  Example     : $object->cache->read_from_file('source');
  Description : Getter/setter for the cache object. If an argument is passed
                it replaces the stored cache before the value is returned.
  Return type : Bio::EnsEMBL::IdMapping::Cache
  Exceptions  : none
  Caller      : constructor
  Status      : At Risk
              : under development

=cut

sub cache {
  my ($self, @new_value) = @_;

  # any argument, even an undefined one, overwrites the stored value
  if (@new_value) {
    $self->{'_cache'} = $new_value[0];
  }

  return $self->{'_cache'};
}
486 
487 1;
488 
Bio::EnsEMBL::IdMapping::BaseObject
Definition: BaseObject.pm:25
Bio::EnsEMBL::Utils::ScriptUtils
Definition: ScriptUtils.pm:11
Bio::EnsEMBL::IdMapping::Cache::read_from_file
public read_from_file()
Bio::EnsEMBL::Utils::ConfParser::param
public Scalar param()
Bio::EnsEMBL::IdMapping::Cache::new
public Bio::EnsEMBL::IdMapping::Cache new()
get_filehandle
public get_filehandle()
Bio::EnsEMBL::Utils::ConfParser
Definition: ConfParser.pm:41
Bio::EnsEMBL::Utils::Logger
Definition: Logger.pm:36
Bio::EnsEMBL::Utils::Logger::info
public info()
Bio::EnsEMBL::IdMapping::Cache
Definition: Cache.pm:18
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68