ensembl-hive  2.8.1
DB.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 =head1 DESCRIPTION
21 
22 A database class that auto-instantiates a DBIC schema
23 
24 Convenience methods prevent having to delve into DBIC guts for common activities
25 
26 =head1 SYNOPSIS
27 
28 my $db = Xref::DB->new(
29  config => {
30  host => 'db.com',
31  port => 3306,
32  user => 'me',
33  pass => 'Go on then',
34  driver => 'mysql',
35  db => 'name_for_db',
36  create => 1, # Deploys the schema to the DB for first use
37  }
38 );
39 
40 $db = Xref::DB->new(
41  config_file => 'db.conf' # config options in Config::General format
42 );
43 
44 my $dbh = $db->dbh; # $dbh is a DBI database handle borrowed for direct SQL
45 $dbh->prepare('DROP TABLE dependent_xref');
46 
47 $db->create_db_row('Xref',{
48  xref_id => 1,
49  accession => 'YAY',
50  description => 'Sample new Xref',
51  source_id => 1,
52  ...
53 });
54 
55 =cut
56 
57 
58 package Xref::DB;
59 
60 use strict;
61 use warnings;
62 
63 use Moose;
64 use namespace::autoclean;
65 use Config::General;
66 use Config::IniFiles; # FIXME normalise around one config format
67 use Carp;
68 use Xref::Schema;
69 use DBI;
70 
71 has schema => (
72  isa => 'Xref::Schema',
73  is => 'ro',
74  builder => '_init_db'
75 );
76 
77 has config_file => (
78  isa => 'Str',
79  is => 'rw',
80  builder => '_guess_config'
81 );
82 
83 has config => (
84  isa => 'HashRef',
85  is => 'rw'
86 );
87 
88 has now_function => (
89  isa => 'Str',
90  default => 'now()',
91  is => 'rw',
92 );
93 
94 
95 =head2 _init_db
96  Arg [1] : HashRef of configuation parameters (driver, db, host, port, user, pass)
97  Description: Initialise the core database.
98  Return type: schema
99  Caller : internal
100 
101 =cut
102 
103 sub _init_db {
104  my $self = shift;
105 
106  $self->_init_config if ! defined $self->config;
107  $self->_validate_config($self->config);
108  my %conf = %{ $self->config };
109  my $enable_unicode = $conf{enable_unicode} // 0;
110  my %opts;
111  $opts{mysql_enable_utf8} = $enable_unicode if ($conf{driver} eq 'mysql');
112  $opts{mysql_auto_reconnect} = 1 if ($conf{driver} eq 'mysql');
113  $opts{sqlite_unicode} = $enable_unicode if($conf{driver} eq 'SQLite');
114  my $dsn;
115  if ($conf{driver} eq 'SQLite') {
116  $dsn = sprintf 'dbi:%s:database=%s',$conf{driver},$conf{file};
117  $self->now_function("date('now')");
118  } else {
119  $dsn = sprintf 'dbi:%s:database=%s;host=%s;port=%s', $conf{driver}, $conf{db}, $conf{host}, $conf{port};
120  }
121 
122  my %deploy_opts = ();
123  # Example deploy option $deploy_opts{add_drop_table} = 1;
124  my $schema = Xref::Schema->connect($dsn, $conf{user}, $conf{pass}, \%opts);
125 
126  if ($conf{create} == 1 && $conf{driver} eq 'mysql') {
127  my $dbh = DBI->connect(
128  sprintf('DBI:%s:database=;host=%s;port=%s', $conf{driver}, $conf{host}, $conf{port}), $conf{user}, $conf{pass}, \%opts
129  );
130 
131  # Remove database if already exists
132  my %dbs = map {$_->[0] => 1} @{$dbh->selectall_arrayref('SHOW DATABASES')};
133  my $dbname = $conf{db};
134  if ($dbs{$dbname}) {
135  $dbh->do( "DROP DATABASE $dbname;" );
136  }
137 
138  my $db_collation = ( $enable_unicode ) ? 'utf8_general_ci' : 'latin1_swedish_ci';
139  $dbh->do("CREATE DATABASE $dbname DEFAULT COLLATE ${db_collation};");
140 
141  $dbh->disconnect;
142  }
143 
144  $schema->deploy(\%deploy_opts) if $conf{create} == 1;
145 
146  return $schema;
147 } ## end sub _init_db
148 
149 
150 =head2 _guess_config
151  Description: Don't want production use to guess at least at the moment.
152  This mainly exists so TestDB can override and replace with a
153  useful default
154  Return type: undef
155  Caller : internal
156 
157 =cut
158 
159 sub _guess_config {
160  return;
161 } ## end sub _guess_config
162 
163 
164 =head2 _init_config
165  Arg [1] : HashRef of configuation parameters (driver, db, host, port, user, pass)
166  Description: Initialisae the loading of the configuration file.
167  Return type: HashRef - $self->config
168  Caller : internal
169 
170 =cut
171 
172 sub _init_config {
173  my $self = shift;
174 
175  if (defined $self->config_file) {
176  my $conf = Config::General->new($self->config_file);
177  my %opts = $conf->getall();
178  $self->config(\%opts);
179  } else {
180  confess 'No config or config_file provided to new(). Cannot execute';
181  }
182 
183  return $self->config;
184 } ## end sub _init_config
185 
186 
187 =head2 _validate_config
188  Arg [1] : HashRef of configuation parameters (driver, db, host, port, user, pass)
189  Description: Configuration file parameter validation
190  Return type: DBI database handle
191  Caller : internal
192 
193 =cut
194 
195 sub _validate_config {
196  my ($self,$config) = @_;
197  my @required_keys = qw/driver/;
198  if ($config->{driver} eq 'mysql') {
199  push @required_keys, qw/db host port user pass/;
200  } elsif ($config->{driver} eq 'SQLite') {
201  push @required_keys, qw/file/;
202  } else {
203  confess q(TestDB config requires parameter 'driver' with value mysql or SQLite);
204  }
205  my @errors;
206  foreach my $constraint (@required_keys) {
207  if (! exists $config->{$constraint}) {
208  push @errors, "Missing argument '$constraint'";
209  }
210  }
211  if (scalar @errors > 0) {
212  confess sprintf "%s \n%s",
213  ($self->config_file) ? 'Missing options in '.$self->config_file. ': ' : 'Missing options in supplied config: ',
214  join ';',@errors;
215  }
216 } ## end sub _validate_config
217 
218 
219 =head2 dbh
220  Description: Shortcut for accessing a database handle directly. I get the
221  impression we might be doing this a lot.
222  Return type: DBI database handle
223  Caller : internal
224 
225 =cut
226 
227 sub dbh {
228  my $self = shift;
229  return $self->schema->storage->dbh;
230 } ## end sub dbh
231 
232 
233 =head2 create_db_row
234  Arg [1] : model
235  Arg [2] : arguments : These should be key-value pairs matching the rows in
236  the table
237  Description: Shortcut for creating things on the fly
238  Return type:
239  Caller : internal
240 
241 =cut
242 
243 sub create_db_row {
244  my ($self,$model, $params) = @_;
245  my $source = $self->schema->resultset($model)->create(
246  $params
247  );
248  return $source;
249 } ## end sub create_db_row
250 
251 
252 =head2 populate_metadata
253 
254  Arg [1] : Config file path, normally xref_config.ini
255  Description: Loads species and source information into the schema from the
256  supplied file in Config::IniFiles format
257  Caller : User
258 
259 =cut
260 
261 # TODO: Provide species AND division in order to limit quantity of madness
262 
263 sub populate_metadata {
264  my ($self, $config_path) = @_;
265 
266  my $config = $self->_load_xref_config($config_path);
267 
268  # Populate species table with species taxa
269  print "Iterating over species groups\n";
270 
271  my %sources;
272  # First build up records for each potential source
273  foreach my $section ( $config->GroupMembers('source')) {
274 
275  my ( $source_name ) = $section =~ /
276  \A
277  source\s+(.*)
278  \Z
279  /x;
280  $sources{$source_name} = $self->_mangle_source_block($section, $config);
281 
282  }
283 
284  my %compiled_config;
285  # Next, build a config hash from the species entries in the config file
286  # and populate them with relevant source information from the %sources
287  foreach my $section ( $config->GroupMembers('species') ) {
288  my ( $species_name ) = $section =~ /
289  \A
290  species\s+(\S+)
291  \s*
292  \Z
293  /x;
294 
295  my $taxon_id = $config->val( $section, 'taxonomy_id' );
296 
297  my @source_names = $config->val( $section, 'source', ());
298  my %sources_for_species;
299 
300  foreach my $source_entry (@source_names) {
301 
302  if (! exists $sources{$source_entry}) {
303  confess 'Species config references a source that is not defined in the config: '.$source_entry;
304  }
305 
306  $sources_for_species{$source_entry} = $sources{$source_entry};
307  }
308 
309  $compiled_config{$species_name} = {
310  species_id => $taxon_id, # species_id == taxon_id in xref system,
311  taxonomy_id => $taxon_id,
312  sources => \%sources_for_species
313  }
314  }
315 
316  # Now populate the database with the result each source entry in the config with source-specific info
317 
318  foreach my $species ( keys %compiled_config ) {
319 
320  my $species_record = $self->schema->resultset('Species')->create({
321  species_id => $compiled_config{$species}{species_id},
322  name => $species,
323  taxonomy_id => $compiled_config{$species}{taxonomy_id}
324  });
325 
326  my $compiled_sources = $compiled_config{$species}{sources};
327 
328  foreach my $source ( keys %$compiled_sources ) {
329  my $parser = delete $compiled_sources->{$source}->{parser};
330  my $source_record = $self->schema->resultset('Source')->find_or_create(
331  $compiled_sources->{$source} # once trimmed, we can pump the source hash straight into DBIC
332  );
333  delete $sources{$source};
334 
335  $source_record->create_related(
336  'source_url',
337  {
338  species_id => $compiled_config{$species}{species_id},
339  parser => $parser
340  }
341  );
342  } # End foreach source
343 
344 
345  } # End foreach species
346 
347  # Add any sources which are not explicitly linked to a species
348 
349  foreach my $source ( keys %sources ) {
350 
351  my $parser = delete $sources{$source}->{parser};
352  my $source_record = $self->schema->resultset('Source')->find_or_create(
353  $sources{$source}
354  );
355 
356  }
357 
358  return;
359 }
360 
361 =head2 _load_xref_config
362 
363 Arg [1] : path to xref_config.ini
364 Description: Load the config file and sanity-check to ensure content is
365  properly formatted for loading.
366  FIXME: make parsing into a parser? Better yet switch to a format
367  which can express this grammar correctly without micro-formatting
368 Returntype : Hashref config, as returned by Config::IniFiles
369 
370 =cut
371 
372 sub _load_xref_config {
373  my ($self, $config_path) = @_;
374 
375  if (! -e $config_path) {
376  confess "Unable to open config file $config_path";
377  }
378  my $config = Config::IniFiles->new(-file => $config_path);
379 
380  if (! defined $config) {
381  confess "Errors in $config_path, unable to parse: ". join ',', @Config::IniFiles::errors;
382  }
383  my $source_id = 0;
384  my %source_ids; # A tracker for sections we've seen
385 
386  foreach my $section ( $config->GroupMembers('source')) {
387  my ( $spaces, $source_name ) = $section =~ /
388  \A
389  source(\s+)(\S+)
390  \s*
391  \Z
392  /x;
393 
394  # This validation is how we used to validate this file
395  if ( length($spaces) > 1 ) {
396  confess(
397  sprintf(
398  "Too many spaces between the words 'source' and '%s'\nwhile reading source section '[%s]'\n",
399  $source_name,
400  $section
401  )
402  );
403  }
404 
405  if ( index( $config->val( $section, 'name' ), "\n" ) != -1 ) {
406  confess(
407  sprintf( "The source section '[%s]' occurs more than once in \n", $section )
408  );
409  }
410 
411  $source_ids{$section} = ++$source_id; # Record existence of species->source
412  }
413 
414  foreach my $section ( $config->GroupMembers('species')) {
415  my ( $spaces, $species_name ) = $section =~ /
416  \A
417  species(\s+)(\S+)
418  \s*
419  \Z
420  /x;
421 
422  if ( length($spaces) > 1 ) {
423  confess(
424  sprintf(
425  "Too many spaces between the words 'species' and '%s'\nwhile reading species section '[%s]'\n",
426  $species_name,
427  $section
428  )
429  );
430  }
431 
432  foreach my $source_name (split qr{ \n }msx, $config->val($section, 'source') ) {
433 
434  $source_name =~ s{ \s\z }{}msx; # Config file can easily contain trailling whitespace
435  my $source_section = "source $source_name";
436  # Check integrity of species to source mentions
437  if ( !exists $source_ids{$source_section} ) {
438  confess(
439  sprintf( "Can not find source section '[%s]'\nwhile reading species section '[%s]'\n",
440  $source_section,
441  $section
442  )
443  );
444  }
445 
446  }
447 
448  }
449  return $config;
450 }
451 
452 =head2 _mangle_source_block
453 
454 Arg 1 : String - section header, the name for the section that identifies
455  the section we need to extract values from
456 Arg 2 : Hashref - config file content, the output of _load_xref_config()
457 Description: Takes a single source record from xref_config.ini and creates a
458  hashref. Some arguments are massaged to get them into the schema
459  easily
460 Returntype : Hashref of a single source's properties
461 Caller : Internal
462 
463 =cut
464 
465 sub _mangle_source_block {
466  my ($self, $section, $config) = @_;
467 
468  my $source_config;
469  # Get the easy ones done
470  foreach my $key (qw/name priority parser/) {
471  my $value = $config->val($section, $key);
472  if (defined $value) {
473  $source_config->{$key} = $value;
474  }
475  }
476 
477  my $priority_description = $config->val($section, 'prio_descr');
478  if (defined $priority_description) {
479  $source_config->{priority_description} = $priority_description;
480  }
481 
482  return $source_config;
483 }
484 
485 __PACKAGE__->meta->make_immutable;
486 
487 1;
map
public map()
Xref::Schema
Definition: ChecksumXref.pm:3
accession
public accession()
Xref::DB
Definition: DB.pm:40
Xref::DB::dbh
public DBI dbh()