ensembl-hive  2.7.0
CliHelper.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 
22 =head1 CONTACT
23 
24  Please email comments or questions to the public Ensembl
25  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 
27  Questions may also be sent to the Ensembl help desk at
28  <http://www.ensembl.org/Help/Contact>.
29 
30 =cut
31 
=head1 NAME

Bio::EnsEMBL::Utils::CliHelper

36 =head1 VERSION
37 
38 $Revision$
39 
40 =head1 SYNOPSIS
41 
43 
44  my $cli = Bio::EnsEMBL::Utils::CliHelper->new();
45 
46  # get the basic options for connecting to a database server
47  my $optsd = $cli->get_dba_opts();
48 
49  # add another option
50  push(@$optsd,"print");
51 
52  # process the command line with the supplied options plus a reference to a help subroutine
53  my $opts = $cli->process_args($optsd,\&usage);
54 
55  # use the command line options to get an array of database details
56  for my $db_args (@{$cli->get_dba_args_for_opts($opts)}) {
57  # use the args to create a DBA
58  my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{$db_args});
59  ...
60  if(defined $opts->{print}) {
61  ...
62  }
63  }
64 
 For adding secondary databases, a prefix can be supplied. For instance, to add a second set of
 db params prefixed with dna (-dnahost -dnaport etc.) use the prefix argument with get_dba_opts and
 get_dba_args_for_opts:
 # get the basic options plus a second, dna-prefixed set for connecting to database servers
 my $optsd =
 [ @{ $cli_helper->get_dba_opts() }, @{ $cli_helper->get_dba_opts('dna') } ];
 # process the command line with the supplied options plus a help subroutine
 my $opts = $cli_helper->process_args( $optsd, \&usage );
 # get the dna details
 my ($dna_dba_details) =
 @{ $cli_helper->get_dba_args_for_opts( $opts, 1, 'dna' ) };
 my $dna_db =
 Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$dna_dba_details} );
78 
79 =head1 DESCRIPTION
80 
81 Utilities for a more consistent approach to parsing and handling EnsEMBL script command lines
82 
83 =head1 METHODS
84 
85 See subroutines.
86 
87 =cut
88 
89 package Bio::EnsEMBL::Utils::CliHelper;
90 
91 use warnings;
92 use strict;
93 
94 use Carp;
95 use Getopt::Long qw(:config auto_version no_ignore_case);
96 
100 
# Definitions of the standard database connection options. Each entry lists
# the option name followed by its aliases ('args') and the Getopt::Long
# argument specifier ('type'): '=s' takes a mandatory string, ':s' an optional
# string and ':i' an optional integer. get_dba_opts() joins the names with '|'
# and appends the type to build the final option specification.
my $dba_opts = [{args => ['host', 'dbhost', 'h'], type => '=s'},
                {args => ['port', 'dbport', 'P'], type => ':i'},
                {args => ['user', 'dbuser', 'u'], type => '=s'},
                {args => ['pass', 'dbpass', 'p'], type => ':s'},
                {args => ['dbname',  'D'],         type => ':s'},
                {args => ['pattern', 'dbpattern'], type => ':s'},
                {args => ['driver'],               type => ':s'},
                {args => ['species_id'],           type => ':i'},
                # species is handed on as the -SPECIES name (a string), so it
                # must not be parsed as an integer
                {args => ['species'],              type => ':s'},];
110 
=head2 new()

  Description : Create a CliHelper object. The helper keeps no state of its
                own, so any arguments passed to the constructor are ignored.
                May be invoked on the class or on an existing instance.
  Returntype  : Bio::EnsEMBL::Utils::CliHelper
  Status      : Under development

=cut

sub new {
  my $invocant = shift;
  # When called on an object, bless the new helper into that object's class
  my $target_class = ref($invocant) || $invocant;
  return bless {}, $target_class;
}
124 
=head2 get_dba_opts()

  Arg [1]     : Optional prefix prepended to every option name and alias,
                e.g. 'dna' turns host|dbhost|h into dnahost|dnadbhost|dnah
  Description : Builds the Getopt::Long specifications for the standard
                options used to connect to one or more Ensembl databases
  Returntype  : Arrayref of option specification strings
  Status      : Under development

=cut

sub get_dba_opts {
  my ($self, $prefix) = @_;
  $prefix ||= '';

  my @specs;
  for my $definition (@{$dba_opts}) {
    # every name and alias receives the prefix; the alternatives are then
    # joined with '|' and followed by the argument type
    my @names = map { $prefix . $_ } @{$definition->{args}};
    push @specs, join('|', @names) . $definition->{type};
  }
  return \@specs;
}
143 
=head2 process_args()

  Arg [1]     : Arrayref of supported command line options (e.g. from get_dba_opts).
                The array is not modified.
  Arg [2]     : Optional ref to subroutine to be invoked when -help or -? is supplied.
                If omitted, the help flag is simply recorded under the key 'help'.
  Description : Parses the command line (@ARGV) with Getopt::Long using the supplied
                option definitions plus a help option
  Returntype  : Hashref of parsed options
  Exceptions  : croaks if the command line cannot be parsed
  Status      : Under development

=cut

sub process_args {
  my ($self, $opts_def, $usage_sub) = @_;
  my $opts = {};

  # Work on a copy so that the caller's definition list is left untouched
  # and can safely be reused for another call
  my @spec = @{$opts_def || []};

  # With a callback Getopt::Long invokes it when the option is seen; without
  # one the flag is stored in the options hash like any other option
  push @spec, defined($usage_sub) ? (q/help|?/ => $usage_sub) : q/help|?/;

  GetOptions($opts, @spec) ||
    croak 'Could not parse command line arguments';
  return $opts;
}
162 
=head2 get_dba_args_for_opts()

  Arg [1]     : Hashref of options (e.g. parsed from command line options by process_args())
  Arg [2]     : Single species flag. If set to 1, the databases are assumed to hold a
                single species only. If undefined, it defaults to 0 for databases whose
                name contains _collection_ and to 1 otherwise.
  Arg [3]     : Optional prefix to use when looking up options e.g. dna
  Description : Uses the parsed command line options to generate an array of DBAdaptor
                arguments (expands the database pattern, lists the species of
                multispecies databases from their meta table). The hashes can be
                passed directly to Bio::EnsEMBL::DBSQL::DBAdaptor->new()
  Returntype  : Arrayref of DBA argument hash refs
  Exceptions  : croaks if no host is given, if neither dbname nor pattern is given, or
                if a multispecies database lists no species.production_name and no
                species_id option was supplied
  Status      : Under development

=cut

sub get_dba_args_for_opts {
  my ($self, $opts, $single_species_opt, $prefix) = @_;
  $prefix ||= '';

  # Names of the option keys to read, with the prefix applied
  my %key = map { $_ => $prefix . $_ }
    qw(host port user pass dbname pattern driver species species_id);

  croak '(db)host arguments required' unless defined $opts->{$key{host}};

  # Server level connection, used to list databases and to read meta tables
  my $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(
    -USER   => $opts->{$key{user}},
    -PASS   => $opts->{$key{pass}},
    -HOST   => $opts->{$key{host}},
    -PORT   => $opts->{$key{port}},
    -DRIVER => $opts->{$key{driver}});

  # An explicit database name takes precedence over a pattern
  my @dbnames;
  if (defined $opts->{$key{dbname}}) {
    @dbnames = ($opts->{$key{dbname}});
  }
  elsif (defined $opts->{$key{pattern}}) {
    my $db_regex = $opts->{$key{pattern}};
    @dbnames = grep { m/$db_regex/smx }
      @{$dbc->sql_helper()->execute_simple(q/SHOW DATABASES/)};
  }
  else {
    croak 'dbname or dbpattern arguments required';
  }

  my @db_args;
  for my $dbname (@dbnames) {

    # The adaptor group is the name_name(_name?)_GROUP_ component of the
    # database name; it stays undefined when the name has another layout
    my ($group) = $dbname =~
      /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_([a-z]+)(?:_\d+)?_\d+/;

    # Collections are flagged as multispecies and, unless the caller said
    # otherwise, are not handled in single species mode
    my $is_collection = $dbname =~ m/_collection_/ ? 1 : 0;
    my $single_species =
      defined $single_species_opt ? $single_species_opt
                                  : ($is_collection ? 0 : 1);

    my $species_rows;
    if ($single_species != 1) {
      # for multispecies, get the list of [species_id, name] rows from meta
      $species_rows =
        $dbc->sql_helper()
        ->execute(
"SELECT species_id,meta_value FROM $dbname.meta WHERE meta_key='species.production_name'"
        );
      croak "No species.production_name found in database"
        unless defined $opts->{$key{species_id}} ||
        scalar(@{$species_rows}) != 0;
    }

    if (defined $opts->{$key{species_id}}) {
      # a species given on the command line replaces whatever meta listed
      $species_rows =
        [[$opts->{$key{species_id}}, $opts->{$key{species}}]];
    }
    elsif (!defined $species_rows) {
      # otherwise assume the default species
      $species_rows = [[1, undef]];
    }

    # one set of adaptor arguments per species
    for my $row (@{$species_rows}) {
      my %dba_args = (-HOST            => $opts->{$key{host}},
                      -USER            => $opts->{$key{user}},
                      -PORT            => $opts->{$key{port}},
                      -PASS            => $opts->{$key{pass}},
                      -DBNAME          => $dbname,
                      -DRIVER          => $opts->{$key{driver}},
                      -SPECIES_ID      => $row->[0],
                      -SPECIES         => $row->[1],
                      -MULTISPECIES_DB => $is_collection);
      $dba_args{-GROUP} = $group if $group;
      push @db_args, \%dba_args;
    }
  } ## end for my $dbname (@dbnames)

  return \@db_args;
} ## end sub get_dba_args_for_opts
262 
=head2 get_dbas_for_opts()

  Arg [1]     : Hashref of options (e.g. parsed from command line options by process_args())
  Arg [2]     : Single species flag, passed on unchanged to get_dba_args_for_opts()
  Arg [3]     : Optional prefix to use when looking up options e.g. dna
  Description : Uses the parsed command line options to generate an array of DBAdaptors,
                one for each set of arguments produced by get_dba_args_for_opts().
                Note this can overload connections on a server
  Returntype  : Arrayref of Bio::EnsEMBL::DBSQL::DBAdaptor (empty if nothing matched)
  Status      : Under development

=cut

sub get_dbas_for_opts {
  my ($self, $opts, $single_species, $prefix) = @_;

  # Create a DBA for each set of details we want to work with. Collecting
  # them in an array means an arrayref is returned even when there are none,
  # so callers can always dereference the result.
  my @dbas =
    map { Bio::EnsEMBL::DBSQL::DBAdaptor->new(%{$_}) }
    @{$self->get_dba_args_for_opts($opts, $single_species, $prefix)};
  return \@dbas;
}
287 
=head2 load_registry_for_opts

  Arg [1]     : Hashref of options (e.g. parsed from command line options by process_args())
  Arg [2]     : Optional prefix to use when looking up the connection options
                e.g. dna or master. The C<registry> option is never prefixed.
  Description : Loads a Registry from the given options hash. If a C<registry>
                option is given then the code will call C<load_all> with it.
                Otherwise the host, port, user and pass options are used to call
                C<load_registry_from_db()>.
  Returntype  : Integer of the number of DBAdaptors loaded
  Status      : Under development

=cut

sub load_registry_for_opts {
  my ($self, $opts, $prefix) = @_;
  $prefix ||= q{};

  # A registry configuration file takes precedence over connection details
  if ($opts->{registry}) {
    return Bio::EnsEMBL::Registry->load_all($opts->{registry});
  }

  my ($host, $port, $user, $pass) =
    map { $prefix . $_ } qw(host port user pass);
  my %args = (-HOST => $opts->{$host},
              -PORT => $opts->{$port},
              -USER => $opts->{$user},
              -PASS => $opts->{$pass},);

  # Hand the connection details to the Registry and report how many
  # adaptors it loaded
  return Bio::EnsEMBL::Registry->load_registry_from_db(%args);
}
316 
317 1;
usage
public usage()
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
Bio::EnsEMBL::Utils::CliHelper::get_dba_opts
public Arrayref get_dba_opts()
map
public map()
Bio::EnsEMBL::Utils
Definition: Argument.pm:3
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::DBSQL::DBConnection
Definition: DBConnection.pm:42
Bio::EnsEMBL::Registry::load_registry_from_db
public Int load_registry_from_db()
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
Bio::EnsEMBL::DBSQL::DBConnection::new
public Bio::EnsEMBL::DBSQL::DBConnection new()
Bio::EnsEMBL::DBSQL::DBConnection::sql_helper
public Bio::EnsEMBL::Utils::SqlHelper sql_helper()
Bio::EnsEMBL::Utils::CliHelper
Definition: CliHelper.pm:55
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::SqlHelper::execute_simple
public ArrayRef execute_simple()
Bio::EnsEMBL::Registry::load_all
public Int load_all()