ensembl-hive  2.6
DbCmd.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::RunnableDB::DbCmd
6 
7 =head1 SYNOPSIS
8 
9  standaloneJob.pl Bio::EnsEMBL::Hive::RunnableDB::DbCmd -db_conn mysql://ensro@ens-livemirror/ncbi_taxonomy -input_query "SELECT name FROM ncbi_taxa_name WHERE name_class = 'scientific name' AND taxon_id = 9606" --append '["-N"]' -output_file out.dat
10 
11 =head1 DESCRIPTION
12 
13  This RunnableDB module acts as a wrapper around a database connection. It interfaces with the database the same way as you would
14  on the command line (i.e. with redirections and / or pipes to other commands) but with hive parameters instead.
15  The database connection is created from the "data_dbc" parameter, if provided, or the current hive database.
16 
17 =head1 CONFIGURATION EXAMPLE
18 
19  #
20  # The following examples show how to configure DbCmd in a PipeConfig module.
21  #
22 
23  # The most common use-case is to apply a SQL script onto the current database
24 
25  { -logic_name => 'write_member_counts',
26  -module => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
27  -parameters => {
28  'input_file' => $self->o('ensembl_cvs_root_dir').'/ensembl-compara/scripts/production/populate_member_production_counts_table.sql',
29  },
30  -flow_into => [ 'notify_pipeline_completed' ],
31  },
32 
33 
34  # You can also use the advanced parameters to run a query on the
35  # database with the db_cmd.pl and pipe its output onto another command
36  # e.g. mysql -h... -u... curr_db_name -N -q -e 'select * from mcl_sparse_matrix' | #mcl_bin_dir#/mcxload -abc ...
37 
38  { -logic_name => 'mcxload_matrix',
39  -module => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
40  -parameters => {
41  'append' => [qw(-N -q)],
42  'input_query' => 'select * from mcl_sparse_matrix',
43  'command_out' => [qw(#mcl_bin_dir#/mcxload -abc - -ri max -o #work_dir#/#file_basename#.tcx -write-tab #work_dir#/#file_basename#.itab)],
44  },
45  -flow_into => [ 'mcl' ],
46  },
47 
48 
49  # Finally, you can run another executable (like mysqlimport) with its
50  # own parameters onto another database (specified by 'db_conn')
51 
52  { -logic_name => 'populate_method_links_from_file',
53  -module => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
54  -parameters => {
55  'db_conn' => '#rel_db#',
56  'method_link_dump_file' => $self->o('method_link_dump_file'),
57  'executable' => 'mysqlimport',
58  'append' => [ '#method_link_dump_file#' ],
59  },
60  -flow_into => [ 'load_all_genomedbs' ],
61  },
62 
63 =head1 LICENSE
64 
65  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
66  Copyright [2016-2024] EMBL-European Bioinformatics Institute
67 
68  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
69  You may obtain a copy of the License at
70 
71  http://www.apache.org/licenses/LICENSE-2.0
72 
73  Unless required by applicable law or agreed to in writing, software distributed under the License
74  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
75  See the License for the specific language governing permissions and limitations under the License.
76 
77 =head1 CONTACT
78 
79  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
80 
81 =cut
82 
83 
84 package Bio::EnsEMBL::Hive::RunnableDB::DbCmd;
85 
86 use strict;
87 use warnings;
88 
89 use Bio::EnsEMBL::Hive::Utils qw(join_command_args);
90 
91 # This runnable is simply a SystemCmd specialized for database commands
92 
93 use base ('Bio::EnsEMBL::Hive::RunnableDB::SystemCmd');
94 
95 
# Returns a hash-ref of default parameter values: everything the parent
# class declares, overlaid with the parameters specific to this module.
# All DbCmd-specific parameters default to "not set" (undef), except
# 'prepend' and 'append' which default to empty lists.
sub param_defaults {
    my ($self, @args) = @_;

    my $inherited = $self->SUPER::param_defaults(@args);

    my %defaults = (
        %$inherited,

        # what to run, and extra arguments placed before / after
        'executable'  => undef,
        'prepend'     => [],
        'append'      => [],

        # where the input comes from
        'input_file'  => undef,
        'input_query' => undef,
        'command_in'  => undef,

        # where the output goes to
        'output_file' => undef,
        'command_out' => undef,
    );

    return \%defaults;
}
110 
111 
112 =head2 fetch_input
113 
114  Description : Implements fetch_input() interface method of Bio::EnsEMBL::Hive::Process that is used to read in parameters and load data.
115  Here it deals with finding the command line, doing parameter substitution and storing the result in a predefined place.
116 
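117  param('cmd'): Not an input of this module: fetch_input() overwrites it with the command line built from the parameters below. It is stored as a string when a shell is
118  needed (pipes / redirections), or as an array-ref of strings otherwise. The inputs are 'executable', 'prepend', 'append', 'input_file', 'input_query', 'command_in', 'output_file' and 'command_out'.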
119 
120  param('*'): Any other parameters can be freely used for parameter substitution.
121 
122 =cut
123 
# Builds the command line and stores it in param('cmd').
#
# Reads:  'executable', 'prepend', 'append', 'input_file', 'input_query',
#         'command_in', 'output_file', 'command_out'
# Writes: 'cmd' -- an array-ref of arguments when no shell is required,
#         or a single flat string when pipes / redirections are involved.
# Dies when no input is defined (and no 'executable' is given), when more
# than one input is defined, or when both outputs are defined.
sub fetch_input {
    my ($self) = @_;

    my $executable  = $self->param('executable');
    my $input_file  = $self->param('input_file');
    my $input_query = $self->param('input_query');
    my $command_in  = $self->param('command_in');

    # At most one input source is allowed; at least one is required
    # unless a specific executable has been requested
    my $input_count = grep { $_ } ($input_file, $input_query, $command_in);
    if (!$executable && $input_count == 0) {
        die "No input defined (missing 'input_file', 'input_query' and 'command_in')\n";
    }
    elsif ($input_count > 1) {
        die "Only 1 input ('input_file', 'input_query' and 'command_in') can be defined\n";
    }

    my $output_file = $self->param('output_file');
    my $command_out = $self->param('command_out');

    # Likewise, the output can only go to a single place
    if ($output_file && $command_out) {
        die "'output_file' and 'command_out' cannot be set together\n";
    }

    # Redirections and pipes can only be interpreted by a shell. In that
    # case the command becomes a plain string, so to_cmd() is told to hide
    # the password (last argument)
    my $need_a_shell = ($input_file || $command_in || $output_file || $command_out) ? 1 : 0;

    my $dbc     = $self->data_dbc;
    my @prepend = grep { defined } @{ $self->param('prepend') };
    my @append  = grep { defined } @{ $self->param('append') };
    my @cmd     = @{ $dbc->to_cmd($executable, \@prepend, \@append, $input_query, $need_a_shell) };

    # Without a shell, the argument list is used as it is. Note that
    # 'input_query', if any, has already been embedded by to_cmd()
    return $self->param('cmd', \@cmd) unless $need_a_shell;

    # File redirections are part of the database command itself ...
    push @cmd, '<', $input_file  if $input_file;
    push @cmd, '>', $output_file if $output_file;

    # ... whereas piped commands are glued before / after it
    my $prefix  = $command_in  ? (join_command_args($command_in))[1] . ' | '  : '';
    my $postfix = $command_out ? ' | ' . (join_command_args($command_out))[1] : '';

    my $flat_cmd = (join_command_args(\@cmd))[1];

    # Strip the single quotes around the argument that carries the
    # $EHIVE_TMP_PASSWORD_<n> variable, so that the shell can expand it
    $flat_cmd =~ s/ '(\S*\$EHIVE_TMP_PASSWORD_\d+)' / $1 /g;

    return $self->param('cmd', $prefix.$flat_cmd.$postfix);
}
177 
178 
179 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::Hive::RunnableDB::DbCmd
Definition: DbCmd.pm:19
Bio
Definition: AltAlleleGroup.pm:4