9 standaloneJob.pl RunnableDB/
DatabaseDumper.pm -exclude_ehive 1 -exclude_list 1 \
10 -table_list
"['peptide_align_%']" -src_db_conn mysql:
14 This is a Runnable to dump the tables of a database (by
default,
17 The following parameters are accepted:
 - src_db_conn : the connection parameters to the database to be dumped
   (by default, the current eHive database if available)
 - exclude_ehive [boolean=0] : do we exclude the eHive-specific tables
   from the dump
 - table_list [string or array of strings] : the list of tables
   to include in the dump. The '%' wildcard is accepted. A plain string
   is split on whitespace into individual table names / patterns.
 - exclude_list [boolean=0] : do we consider 'table_list' as a list
   of tables to be excluded from the dump (instead of included)
 - output_file [string] : the file to write the dump to. If the filename
   ends with ".gz", the file is compressed with "gzip" (default parameters)
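 - output_db [string] : URL of a database to write the dump to. In this
   mode, the dump is piped straight into that database instead of being
   written to a file. Only used when "output_file" is not set.

 - skip_dump [boolean=0] : set this to 1 to skip the dump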
40 The decision process regarding which tables should be dumped is quite complex.
41 The following sections explain the various scenarios.
If "table_list" is undefined or maps to an empty list, the list
of tables to be dumped is decided according to "exclude_list" (EL)
and "exclude_ehive" (EH). "exclude_list" controls the whole list of
non-eHive tables.
52 EL EH List of tables to dump
54 0 1 => all the tables, except the eHive ones
55 1 0 => all the tables, except the non-eHive ones = only the eHive tables
56 1 1 => both eHive and non-eHive tables are excluded = nothing is dumped
If "table_list" is defined as a non-empty list T, the decision table is:
60 EL EH List of tables to dump
61 0 0 => all the tables in T + the eHive tables
62 0 1 => all the tables in T
63 1 0 => all the tables, except the ones in T
64 1 1 => all the tables, except the ones in T and the eHive ones
66 1.b. eHive-only database
The decision table can be simplified if the database only contains eHive tables.
In particular, the "exclude_list" and "table_list" parameters have no effect.
71 EH List of tables to dump
72 0 => All the eHive tables, i.e. the whole database
73 1 => No eHive tables, i.e. nothing
77 The
"exclude_ehive" parameter is ignored.
80 EL List of tables to dump
82 1 => all the tables are excluded = nothing is dumped
84 non-empty
"table_list" T:
85 EL List of tables to dump
86 0 => all the tables in T
87 1 => all the tables, except the ones in T
92 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
93 Copyright [2016-2022] EMBL-European Bioinformatics Institute
95 Licensed under the Apache License,
Version 2.0 (the
"License"); you may not use
this file except in compliance with the License.
96 You may obtain a copy of the License at
100 Unless required by applicable law or agreed to in writing, software distributed under the License
101 is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
102 See the License
for the specific language governing permissions and limitations under the License.
106 Please subscribe to the
Hive mailing list: http:
111 package Bio::EnsEMBL::Hive::RunnableDB::DatabaseDumper;
118 use base (
'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd');
123 %{$self->SUPER::param_defaults(@_)},
125 # Which tables to dump. How the options are combined is explained above 126 'table_list' => undef, # array-ref
127 'exclude_ehive' => 0, #
boolean 128 'exclude_list' => 0, #
boolean 131 'src_db_conn' => undef, # URL, hash-ref, or Registry name
132 'output_file' => undef, # String
133 'output_db' => undef, # URL, hash-ref, or Registry name
136 'skip_dump' => 0, #
boolean 137 'dump_options' => undef, # Extra options to pass to the dump program
139 # SystemCmd's options to make sure the whole command succeeded 140 'use_bash_pipefail' => 1,
141 'use_bash_errexit' => 1,
148 # The final list of tables 152 # Connection parameters 153 my $src_db_conn = $self->param(
'src_db_conn');
154 my $src_dbc = $src_db_conn ? go_figure_dbc($src_db_conn) : $self->data_dbc;
155 $self->param(
'src_dbc', $src_dbc);
157 $self->input_job->transient_error(0);
158 die
'Only the "mysql" driver is supported.' if $src_dbc->driver ne
'mysql';
160 my @ehive_tables = ();
162 ## Only query the list of eHive tables if there is a "hive_meta" table 163 my $meta_sth = $src_dbc->table_info(undef, undef,
'hive_meta');
164 if ($meta_sth->fetchrow_arrayref) {
166 @ehive_tables = (@{$src_dba->hive_pipeline->list_all_hive_tables}, @{$src_dba->hive_pipeline->list_all_hive_views});
167 unless (@ehive_tables) {
168 my @ref_ehive_tables = qw(hive_meta pipeline_wide_parameters worker dataflow_rule analysis_base analysis_ctrl_rule job accu log_message job_file analysis_data resource_description analysis_stats analysis_stats_monitor role msg progress resource_class worker_resource_usage);
169 # The hard-coded list is comprehensive, so some tables may not be 170 # in this database (which may be on a different version) 171 push @ehive_tables, @{$self->_get_table_list($src_dbc, $_)}
for @ref_ehive_tables;
176 $self->param(
'nb_ehive_tables', scalar(@ehive_tables));
178 # Get the table list in either "tables" or "ignores" 179 my $table_list = $self->_get_table_list($src_dbc, $self->param(
'table_list') ||
'');
180 $self->say_with_header(sprintf(
"table_list: %d %s", scalar(@$table_list), join(
'/', @$table_list)));
181 my $nothing_to_dump = 0;
183 if ($self->param(
'exclude_list')) {
184 push @ignores, @$table_list;
185 $nothing_to_dump = 1
if !$self->param(
'table_list');
187 push @tables, @$table_list;
188 $nothing_to_dump = 1
if $self->param(
'table_list') and !@$table_list;
191 # eHive tables are ignored if exclude_ehive is set 192 if ($self->param(
'exclude_ehive')) {
193 push @ignores, @ehive_tables;
194 } elsif (@ehive_tables) {
195 if (@tables || $nothing_to_dump) {
196 push @tables, @ehive_tables;
197 $nothing_to_dump = 0;
201 # Output file / output database 202 $self->param(
'output_file') || $self->param(
'output_db') || die
'One of the parameters "output_file" and "output_db" is mandatory';
203 unless ($self->param(
'output_file')) {
204 $self->param(
'real_output_db', go_figure_dbc( $self->param(
'output_db') ) );
205 die
'Only the "mysql" driver is supported.' if $self->param(
'real_output_db')->driver ne
'mysql';
208 $self->input_job->transient_error(1);
210 $self->say_with_header(sprintf(
"tables: %d %s", scalar(@tables), join(
'/', @tables)));
211 $self->say_with_header(sprintf(
"ignores: %d %s", scalar(@ignores), join(
'/', @ignores)));
213 my @options = qw(--skip-lock-tables);
214 # Without any table names, mysqldump thinks that it should dump 215 # everything. We need to add special arguments to handle this 216 if ($nothing_to_dump) {
217 $self->say_with_header(
"everything is excluded, nothing to dump !");
218 push @options, qw(--no-create-info --no-data);
219 @ignores = (); # to clean-up the command-line
224 if ($self->param(
'output_file')) {
225 if (lc $self->param(
'output_file') =~ /\.gz$/) {
226 $output = sprintf(
' | gzip > %s', $self->param(
'output_file'));
228 $output = sprintf(
'> %s', $self->param(
'output_file'));
231 $output = join(
' ',
'|', @{ $self->param(
'real_output_db')->to_cmd(undef, undef, undef, undef, 1) } );
234 # Extra parameter to add to the command-line 235 my $dump_options = $self->param(
'dump_options')
237 # Must be joined because of the redirection / the pipe 239 @{ $src_dbc->to_cmd(
'mysqldump', undef, undef, undef, 1) },
242 ref($dump_options) ? @$dump_options : ($dump_options,),
243 (map {sprintf(
'--ignore-table=%s.%s', $src_dbc->dbname, $_)} @ignores),
247 # Check whether the current database has been restored from a snapshot. 248 # If it is the case, we shouldn't re-dump and overwrite the file. 249 # We also check here the value of the "skip_dump" parameter 250 my $completion_signature = sprintf(
'dump_%d_restored', defined $self->input_job->dbID ? $self->input_job->dbID : 0);
252 if ($self->param(
'skip_dump') or $self->param($completion_signature)) {
253 # A command that always succeeds 254 $self->param(
'cmd',
'true');
255 if ($self->param(
'skip_dump')) {
256 $self->warning(
'Skipping the dump because "skip_dump" is defined');
258 $self->warning(
"Skipping the dump because this database has been restored from the target dump. We don't want to overwrite it");
260 } elsif ($self->param(
'nb_ehive_tables')) {
261 # OK, we can dump and this is an eHive database. 262 # We add the signature to the dump, so that the 263 # job won't rerun on a restored database 264 # We're very lucky that gzipped streams can be concatenated and the 265 # output is still valid ! 266 my $extra_sql = qq{echo
"INSERT INTO pipeline_wide_parameters VALUES ('$completion_signature', 1);\n" $output};
267 $extra_sql =~ s/>/>>/;
268 $self->param(
'cmd',
"$cmd; $extra_sql");
270 # Direct dump on a non-eHive database 271 $self->param(
'cmd', $cmd);
276 # Splits a string into a list of strings 277 # Ask the database for the list of tables that match the wildcard "%" 278 # and also select the tables that actually exist 279 sub _get_table_list {
280 my ($self, $dbc, $table_list) = @_;
283 foreach my $initable (ref($table_list) eq
'ARRAY' ? @$table_list : split(
' ', $table_list)) {
284 if ($initable =~ /%/) {
285 $initable =~ s/_/\\_/g;
287 my $sth = $dbc->table_info(undef, undef, $initable, undef);
288 push @newtables, map( {$_->[2]} @{$sth->fetchall_arrayref});