2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
26 use File::Path qw/mkpath/;
27 use Getopt::Long qw/:config no_ignore_case auto_version bundling_override/;
28 use IO::Compress::Gzip qw/gzip $GzipError/;
32 my $PIGZ_BINARY =
'pigz';
33 my $PIGZ_PROCESSORS = 2; #some machines only have 4 cores so
do not go mad
34 my $MAX_FILE_SIZE = 1 * 1024 * 1024; #anything greater than 1MB farm out
38 my $self = bless({}, $class);
43 $self->dry()
if $self->opts()->{
dry};
44 $self->process()
if ! $self->opts()->{
dry};
47 select($self->{oldfh});
65 databases|database|db=s@
78 ) or pod2usage(-verbose => 1, -exitval => 1);
79 pod2usage(-verbose => 1, -exitval => 0) if $opts->{help};
80 pod2usage(-verbose => 2, -exitval => 0) if $opts->{man};
81 $self->{opts} = $opts;
87 my $o = $self->opts();
91 open my $fh,
'>', $file or die
"Cannot open log file '${file}' for writing: $!";
92 my $oldfh = select($fh);
93 $self->{oldfh} = $oldfh;
100 my $o = $self->opts();
104 if (! $o->{defaults}) {
105 @required_params = qw/host username/;
107 -message =>
'-pattern is not supported with -databases mode',
110 ) if $o->{pattern} && $o->{databases};
113 foreach my $r (@required_params) {
117 "-${r} has not been given at the command line but is a required parameter",
124 #Check if gzip command is available
126 $self->{pigz_binary} = 0;
127 $self->v(
'Forcing Perl based GZip compression');
130 `$PIGZ_BINARY --version >/dev/
null 2>/dev/
null`;
131 $self->{pigz_binary} = ($? == 0) ? 1 : 0;
132 my $feedback = ($self->{pigz_binary}) ?
'available' :
'not available';
133 $self->v(q{pigz binary
'%s' is %s}, $PIGZ_BINARY, $feedback);
141 my $o = $self->opts();
143 #Processing -opt 1 -opt 2,3 into opt => [1,2,3]
144 $self->_cmd_line_to_array(
'databases')
if $o->{databases};
145 $self->_cmd_line_to_array(
'groups')
if $o->{groups};
146 $self->_cmd_line_to_array(
'species')
if $o->{species};
148 my $original_databases_args = $o->{databases};
151 if ($o->{tables} && !$o->{sql}) {
152 $self->_cmd_line_to_array(
'tables');
153 $self->v(q{Will work with the tables [%s]}, join(q{,}, @{ $o->{tables} }));
156 if ($o->{defaults}) {
157 $self->_set_opts_from_hostname();
159 $o->{port} = 3306
if !$o->{port};
161 my $p = $o->{pattern};
163 $o->{databases} = $self->_all_dbs($p);
165 $o->{directory} = File::Spec->rel2abs($o->{directory});
168 if(! $o->{username}) {
170 -msg =>
'No -username given on the command line or in the configuration file',
176 $self->v(q{Using the database server %s@%s:%d},
177 map { $o->{$_} } qw/username host port/);
179 #Filter for those on the specified server; sometimes redundant
180 my %dbs =
map { $_ => 1 } @{ $self->_all_dbs() };
182 foreach my $db (@{$o->{databases}}) {
184 push(@final_dbs, $db);
187 $self->v(
'DB %s is not available from the specified server', $db);
190 $o->{databases} = \@final_dbs;
192 #Filtering DBs based on groups & species
195 foreach my $group (@{ $o->{groups} }) {
196 $self->v(
'Filtering for group %s', $group);
197 %dbs =
map { $_ => 1 } grep { / _ $group _ /xms } @{ $o->{databases} };
199 $o->{databases} = [ keys %dbs ];
203 foreach my $species (@{ $o->{species} }) {
204 $self->v(
'Filtering for species %s', $species);
205 %dbs =
map { $_ => 1 } grep { / $species _ /xms } @{ $o->{databases} };
207 $o->{databases} = [ keys %dbs ];
210 #Do we have any DBs left to process?
211 my $db_count = scalar(@{ $o->{databases} });
212 if ($db_count == 0) {
213 my $msg =
'No databases found on the server ' . $o->{host};
214 if($original_databases_args && $o->{defaults}) {
215 my $version = $o->{version} ||
'-NONE';
216 $msg .= qq{. You specified the -database arg and -defaults. Are you on the correct server or did -version
'$version' excluded
this DB?};
218 pod2usage(-msg => $msg, -exitval => 1, -verbose => 0);
220 $self->v(q{Working %d database(s)}, $db_count);
222 $o->{databases} = [ sort { $a cmp $b } @{ $o->{databases} } ];
224 $o->{verbose} = 1
if $o->{dry};
231 my $databases = $self->opts()->{databases};
232 my $list = join(q{,}, @{$databases});
233 $self->v(q{The following databases would have been dumped [%s]}, $list);
241 my $test_case = $self->opts()->{testcompatible};
243 $self->v(
'Producing test case compatible dumps')
if $test_case;
245 my $databases = $self->opts()->{databases};
246 foreach my $db (@{$databases}) {
247 $self->v(
'Working with database %s', $db);
252 $self->_setup_dir($db);
255 my @tables = keys %{ $self->tables() };
257 #Do data dumps if we didn't ask for just SQL
258 if (!$self->opts()->{sql}) {
259 my @tables_to_process;
260 if ($self->opts()->{tables}) {
262 %lookup =
map { $_ => 1 } @{ $self->opts()->{tables} };
263 @tables_to_process = grep { $lookup{$_} } @tables;
265 @tables_to_process = @tables;
267 foreach my $table (sort { $a cmp $b } @tables_to_process) {
268 next
if $self->is_view($table);
269 $self->data($table, $db);
272 $self->v(
'-sql mode is on so no data dumping will occur');
279 $sql_file = $self->file(
'table.sql');
280 unlink $sql_file
if -f $sql_file;
281 open $fh,
'>', $sql_file or croak
"Cannot open filehandle to $sql_file: $!";
284 $sql_file = $self->file($db .
'.sql.gz');
285 unlink $sql_file
if -f $sql_file;
286 $fh = IO::Compress::Gzip->new($sql_file) or croak
"Cannot create gzip stream to $sql_file: $GzipError";
291 foreach my $table (sort { $a cmp $b } @tabs) {
292 my $sql = $self->sql($table);
293 print $fh $sql,
';',
"\n" x 2;
296 $writer->(grep { !$self->is_view($_) } @tables);
297 $writer->(grep { $self->is_view($_) } @tables);
299 $self->permissions($sql_file);
301 #Checksum the DB's files
302 $self->checksum()
if ! $test_case;
306 $self->clear_current_dir();
307 $self->clear_tables();
309 $self->v(
'Finished with database %s', $db);
315 my ($self, $table) = @_;
316 my $q_table = $self->dbh()->quote_identifier($table);
319 ->selectcol_arrayref(qq{SHOW CREATE TABLE $q_table}, { Columns => [2] });
320 my $sql = $array->[0];
321 return $self->modify_sql($sql, $table);
325 my ($self, $sql, $table) = @_;
326 if ($self->is_view($table)) {
327 $sql =~ s/DEFINER=.+ \s+ SQL/DEFINER=CURRENT_USER() SQL/xms;
328 $sql =~ s/SQL \s+ SECURITY \s+ DEFINER/SQL SECURITY INVOKER/xms;
330 if($self->opts()->{testcompatible}) {
331 $sql =~ s/DEFAULT\s+CHARSET=latin1
332 $sql =~ s/COLLATE=latin1_bin
333 $sql =~ s/AUTO_INCREMENT=\d+
334 $sql =~ s/CHARACTER SET latin1
335 $sql =~ s/COLLATE latin1_bin
342 my ($self, $table, $db) = @_;
343 return if $self->is_view($table);
344 $self->v(
'Dumping table %s', $table);
345 my $q_table = $self->dbh()->quote_identifier($table);
346 my $file = $self->file($table .
'.txt');
348 my $dbc_params = $self->dbc_params($db);
350 my $mysql_exe =
'mysql';
355 '--max_allowed_packet=1024M',
358 '--skip-column-names',
359 "-e 'SELECT * FROM ${db}.${table}'",
362 '-e \'s/(^|\t)NULL($|\t)/\1\\\\N\2/g\'',
363 '-e \'s/(^|\t)NULL($|\t)/\1\\\\N\2/g\'',
367 my $cmd = join(
' ', @cmd);
368 my $output = `$cmd 2>&1`;
370 $self->compress($file)
if ! $self->opts()->{testcompatible};
376 my $dir = $self->current_dir();
378 $self->v(
'Checksumming directory %s', $dir);
380 opendir(my $dh, $dir) or die
"Cannot open directory $dir";
381 my @files = sort { $a cmp $b } readdir($dh);
382 closedir($dh) or die
"Cannot close directory $dir";
384 my $checksum = $self->file(
'CHECKSUMS');
385 unlink $checksum
if -f $checksum;
387 open my $fh,
'>', $checksum or croak
"Cannot open filehandle to $checksum: $!";
388 foreach my $file (@files) {
389 next
if $file =~ /^\./; #hidden file or up/current dir
390 next
if $file =~ /^CHECKSUM/;
391 my $path = File::Spec->catfile($dir, $file);
392 my $sum = `sum $path`;
395 print $fh
"${sum}\t${file}\n";
398 $self->permissions($checksum);
404 my ($self, $database) = @_;
405 if (!exists $self->{
'dbh'}) {
406 my $o = $self->opts();
407 my %args = (host => $o->{host}, port => $o->{port});
408 $args{database} = $database
if defined $database;
411 'DBI:mysql:' . join(q{;},
map { $_ .
'=' . $args{$_} } keys %args);
412 $self->v(
'DBI connection URI %s', $dsn);
414 DBI->connect($dsn, $o->{username}, $o->{password}, { RaiseError => 1 });
418 return $self->{
'dbh'};
422 my ($self, $database) = @_;
423 my $o = $self->opts();
424 my %args = (host => $o->{host}, port => $o->{port});
425 $args{database} = $database
if defined $database;
427 my $dbc_params = join(
' ', (
428 '--host='.$o->{host},
429 '--port='.$o->{port},
430 '--user='.$o->{username},
431 '--password='.$o->{password}
440 if (exists $self->{dbh}) {
441 $self->{dbh}->disconnect();
448 my ($self, $table) = @_;
449 return ($self->tables()->{$table} eq
'VIEW') ? 1 : 0;
454 if (!exists $self->{tables}) {
456 $self->dbh()->selectcol_arrayref(
457 'select TABLE_NAME, TABLE_TYPE from information_schema.TABLES where TABLE_SCHEMA = DATABASE()',
458 { Columns => [ 1, 2 ] }
460 my %hits = @{$array};
461 $self->{tables} = \%hits;
463 return $self->{tables};
468 delete $self->{tables};
473 my ($self, $filename) = @_;
474 return File::Spec->catfile($self->current_dir(), $filename);
478 my ($self, $current_dir) = @_;
479 $self->{
'current_dir'} = $current_dir
if defined $current_dir;
480 return $self->{
'current_dir'};
483 sub clear_current_dir {
485 delete $self->{current_dir};
491 return $self->{
'opts'};
495 my ($self, $file) = @_;
496 my $target_file = $file .
'.gz';
498 $self->v(q{Compressing
'%s' to
'%s'}, $file, $target_file);
500 if (-f $target_file) {
502 or die
"Cannot remove the existing gzip file $target_file: $!";
505 my @stats = stat($file);
506 my $size = $stats[7];
507 if($self->{pigz_binary} && $size >= $MAX_FILE_SIZE) {
508 system (
"$PIGZ_BINARY --processes $PIGZ_PROCESSORS -4 $file") and confess "Could not pigz $file using $PIGZ_BINARY";
511 gzip $file => $target_file
512 or die
"gzip failed from $file to $target_file : $GzipError\n";
514 if (-f $target_file && -f $file) {
515 unlink $file or die
"Cannot remove the file $file: $!";
517 $self->permissions($target_file);
522 my ($self, $file) = @_;
524 chmod($mode, $file) or die
"Cannot perform the chmod to mode $mode for file $file";
529 my ($self, $msg, @args) = @_;
530 return unless $self->opts()->{verbose};
531 my $s_msg = sprintf($msg, @args);
532 my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) =
534 print sprintf(
"[%02d-%02d-%04d %02d:%02d:%02d] %s\n",
535 $mday, $mon, $year + 1900,
536 $hour, $min, $sec, $s_msg);
541 my ($self, $db) = @_;
542 my @path = ($self->opts()->{directory});
543 if($self->opts()->{testcompatible}) {
544 if( $db =~ /^(?:\w+_test_db_)?([a-zA-Z0-9_]+)_([a-z]+)_\d+/) {
548 $self->v(
"Cannot decipher name and group from $db. Using the database name");
555 my $dir = File::Spec->catdir(@path);
556 $self->current_dir($dir);
558 mkpath($dir) or die
"Cannot create directory $dir: $!";
560 or die
"Cannot change permissions on dir for everyone to write: $!";
565 sub _set_opts_from_hostname {
567 my $o = $self->opts();
568 my $defaults = $o->{defaults};
569 return unless $o->{defaults};
570 confess
"The given location '$defaults' does not exist" if ! -f $defaults;
572 my $host = $self->_host();
573 my $settings = $self->_hostname_opts_from_config()->{$host};
574 confess
"Specified -defaults but $host is not known. Check your $defaults ini file"
577 #Setup default connection params
578 $o->{host} = $settings->{host} || $host; # use a configured host otherwise use hostname
580 #only use if specified
581 $o->{port} = $settings->{port}
if $settings->{port};
582 $o->{username} = $settings->{username}
if $settings->{username};
583 $o->{password} = $settings->{password}
if $settings->{password};
584 $o->{sql} = $settings->{sql}
if $settings->{sql};
586 if (!$o->{databases}) {
587 my $opts_pattern = $o->{pattern};
588 $opts_pattern = qr/$opts_pattern/
if $opts_pattern;
589 my $settings_pattern = $settings->{pattern};
590 my $pattern = (defined $opts_pattern) ? $opts_pattern : $settings_pattern;
591 $o->{databases} = $self->_all_dbs($pattern);
595 $o->{directory} = $settings->{dir};
600 #Assume normal ini-file format
601 sub _hostname_opts_from_config {
603 my $hostname_opts = {};
605 if($self->opts()->{version}) {
606 $target_dir =
'release-' . $self->opts()->{version};
609 $target_dir =
'dumps';
612 my $defaults = $self->opts()->{defaults};
613 open my $fh,
'<', $defaults or confess
"Cannot open defaults file '$defaults' for reading: $!";
617 while(my $line = <$fh>) {
619 next
if $line =~ /^\s*(?:\#|\;|$)/; #next
for comments & empty lines
622 if(my ($section) = $line =~ /^\s*\[\s*(.+?)\s*\]\s*$/xms) {
623 $current_section = $section;
624 $hostname_opts->{$current_section} = {};
628 if( my ($key, $value) = $line =~ /^\s*([^=]+?)\s*=\s*(.*?)\s*$/) {
630 #Compile into a regex
631 if($key eq
'pattern') {
634 #Change into the correct location
635 elsif($key eq
'dir') {
636 $value = File::Spec->catdir($value, $target_dir);
638 $hostname_opts->{$current_section}->{$key} = $value;
641 confess
"Error in ini file '$defaults' at line $counter: '$line'";
644 return $hostname_opts;
649 my $host = hostname();
653 #Always filter by version if it was given
655 my ($self, $pattern) = @_;
657 if($self->opts()->{version}) {
658 $like =
'%\\_'.$self->opts()->{version}.
'%';
659 $self->v(q{Looking
for databases with the pattern
'%s' }, $like);
661 my $dbh = $self->dbh();
662 my $databases = $dbh->selectcol_arrayref(
'show databases like ?',
663 { Columns => [1] }, $like);
665 return [ grep { $_ =~ $pattern } @{$databases}]
if $pattern;
669 sub _cmd_line_to_array {
670 my ($self, $key) = @_;
671 my $array = $self->opts()->{$key};
672 $array = (ref($array) && ref($array) eq
'ARRAY') ? $array : [$array];
673 my $string = join(q{,}, @{$array});
674 my @new_array = split(/,/, $string);
675 $self->opts()->{$key} = \@new_array;
693 ./dump_mysql.pl (-version VER | -release VER) [-defaults] | [ -username USER -password PASS -host HOST [-port PORT] [-pattern
'REGEX' | -databases DB] [-tables TABLE] -directory DIR] [-verbose] [-help | -man]
695 #Test Case compatbile dumps
696 ./dump_mysql.pl -username root -password pass -host 127.0.0.1 -testcompatible -verbose -directory /tmp/test-genome-DBs -database homo_sapiens_core_testdb
698 #Using defaults ini file
699 ./dump_mysql.pl --defaults my.ini --username root --password p --version 64
701 ./dump_mysql.pl --defaults my.ini --username root --password p --release 64 -dry
703 ./dump_mysql.pl --defaults my.ini --username root --password p --version 64 --tables dna
705 ./dump_mysql.pl --defaults my.ini --username root --password p --version 64 --tables meta,meta_coord --tables analysis --groups core,otherfeatures --groups vega
707 ./dump_mysql.pl --defaults my.ini --username root --password p --version 64 --tables meta,meta_coord --tables analysis --groups core,otherfeatures --groups vega --sql
710 ./dump_mysql.pl --host srv --username root --password p --pattern
'.+_64.+' --directory $PWD/dumps
712 ./dump_mysql.pl --host srv --username root --password p --databases my_db --databases other_db --directory $PWD/dumps
714 ./dump_mysql.pl --host srv --username root --password p --databases my_db,toto_db --databases other_db --directory $PWD/dumps
716 ./dump_mysql.pl --host srv --username root --password p --databases my_db --tables dna,dnac --directory $PWD/dumps
718 ./dump_mysql.pl --host srv --username root --password p --db my_db --tables dna --tables dnac --directory $PWD/dumps
722 A script which is used to generate MySQL dumps which take into account issues
723 surrounding BLOB handling, VIEWS and other oddities of the Ensembl MySQL dump
726 You B<MUST> be on the database server the dumps are going to be generated
727 from. If not then all file manipulations will fail.
729 As a pose to normal scripts
this version is aware of webteam database setup
730 and therefore can automatically configure itself to a set of useful
731 parameters rather than having to manually configure the setup.
737 =item B<--username | --user | -u>
739 REQUIRED. Username of the connecting account. Must be able to perform
740 C<SELECT INTO OUTFILE> calls.
742 =item B<--password | -pass | -p>
744 REQUIRED. Password of the connecting user.
748 Uses the
default mechanism which involves looking at the host the script
749 is executing on and setting a number of options
for databases to look
for
750 as well as port settings. C<-defaults> can be used in conjunction with
751 C<-groups>, C<-species> and C<-tables> but not with parameters like <--host>.
753 The options set are specified by your custom ini-file.
755 =item B<--version | --release>
757 If you are
using C<--defaults> then you must also specify the version
758 of the databases you are dumping. Once specified the program will only
759 consider databases with the version number in there (specifically the
760 occurance of C<%\_VERSION%>). C<--release> can also be used.
762 =item B<--host | --hostname | -h>
764 Host name of the database to connect to. Cannot be used with <--defaults>.
768 Optional integer of the database port. Defaults to 3306. Cannot be used
773 Allows the specification of a regular expression to select databases with.
774 Cannot be used in conjunction with the C<--databases> argument.
776 =item B<--databases | --database | --db>
778 Allows database name specification and can be used more than once. Cannot
779 be used in conjunction with C<--pattern>. Comma separated values are
782 =item B<--tables | --table>
784 Allows you to specify a table to perform the dumps
for. This will be applied
785 to all databases matching the given pattern or the list of databases. Be
786 warned that
this will cause a full SQL re-dump and checksum re-calculation.
788 =item B<--directory | --dir>
790 Target directory to place all dumps. A sub-directory will be created here;
791 one per database dump. Cannot be used with <--defaults>.
795 Force a dump of the SQL
for the database and nothing
else.
797 =item B<--testcompatible>
799 If specified will create MySQL dumps compatible with the Ensembl test
800 framework. This creates 2 levels of directory based on the database
801 species and the group it belongs to e.g. homo_sapiens and otherfeatures. We
802 also avoid gzipping any file and produce a single table.sql file.
806 Makes the program give more information
about what is going on. Otherwise
807 the program is silent.
811 If given the script will write all logs to output. Switches on C<--verbose>
815 Force the use of Perl
's GZip libraries rather than using external zipping
820 If specified the script will list all databases which have been found and
821 will be dumped but will not run any dumping process.
833 =head1 DEFAULTS FILE FORMAT
835 The defaults file format is an ini file which respects all basic rules about
836 comments (preceeded by a ;), section headers and key/value pairs. The basic
841 port = 3306 ; port of the DB
842 pattern = ^homo_sap\w+$ ; regular expression to filter DBs by
843 dir = /path/to/dump/dir ;
847 port = 3306 ; port of the DB
848 pattern = ^web\w+$ ; regular expression to filter DBs by
849 dir = /path/to/dump/dir ;
850 sql = 1 ; dump just the SQL for these databases
852 ;if your host isn't the same as the server you are running the script on
854 host = my-real-server ; host you want the script to connect to
855 port = 3306 ; port of the DB
856 pattern = ^web\w+$ ; regular expression to filter DBs by
857 dir = /path/to/dump/dir ;
858 sql = 1 ; dump just the SQL
for these databases
860 ; and
if you wanted everything in the config file so our cmd line becomes
861 ; ./dump_mysql.pl -defaults mydbcfg.ini
865 password = uberpassword
866 pattern = ^homo_sap.+var.+$
867 dir = /path/to/dump/dir
869 As an example of one which grabs all core dbs from a-m and puts it in /dumps
871 [genome.mysql.server]
873 pattern = ^[a-m]\w*_core_\d+_\d+[a-z]?$
876 The server name should be the same as what is emitted from
878 perl -MSys::Hostname -e
'print hostname(), "\n"'
880 Also
using line bounded regular expressions i.e. C<^> and C<$> will improve
881 the accuracy of the databases you are looking
for.
893 =item IO::Compress::Gzip
897 =head1 MAKING IT FASTER