2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 use DBI qw( :sql_types );
22 use File::Spec::Functions qw/:ALL/;
23 use Getopt::Long qw( :config no_ignore_case auto_version );
26 my $rcsid =
'$Revision$';
27 our ($VERSION) = $rcsid =~ /(\d+\.\d+)/;
30 my $indent =
' ' x length($0);
35 $0 --host=dbhost [ --port=dbport ] \\
36 $indent --user=dbuser [ --pass=dbpass ] \\
37 $indent --type=schema-type | --database=dbname \\
38 $indent [ --release=
new-release ] [ --from=old-release ] \\
39 $indent [ --species=dbspecies ] \\
40 $indent [ --gitdir=/some/path ] \\
41 $indent [ --dryrun ] \\
42 $indent [ --interactive 0|1 ]\\
43 $indent [ --verbose ] [ --quiet ] \\
44 $indent [ --mysql=optional_path ] \\
52 --host / -h\tdatabase host name (required)
53 --port / -P\tdatabase port (optional,
default=3306)
54 --user / -u\tdatabase user (required)
55 --pass / -p\tdatabase user password (optional, no
default)
57 --type / -t restrict to database schema type
58 (i.e. core, compara, funcgen, gene2phenotype, variation, production or ontology)
59 (required
if --database is not specified)
61 --database / -d full name of database, or database name pattern
62 (required
if --type is not specified)
64 --release / -r release number (optional,
default is the latest
65 release that we can find patches
for)
67 --from / -f only consider databases from
this release
68 (optional, no
default)
70 --species / -s restrict to species (optional, no
default)
72 --gitdir the directory where the relevant Ensembl Git repositories
73 have been checked out (optional,
default=misc-scripts/../..)
75 --dryrun / -n
do not actually modify databases
76 (optional,
default=not set)
78 --verbose / -v display extra information
80 --quiet / -q
do not display warnings
82 --fix also go through all old patches to find any missing
83 patch (patching starts at release equal to the
84 oldest patch in the database) >>USE WITH CAUTION<<
86 --oldest used in conjunction with --fix,
this option allows control
87 over how many releases are included in the fix. This option
88 exists
for users who have incomplete meta entries and
89 wish to bring their database automatically up to date.
91 --fixlast an extension of B<--oldest> and B<--fix>. This combines
92 to patch the current and last release only, giving an easy
93 way to patch a database post-handover without worrying
94 about ancient patches.
96 --mysql specify the location of the mysql binary
if it is not on
97 \$PATH. Otherwise we
default this to mysql
99 --nointeractive specify
if you want an non-interactive patching environment
100 (
default false). >>USE WITH CAUTION<<
102 --help display
this text
103 --
about display further information
104 --version display version and quit
112 This script patches one or several Ensembl databases from older
113 releases to the release specified by the user on the command line
114 using the --release=NN command line
switch, or to the latest release
115 for which the script is able to find a patch
if the --release=NN
116 switch is not used. To only patch databases from a particular
117 Ensembl release, the user may use the --from=NN command line
118 switch. In
this case, the script will use the value from the
119 'schema_version' meta key or, failing that, from the database name,
120 to determine what databases should be or shouldn
't be patched.
122 The script is able to patch databases that have Ensembl Core
123 schemas, Ensembl Compara schemas, Ensembl Regulation schemas,
124 Ensembl Variation schemas, Ensembl Production and Ontology schemas
125 provided that the appropriate Git repositories have been checked out
126 and are available to this script. The Git root directory where
127 all Ensembl Git repositories are located may be specified using the
128 --gitdir=/some/path command line switch if the script is unable to
129 determine it by itself.
131 The user has to specify either a particular database to patch (using
132 --database=XXX) or by giving a schema type (using --type=XXX). In
133 the case where a single database is provided, the script will try to
134 figure out what schema type the database has from the value of the
135 'schema_type
' meta key or, failing that, from the database name.
137 If the user gives only a schema type, the script will look for
138 databases with that schema type, again using the value of the
139 'schema_type
' meta key or, failing that, using the database names.
141 The --database=XXX command line switch may also be used to specify a
142 pattern to match database names with.
144 To further restrict the set of databases that will be patched,
145 the --species=XXX command line switch may be used. The value
146 will be used to match against the various values of the meta key
147 'species.alias
' or, failing that, against the database name.
149 When the --fix command line switch is used, the script will also
150 make it possible to apply older patches that might have been
151 skipped. For example: A database at schema version 65, with patches
152 since release 40, will not only be patched to the appropriate
153 release (specified with --release=NN), but all patches since release
154 40 (inclusively) will be tested. If the patch identifier for an old
155 patch is missing from the meta table of the database, that patch
160 The release coordinator patches all Ensembl Core-like databases
161 from release 65 to release 66:
163 $0 -h host -u user -p password \\
166 A genebuilder wishes to patch the same set as specified above but
167 without being prompted to apply patches
169 $0 -h host -u user -p password \\
170 -t core -f 65 -r 66 --nointeractive
172 A genebuilder patches one of her databases to release 66, and
173 wants to look at what the script proposes to do before actually
176 $0 -h host -u user -p password \\
177 -d my_database_66 -r 66 --dryrun
179 The release coordinator patches all mouse Core-like databases to
180 the latest release. She has checked out the 'ensembl
' Git repo
181 in her ~/src directory:
183 $0 -h host -u user -p password \\
184 -t core -s mouse --gitdir=~/src
186 A genebuilder (username 'my
') patches all her human databases to
189 $0 -h host -u user -p password \\
190 -s homo_sapiens -d 'my_%
'
192 A genebuilder makes sure that all patches up to and including
193 those for release 66 are included in her database, without
194 actually applying any patch (any missing patches needs to be
197 $0 -h host -u user -p password \\
198 -r 66 -d my_database --fix --dryrun
200 The genebuilder above has an evil twin who has mislaid their meta tables.
201 --fix threatens to apply ancient patches but they know their database
202 is correct until halfway through release 64. They wish to apply any
203 missing patches between release 64 and 66.
205 $0 -h host -u user -p password -r 66 \\
206 -d my_database --fix --oldest 64
208 The genebuilder above also has a doppleganger who decided they
209 wanted to patch for the last and current release of Ensembl alone. This
210 is useful for applying late patches. In this situation we will apply
211 patches for the current release (67) and the previous release (66).
213 $0 -h host -u user -p password -r 67 \\
214 -d my_database --fixlast
219 my ( $opt_host, $opt_port ) = ( undef, '3306
' );
220 my ( $opt_user, $opt_pass ) = ( undef, undef );
221 my ( $opt_species, $opt_type, $opt_release ) = ( undef, undef, undef );
231 my $opt_mysql = 'mysql
';
232 my $opt_interactive = 1;
234 my ( $opt_verbose, $opt_quiet );
236 if ( !GetOptions( 'host|h=s
' => \$opt_host,
237 'port|P=i
' => \$opt_port,
238 'user|u=s
' => \$opt_user,
239 'pass|p=s
' => \$opt_pass,
240 'species|s=s
' => \$opt_species,
241 'type|t=s
' => \$opt_type,
242 'from|f=i
' => \$opt_from,
243 'release|r=i
' => \$opt_release,
244 'database|d=s
' => \$opt_database,
245 'gitdir=s
' => \$opt_gitdir,
246 'dryrun|n!
' => \$opt_dryrun,
248 'fixlast!
' => \$opt_fixlast,
249 'oldest=i
' => \$opt_oldest,
250 'mysql=s
' => \$opt_mysql,
251 'interactive|i!
' => \$opt_interactive,
252 'verbose|v!
' => \$opt_verbose,
253 'quiet|q!
' => \$opt_quiet,
254 'help!
' => sub { usage(); exit(0); },
255 'about!
' => sub { about(); exit(0); } ) ||
256 !defined($opt_host) ||
257 !defined($opt_user) ||
258 ( !defined($opt_database) && !defined($opt_type) ) )
264 if ( defined($opt_type) &&
265 $opt_type ne 'core
' &&
266 $opt_type ne 'compara
' &&
267 $opt_type ne 'funcgen
' &&
268 $opt_type ne 'gene2phenotype
' &&
269 $opt_type ne 'variation
' &&
270 $opt_type ne 'production
' &&
271 $opt_type ne 'ontology
' )
273 die( sprintf( "Unknown schema type: %s\n", $opt_type ) );
282 # Get available patches.
284 foreach my $thing ( [ 'ensembl
', 'core
', 'table.sql
' ],
285 [ 'ensembl-compara
', 'compara
', 'table.sql
' ],
286 [ 'ensembl-funcgen
', 'funcgen
', 'table.sql
' ],
287 [ 'ensembl-gene2phenotype
', 'gene2phenotype
', 'table.sql
' ],
288 [ 'ensembl-variation
', 'variation
', 'table.sql
' ],
289 [ 'ensembl-production
', 'production
', 'table.sql
' ],
290 [ 'ensembl-ontology-schema
', 'ontology
', 'tables.sql
' ] )
292 my ($git_repo, $schema_type, $schema_file) = @{$thing};
294 if ( defined($opt_type) && $schema_type ne $opt_type ) { next }
296 my $sql_dir = _sql_dir($git_repo, $schema_type, $schema_file);
297 if(! defined $sql_dir) {
299 warn(sprintf("No SQL directory found for Git repo %s, %s schema type\n", $git_repo, $schema_type));
303 my $dh = IO::Dir->new($sql_dir);
305 if ( !defined($dh) ) {
307 warn(sprintf( "Unable to find SQL directory '%s
'\n", $sql_dir ) );
312 while ( my $file_name = $dh->read() ) {
313 if ( $file_name =~ /^patch_\d+_(\d+)_?[a-z]+?\.sql$/ ) {
314 my $patch_release = $1;
316 if ( !defined($latest_release) ||
317 $latest_release < $patch_release )
319 $latest_release = $patch_release;
323 printf( "Found %s patch file '%s
' for release %d\n",
324 $schema_type, $file_name, $patch_release ) if ! $opt_quiet;
327 my $full_file_name = catfile( $sql_dir, $file_name );
329 push( @{ $patches{$schema_type}{$patch_release} },
330 { 'patch
' => $file_name, 'path
' => $full_file_name } );
334 } ## end foreach my $thing ( [ 'ensembl
'...])
337 if ( defined($opt_release) && $opt_release > $latest_release ) {
338 die( sprintf( "Release %d is too new, " .
339 "last release with patches is release %d\n",
340 $opt_release, $latest_release ) );
343 if ( !defined($opt_release) ) {
345 printf( "Latest release with patches is release %d\n",
348 $opt_release = $latest_release;
351 my $dsn = sprintf( "DBI:mysql:host=%s;port=%d", $opt_host, $opt_port );
353 my $dbh = DBI->connect( $dsn, $opt_user, $opt_pass,
354 { 'RaiseError
' => 0, 'PrintError
' => 0 } );
357 my $pass = ($opt_pass) ? 'with a
' : 'with no
';
358 warn(sprintf(q{Cannot connect to DSN '%s
' with user %s %s password. Check your settings}, $dsn, $opt_user, $pass));
362 # Loop through the databases on the server, patch the ones we want to
363 # patch and filter out the ones that we don't want to patch.
366 my $found_databases = 0;
368 if ( defined($opt_database) ) {
369 $sth = $dbh->prepare(
"SHOW DATABASES LIKE ?");
370 $sth->bind_param( 1, $opt_database, SQL_VARCHAR );
372 else { $sth = $dbh->prepare(
"SHOW DATABASES") }
377 $sth->bind_col( 1, \$database );
380 while ( $sth->fetch() ) {
382 if ( $database =~ /^(?:information_schema|mysql)$/ ) { next }
384 # Figure out schema version, schema type, and species name from the
385 # database by querying its meta table.
387 my $sth2 = $dbh->prepare(
389 "SELECT meta_key, meta_value FROM %s WHERE meta_key IN " .
390 "('schema_version', 'schema_type', " .
391 "'species.alias', 'species.common_name', 'patch')",
392 $dbh->quote_identifier( undef, $database,
'meta' ) ) );
397 $sth2->bind_columns( \( $key, $value ) );
399 my ( $schema_version_ok, $schema_type_ok, $species_ok );
400 my ( $schema_version, $schema_type, $species );
403 while ( $sth2->fetch() ) {
404 if ( $key eq
'schema_version' ) {
405 $schema_version = $value;
406 if ( defined($opt_from) ) {
407 if ( $schema_version eq $opt_from ) { $schema_version_ok = 1 }
408 else { $schema_version_ok = 0 }
410 else { $schema_version_ok = 1 }
412 elsif ( $key eq
'schema_type' ) {
413 $schema_type = $value;
414 if ( defined($opt_type) ) {
415 if ( $schema_type eq $opt_type ) { $schema_type_ok = 1 }
416 else { $schema_type_ok = 0 }
418 else { $schema_type_ok = 1 }
420 elsif ( $key eq
'species.alias' ) {
421 if ( defined($opt_species) ) {
422 if ( $value eq $opt_species ) { $species_ok = 1 }
424 else { $species_ok = 1 }
426 elsif ( $key eq
'species.common_name' ) {
429 elsif ( $key eq
'patch' ) {
430 if(index($value,
"\n") > -1) {
431 warn
"The patch value '$value' in database '$database' has line-breaks. Remove them to silence this message";
434 if($value =~ /^(patch_\d+_(\d+)_?[a-z]+?\.sql)\|(.*)$/) {
435 my $patch_ident = $1;
436 my $patch_release = $2;
438 $dbpatches{$patch_release}{$patch_ident} = $patch_info;
441 warn
"The patch value $value from database $database does not conform to the pattern of 'patch_from_to_tag|description'. Please fix";
444 } ## end
while ( $sth2->fetch() )
446 # If we haven't yet found out the schema version, schema type, or
447 # species, look to the database name to provide clues.
450 if ( ! $schema_version ) {
451 #remove defined as version maybe empty string
453 if ( $database =~ /^ensembl.+?(\d+)$/ or #
this captures compara|eg|ontology|production naming conventions
454 $database =~ /_(\d+)_\w+$/) {
456 $schema_version = $1;
458 if ( defined($opt_from) ) {
459 if ( $schema_version == $opt_from ) { $schema_version_ok = 1 }
460 else { $schema_version_ok = 0 }
463 $schema_version_ok = 1 }
465 elsif ( ! $opt_quiet ) {
466 $schema_version_ok = 0;
467 warn( sprintf(
"Can not determine schema version from '%s'\n",
472 if ( !defined($schema_type) ) {
473 if ( $database =~ /_(core|funcgen|gene2phenotype|variation|compara|production|ontology)_/ ) {
475 if ( defined($opt_type) ) {
476 if ( $schema_type eq $opt_type ) { $schema_type_ok = 1 }
477 else { $schema_type_ok = 0 }
479 else { $schema_type_ok = 1 }
481 elsif ( !$opt_quiet ) {
482 warn( sprintf(
"Can not determine schema type from '%s'\n",
486 if ( !defined($species) ) {
487 if ($database =~ /compara_([a-z][a-z_]+[a-z])?_\d+_\d+/ or # EG
case, e.g. ensembl_compara_fungi_18_71
488 $database =~ /ensembl[a-z]?_(?:compara|production|ontology)_/ or
489 $database =~ /_test_db_([a-z_]+)_([a-z])_/ or
490 $database =~ /([a-z][a-z_]+[a-z])_(?:core|funcgen|gene2phenotype|variation)_/)
493 $species =
'multi' unless defined $species;
495 if ( defined($opt_species) ) {
496 if ( $species eq $opt_species ) { $species_ok = 1 }
498 else { $species_ok = 1 }
500 elsif ( $opt_species && !$opt_quiet ) {
501 warn( sprintf(
"Can not determine species from '%s'\n", $database ) );
505 #Quick check if fix-last is active. If so we will hard-code some values
506 $opt_fix = 1
if $opt_fixlast and defined $schema_version;
508 if ( $schema_version_ok &&
510 ( !defined($opt_species) ||
511 ( defined($opt_species) && $species_ok ) ) &&
512 ( ( !$opt_fix && $schema_version < $opt_release ) ||
513 ( $opt_fix && $schema_version <= $opt_release ) ) )
515 $found_databases = 1;
516 print(
'-' x ( $ENV{COLUMNS} || 80 ),
"\n" );
517 printf(
"Considering '%s' [%s,%s,%d]\n",
518 $database, defined($species) ? $species :
'unknown',
519 $schema_type, $schema_version );
521 $opt_oldest = ($schema_version == $latest_release) ? $latest_release : $latest_release - 1;
523 if ($schema_version < $opt_oldest) {
524 printf(
"Cannot use --fixlast with a schema release too far from the latest release; oldest allowed is $opt_oldest. Skipping $database");
527 printf(
"--fixlast is active. Will apply patches for version %d and up (if available)\n", $opt_oldest);
533 printf(
"Skipping database %s (type: %s | version: %d)\n", $database, ($schema_type||
'-'), ($schema_version || 0));
534 if( $schema_type_ok && $schema_version_ok && $schema_version == $opt_release) {
535 if (defined($opt_type) && $opt_type eq $schema_type) {
537 my $release_patches = join(q{, }, sort
map { $_->{patch} } @{$patches{$schema_type}{$schema_version}});
538 my $db_patches = join(q{, }, sort keys %{$dbpatches{$schema_version}});
540 if($release_patches ne $db_patches) {
541 printf(
"\t%s patches [%s] are not the same as release %i patches [%s]; rerun with --fix and --dryrun\n",
542 $database, $db_patches, $opt_release, $release_patches);
546 if($schema_type_ok && ! exists $patches{$schema_type}) {
547 printf(
"\t%s patches could not be found. Check your --gitdir option and try again\n", $schema_type);
553 # Now figure out what patches we need to apply to this database.
558 $start_version = $opt_oldest || ( sort { $a <=> $b } keys %dbpatches )[0];
559 if ( !defined($start_version) ) {
560 warn( sprintf(
"No patches in database, " .
561 "beginning fix from release %d\n",
563 $start_version = $schema_version;
566 printf(
"Earliest patch in database '%s' is from release %d\n",
567 $database, $start_version );
570 else { $start_version = $schema_version + 1 }
573 my $schema_version_warning = 0;
575 for ( my $r = $start_version; $r <= $opt_release; ++$r ) {
576 next unless exists $patches{$schema_type}{$r};
577 foreach my $entry ( sort { $a->{
'patch'} cmp $b->{
'patch'} }
578 @{ $patches{$schema_type}{$r} } )
580 my $patch = $entry->{
'patch'};
581 my $path = $entry->{
'path'};
583 if ( exists( $dbpatches{$r}{$patch} ) ) {
585 printf(
"Patch '%s' (%s) already applied\n",
586 $patch, $schema_type );
590 if ( !$opt_dryrun ) {
591 printf(
"Will apply patch '%s' (%s)\n", $patch,
593 push( @apply_these, $entry );
595 if ( $r < $opt_release && $patch =~ /a\.sql$/ ) {
596 # Warn about possible setting schema_version with an 'a'
598 $schema_version_warning = 1;
602 printf(
"Would apply patch '%s' (%s)\n",
603 $patch, $schema_type );
606 } ## end
foreach my $entry ( sort { ...})
608 } ## end
for ( my $r = $start_version...)
610 if ( $opt_dryrun || !@apply_these ) { print(
"\n"); next }
614 if($opt_interactive) {
615 print(
"Proceed with applying these patches? (y/N): ");
618 $apply_patches = (lc($yesno) =~ /^y(?:es)?$/) ? 1 : 0;
622 print
"Enterning non-interative mode. Will apply patches\n";
625 if ( $apply_patches ) {
627 foreach my $entry (@apply_these) {
628 my $patch = $entry->{
'patch'};
629 my $path = $entry->{
'path'};
631 my @cmd_list = ( $opt_mysql,
634 push(@cmd_list,
"--password=$opt_pass") if $opt_pass;
635 push(@cmd_list, "--port=$opt_port",
636 "--database=$database",
638 "--execute=source $path" );
640 printf( "Executing the following command:\n%s\n",
641 join( ' ', @cmd_list ) );
643 if ( system(@cmd_list) ) {
644 warn( sprintf(
"Failed to apply patch '%s' to database '%s'!\n",
645 $patch, $database ) );
647 if(!$opt_interactive) {
648 warn(
'In non-interative mode; aborting current run');
651 print(
"Next patch, next database, or abort? (p/d/A): ");
653 my $response = <STDIN>;
656 if ( lc($response) =~ /^p(?:atch)?$/ ) { next PATCH }
657 elsif ( lc($response) =~ /^d(?:atabase)?$/ ) { next DATABASE }
660 } ## end
foreach my $entry (@apply_these)
662 if ( !$opt_quiet && $schema_version_warning ) {
663 warn(
"Applied one or several 'a' patches, " .
664 "schema_version might have been updated\n" );
667 } ## end
if ( lc($yesno) =~ /^y(?:es)?$/)
671 } ## end
while ( $sth->fetch() )
673 if(!$found_databases) {
674 printf((
'-'x80).
"\n");
675 printf(
"No databases considered. Check your --database/--type/--release flags\n");
676 printf((
'-'x80).
"\n");
682 my ($git_repo, $schema_type, $schema_file) = @_;
685 $git_dir = $opt_gitdir;
688 my ($volume, $directories, $file) = splitpath(__FILE__);
689 $directories = curdir() unless $directories;
690 $git_dir = catdir($directories, updir(), updir());
692 my $sql_dir = rel2abs(canonpath( catdir( $git_dir, $git_repo, 'sql' ) ));
693 my $schema_location = catfile($sql_dir, $schema_file);
694 if(! -f $schema_location) {
696 printf(
"Could not find the schema file '%s' for E! module %s", $schema_location, $git_repo);
697 printf(
"\tTry using --gitdir if your checkouts are in a non-standard location\n") if $opt_gitdir;
701 printf("Using '%s' as our SQL directory\n", $sql_dir) if ! $opt_quiet;