ensembl-hive  2.7.0
fingerprint_db.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 use strict;
18 use warnings;
19 
20 
21 use DBI;
22 use Getopt::Long;
23 
24 my ($host, $port, $dbname, $user, $pass, @tables, $limit);
25 
26 GetOptions('user=s' => \$user,
27  'pass=s' => \$pass,
28  'host=s' => \$host,
29  'port=i' => \$port,
30  'dbname=s' => \$dbname,
31  'tables=s' => \@tables,
32  'limit=s' => \$limit,
33  'help' => sub { usage(); exit(0); });
34 
35 @tables = split(/,/,join(',',@tables));
36 
37 if (!$user || !$host || !$dbname) {
38 
39  usage();
40  exit(1);
41 
42 }
43 
44 my $dbi = DBI->connect( "DBI:mysql:host=$host:port=$port;database=$dbname", $user, $pass,
45  {'RaiseError' => 1}) || die "Can't connect to database\n";
46 
47 # use all if not specified
48 if (!@tables) {
49  my $sth = $dbi->prepare("SHOW TABLES;");
50  $sth->execute();
51  foreach my $row (@{$sth->fetchall_arrayref()}) {
52  push @tables, $row->[0];
53  }
54 }
55 
56 print "Table\tDescription checksum\tData checksum\n";
57 
58 foreach my $table (@tables) {
59 
60  print $table . "\t";
61 
62  my $passopt = $pass ? "-p$pass" : "";
63 
64  my $key_sth = $dbi->prepare("SHOW INDEX FROM $table");
65  $key_sth->execute();
66  my ($primary_key, $first_col);
67  while (my @cols = $key_sth->fetchrow()) {
68  $first_col = $cols[4];
69  if ($cols[2] eq "PRIMARY") {
70  $primary_key = $cols[4];
71  last;
72  }
73  }
74 
75  my $key = $primary_key;;
76  if (!$key) {
77  # print "Can't get primary key for table $table, using $first_col as key\n";
78  $key = $first_col;
79  }
80 
81  # description - note use of tr -d to strip newline
82  system ("mysql -u $user -h $host $passopt -P $port -e \'DESC $table\' $dbname | md5sum | tr -d '\n'");
83 
84  print "\t";
85 
86  # data
87  my $limitclause = $limit ? " LIMIT $limit" : "";
88  my $cmd = "\'SELECT * FROM $table ORDER BY $key $limitclause;desc $table;\'";
89  system ("mysql -u $user -h $host $passopt -P $port -e $cmd $dbname | md5sum");
90 
91 }
92 
93 
94 
95 sub usage {
96 
97  print << "EOF";
98 
99  fingerprint_db.pl -user {user} -pass {password} -host {host} -port {port} -dbname {database} -tables {tables} -limit {rows}
100 
101  Generates an MD5 checksum for the description and data of all or some tables of a database.
102 
103  Argument to tables may be comma-separated, or multiple -tables arguments may be used.
104  If no -tables argument is given all tables are analysed.
105 
106  Use -limit to limit the number of rows the comparison is based on; less accurate but faster for large tables.
107 
108  A single fingerprint for the whole database can be computed by piping the output of this script into md5sum.
109 
110 EOF
111 
112 }
usage
public usage()