ensembl-hive  2.7.0
load_database_from_ftp_site.pl
Go to the documentation of this file.
1 #!/usr/bin/perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 use strict;
18 use warnings;
19 
20 use Getopt::Long;
21 
22 # download and import the database
# Command-line settings; each one is filled in by GetOptions below.
my $database;               # name of the database to fetch from the ftp site
my $root;                   # rsync root directory that holds the database dumps
my $new_database;           # name of the database to create on the mysql host
my $user;                   # mysql user name
my $pass;                   # mysql password
my $port;                   # mysql port (optional)
my $host;                   # mysql host
my $cleanup     = undef;    # if set, remove the downloaded files at the end
my $force       = undef;    # if set, checksum mismatches only write warnings instead of ending the run
my $mysqltmpdir = undef;    # optional temporary directory handed to the mysql tools
my $quiet       = 0;        # if set, suppress informational output

# -mysqltmpdir is accepted as an alias of -mysqltempdir so that either
# spelling works.  An unrecognised option is treated as a usage error rather
# than being silently carried past.
GetOptions(
    'root=s'                     => \$root,
    'database=s'                 => \$database,
    'new_database=s'             => \$new_database,
    'host=s'                     => \$host,
    'force'                      => \$force,
    'cleanup'                    => \$cleanup,
    'port=s'                     => \$port,
    'user=s'                     => \$user,
    'pass=s'                     => \$pass,
    'mysqltempdir|mysqltmpdir=s' => \$mysqltmpdir,
    'quiet'                      => \$quiet,
    'help'                       => sub { usage(); exit(0); },
) or do { usage(); exit(1); };
48 
49 
# A database name is mandatory: it names the remote directory to fetch, the
# dump files inside it and (by default) the database that gets created.
if (!defined $database) {
    if (!defined $root) {
        die "No root given i.e. ftp.ensembl.org/pub/release-54/mysql and no database name given to try and guess root from";
    }
    die "No database name given, use -database to say which database to download\n";
}

if (!defined $root) {
    # Try to guess the root from the release number embedded in the database
    # name.  The capture is taken in list context so a failed match yields
    # undef instead of a stale $1.
    my ($release) = $database =~ /\S+_\S+_\S+_(\d+)_/;
    if (defined $release) {
        $root = "//ftp.ensembl.org/ensembl/pub/release-".$release."/mysql";
        print "Using $root as the root obtained from the database name\n" unless $quiet;
    }
    else {
        die "No root given i.e. ftp.ensembl.org/pub/release-54/mysql and could not guess from the database name $database";
    }
}

# Default the new database name to <login name>_<database>.
if (!defined $new_database) {
    $new_database = $ENV{"USER"}."_".$database;
    print "will create new database $new_database\n" unless $quiet;
}

if (!defined $user or !defined $pass or !defined $host) {
    die "Need user, password and host for mysql instance to create new database on\n";
}
77 
78 
# Connection options shared by the mysql and mysqlimport calls further down;
# the port is only appended when one was supplied on the command line.
my $mysql_options = join ' ', "-h$host", "-u$user", "-p$pass",
    (defined($port) ? "-P$port" : ());
83 
# Fetch the whole database directory from the ftp site into the current
# directory.  rsync is asked to be verbose unless the user requested -quiet.
print "rsync --recursive rsync:$root/$database .\n" unless $quiet;
my $line;
#goto SKIP;
my $rsync_verbosity = $quiet ? '--quiet' : '--verbose';
$line = `rsync --recursive $rsync_verbosity rsync:$root/$database .`;
my $rsync_status = $?;    # saved before anything else can overwrite $?

print $line unless $quiet;

# Nothing below can work without the downloaded files, so stop here.
if ($rsync_status) {
    die "rsync of rsync:$root/$database failed with exit status ".($rsync_status >> 8)."\n";
}
95 #SKIP:
96 
# If the database does not exist then so be it, just ignore the error code.
#my $com = "mysql $mysql_options -e'drop database ".$new_database."'";
#$line = `$com`;
# No need to check here: if the database does not exist the drop should get
# an error.  It is only done to delete the database if it exists already.


##
## generate error to test
##
#$mysql_options =~ s/-uensadmin/-uensro/g;

# Create the new database.  The name is wrapped in SQL backticks (literal
# inside the shell's single quotes) so that names containing characters such
# as '.' or '-' -- e.g. from a login name used as the default prefix -- are
# still valid identifiers.
my $com = "mysql $mysql_options -e'create database \`$new_database\`'";
$line = `$com`;
if ($? or $line =~ /Error/ or $line =~ /ERROR/) {
    print $line;
    die "Error during mysql\n";
}
else {
    print "Created new database $new_database on host $host\n" unless $quiet;
}


# From here on every mysql/mysqlimport call targets the new database.
$mysql_options .= " $new_database";
121 
122 
# Get the database schema and load it.
print "now creating the schema\n" unless $quiet;
my $schema_file = "$database/$database.sql";

system("gunzip -f ${schema_file}.gz") == 0
    or die "Could not gunzip ${schema_file}.gz (exit status ".($? >> 8).")\n";

# Load the schema, then re-compress the dump whether or not the load worked
# so the download directory is left as it was fetched.
my $schema_status = system("mysql $mysql_options < $schema_file");
system("gzip $schema_file");
if ($schema_status != 0) {
    die "Error loading the schema from $schema_file (exit status ".($schema_status >> 8).")\n";
}

# Not checked here: a missing CHECKSUMS file is reported when it is opened.
system("gunzip -f $database/CHECKSUMS.gz");
print "now parse the checksum\n" unless $quiet;

# Only used from this point on, i.e. for the data import commands.
if (defined($mysqltmpdir)) {
    $mysql_options = " --tmpdir $mysqltmpdir ".$mysql_options;
}
135 
# Walk the CHECKSUMS file: for every <table>.txt.gz entry verify the checksum
# with sum(1), unzip the file, import it with mysqlimport and zip it again.
# Any fatal problem ends the run through cleanup(1).
open(my $checksums, '<', "$database/CHECKSUMS")
    or die "Could not open $database/CHECKSUMS for reading: $!\n";

FILE:
while (my $entry = <$checksums>) {
    chomp $entry;

    # Each line holds the two numbers printed by sum followed by the file
    # name.  split ' ' also copes with leading whitespace.
    my ($e1, $e2, $file) = split ' ', $entry;
    next FILE unless defined $file;    # blank or incomplete line

    # Only table data files are imported; anything else is skipped.
    my $table;
    if ($file =~ /\A(\S+)\.txt\.gz\z/) {
        $table = $1;
    }
    else {
        print "ignore $file\n" unless $quiet;
        next FILE;
    }

    if (!-e "$database/$file") {
        print STDERR "$database/$file does not exist. It is specified in the CHECKSUM file but cannot be found?\n";
        cleanup(1);
    }

    my $sum_command = "sum $database/$file";
    my $sum_output  = `$sum_command`;
    if ($?) {
        print STDERR "$sum_command failed\n";
        print STDERR "with output:".$sum_output."\n";
        print STDERR "and error code $?\n";
        print STDERR "Ending as no checksum could be obtained\n";
        cleanup(1);
    }

    # Compared numerically so that differences in zero padding do not matter.
    my ($s1, $s2) = split ' ', $sum_output;
    if ($s1 != $e1 or $s2 != $e2) {
        print STDERR "Warning: checksums do not match for file $database/$file\n" unless $quiet;
        print STDERR " from checksum we have $e1 and $e2\n" unless $quiet;
        print STDERR " but from sum we have $s1 and $s2\n" unless $quiet;
        if (defined($force)) {
            print " Force set so carrying on\n" unless $quiet;
        }
        else {
            print STDERR "Checksums do not match which can be a problem.\n";
            print STDERR "But the CHECKSUM file can sometimes be wrong as the database may have been\n";
            print STDERR "updated without the CHECKSUM file being updated\n";
            print STDERR "To continue with just warning use the -force flag in the options\n";
            cleanup(1);
        }
    }

    system("gunzip -f $database/$file");

    # mysqlimport takes the table name from the file's base name.
    my $data_file = "$database/$table.txt";
    my $str = "mysqlimport --fields_escaped_by=\\\\ $mysql_options ".$ENV{"PWD"}."/$data_file";
    print "$str\n" unless $quiet;
    my $import_output = `$str`;
    my $import_status = $?;
    if ($import_output =~ /Error/ or $import_status) {
        print STDERR $import_output;
        print STDERR "error code $import_status\n";
        print STDERR "Error during mysqlimport\n";
        cleanup(1);
    }
    print $import_output unless $quiet;
    system("gzip $data_file");
    print "\n\n" unless $quiet;
}
close $checksums;

cleanup();
199 
200 
201 
202 
# Finish the run and exit the script; this sub never returns.
#
#   $died  true when called after a fatal error: CHECKSUMS is zipped again,
#          the downloaded files are kept and the exit status is 1.
#          False or missing for a normal end: the download directory is
#          removed if -cleanup was given and the exit status is 0.
#
# The external commands are run in list form so the database name is never
# interpreted by a shell; "--" stops rm treating the name as an option.
sub cleanup {
    my $died = shift;

    if ($died) {
        system('gzip', "$database/CHECKSUMS");
        exit 1;
    }

    if (defined($cleanup)) {
        system('rm', '-Rf', '--', $database);
    }
    exit 0;
}
214 
215 
# Print the help text to STDOUT.  Takes no arguments and does not exit; the
# caller decides what happens next.
sub usage {
    print <<'EOH';
This perl script will download (rsync) the necessary ftp files and load them into a new local
ensembl mysql database. It will check that the checksums match and do all the zipping and
unzipping of the files.


  load_database_from_ftp_site.pl -root {root} -database {database} -new_database {database2}
                                 -force -cleanup -quiet -help
                                 -host {host} -port {port} -user {user} -pass {password}
                                 -mysqltempdir {dir}

  -root          Root directory for ftp files. If not given it is guessed from the
                 release number in the database name.

  -database      Database name to get data for

  -new_database  Name of the new database

  -user          User name to access database. Must allow writing.

  -pass          Password for user.

  -host          Database host.

  -port          Database port.

  -force         import data even if the checksums do not match

  -cleanup       remove the downloaded files at the end

  -quiet         No output except for serious error messages

  -mysqltempdir  Mysql may not have enough tmp space so this can be set to another directory

  -help          print this help text



examples:-

1) perl load_database_from_ftp_site.pl -database homo_sapiens_core_54_36p -host mysqlhostname
        -user mysqluser -pass mysqlpassword -force

This will download the ftp files for the 54 release of the human core database and create a database
called <userid>_homo_sapiens_core_54_36p where userid is the login name of the user. To choose your
own database name use the -new_database option.


2) load_database_from_ftp_site.pl -database homo_sapiens_core_57_37d -new_database homo_sapiens_core_59_37d
        -host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -cleanup -mysqltempdir /scratch/

Will load the human core database into the mysql instance on mysqlhostname and use the directory
/scratch/ to use as the tmp directory for mysql.

EOH

}
usage
public usage()
cleanup
public cleanup()