ensembl-hive  2.8.1
load_multiple_databases.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 use strict;
18 use warnings;
19 
20 use Getopt::Long;
# Name of the registry class; every registry call below is a class-method
# call made through this string.
my $reg = "Bio::EnsEMBL::Registry";

# Settings for downloading and importing the databases, filled in from the
# command line by GetOptions() below.
my $root;                  # -root: passed through to the loader script
my $prefix = "";           # -prefix: prepended to each new database name
my $release;               # -release: must be an integer when given
my $specieslist;           # -species: comma separated list
my $grouplist;             # -groups: comma separated list
my $user;
my $pass;
my $port = 3306;
my $host;
my $cleanup      = undef;
my $force        = undef;  # if set, checksum failures only write warnings instead of dying
my $mysqltempdir = undef;
my $quiet        = 0;
my $run          = undef;

# A failed parse (unknown or malformed option) is fatal: silently dropping a
# mistyped option would let the job carry on with default settings.
# "mysqltmpdir" is accepted as an alias because that is the spelling the help
# text has always advertised.
GetOptions(
    'root=s'                     => \$root,
    'prefix=s'                   => \$prefix,
    'release=s'                  => \$release,
    'species=s'                  => \$specieslist,
    'groups=s'                   => \$grouplist,
    'host=s'                     => \$host,
    'force'                      => \$force,
    'cleanup'                    => \$cleanup,
    'port=s'                     => \$port,
    'user=s'                     => \$user,
    'pass=s'                     => \$pass,
    'mysqltempdir|mysqltmpdir=s' => \$mysqltempdir,
    'quiet'                      => \$quiet,
    'run'                        => \$run,
    'help'                       => sub { usage(); exit(0); },
) or do {
    usage();
    die "Could not parse the command line options\n";
};
56 
57 
58 
# Both -species and -groups are mandatory comma separated lists; print the
# help text and stop as soon as one of them is missing.
unless (defined($specieslist)) {
    usage();
    die "No species set?\n";
}
my @names = split(",", $specieslist);

unless (defined($grouplist)) {
    usage();
    die "No groups set?\n";
}
my @types = split(",", $grouplist);
76 
my $db_version = undef;

#
# Connect to the public databases so that each name given on the command
# line can be turned into the species part of its core database name.
#
$reg->no_version_check(1);
$reg->load_registry_from_db(
    -host       => "ensembldb.ensembl.org",
    -user       => "anonymous",
    -db_version => 59, # comment out later.
);

my @species;
foreach my $sp (@names) {
    my $slice_adaptor = $reg->get_adaptor($sp, "core", "slice");

    unless (defined($slice_adaptor)) {
        print "Could not find species $sp so ignoring\n";
        next;
    }

    # Keep whatever precedes "_core" in the database name; names that do not
    # have that shape are skipped without a message.
    my $dbname = $slice_adaptor->dbc->dbname;
    next unless defined($dbname);
    push @species, $1 if $dbname =~ /(\S+_\S+)_core/;
}
107 
# Use the release given on the command line (integers only), otherwise fall
# back to the version reported by the registry.
if (!defined($release)) {
    $release = $reg->software_version();
}
elsif ($release =~ /^\d+$/) {
    $db_version = $release;
}
else {
    die "release must be an integer\n";
}

# Releases before 47 are queried on port 3306, everything else on 5306.
my $sqlport = ($release < 47) ? 3306 : 5306;
124 
125 
# Names of every public database that matches the requested species, group
# and release; filled by querying ensembldb.ensembl.org with the mysql client.
my @database_list;

my $sqltemplate = 'mysql -hensembldb.ensembl.org -uanonymous -PPORT --skip-column-names -e\'show databases like "SPECIES%TYPE%RELEASE%"\'';

$sqltemplate =~ s/PORT/$sqlport/;

foreach my $sp (@species) {
    foreach my $ty (@types) {
        # $ty is an alias for the element of @types, so the escaped
        # underscore has to go into a separate variable. Appending to $ty
        # itself would turn "core" into "core\_" for the first species and
        # "core\_\_" for the next one, which then matches nothing.
        # The group "all" drops the type part of the pattern altogether.
        my $type_pattern = ($ty eq "all") ? "" : $ty . "\\_";

        my $sql = $sqltemplate;
        $sql =~ s/SPECIES/$sp/;
        $sql =~ s/RELEASE/$release/;
        $sql =~ s/TYPE/$type_pattern/;

        my $listing = `$sql`;
        if ($? or !defined($listing)) {
            # Report the failure but carry on with the remaining queries.
            warn "Could not list databases using: $sql\n";
            next unless defined($listing);
        }

        # One database name per output line.
        push @database_list, split(/\n/, $listing);
    }
}
unless (defined($host) and defined($user)) {
    usage();
    die " No host or user\n";
}

#
# Check the mysql instance that the data is to be copied to by running a
# harmless query against it.
#
my $com = "mysql -h$host -u$user -P$port "
        . (defined($pass) ? "-p$pass " : "")
        . "-e'show databases like \"justatest\"' ";

my $line = `$com`;

# A non-zero exit status from the client means the connection details are
# unusable; show the command that was tried before giving up.
if ($?) {
    print $com." fails\n";
    die "$line";
}
die "problem with mysql information\n$line\n" if $line =~ /ERROR/;
179 
180 
181 use FindBin '$Bin';
# Common prefix of the command line used to run the per-database loader
# script that lives next to this script. The database specific arguments are
# appended later, once per database.
my $com_init = "perl ".$Bin."/load_database_from_ftp_site.pl -host $host -user $user ";
if (defined($force)) {
    $com_init .= "-force ";
}
if (defined($cleanup)) {
    $com_init .= "-cleanup ";
}
if (defined($pass)) {
    $com_init .= "-pass $pass ";
}
if (defined($root)) {
    $com_init .= "-root $root ";
}
if (defined($mysqltempdir)) {
    $com_init .= "-mysqltempdir $mysqltempdir ";
}
# $quiet starts out as 0, so it is always defined; test its value, otherwise
# -quiet would be forwarded on every run whether or not it was requested.
if ($quiet) {
    $com_init .= "-quiet ";
}
201 
# $okay collects the copies that would be made (dry run only), $prob the
# databases that were skipped because they already exist on the target.
my $okay="";
my $prob ="";

foreach my $db (@database_list) {
    # Ask the target server whether the destination database already exists.
    my $com = "mysql -h$host -u$user -P$port ";
    if (defined($pass)) {
        $com .= "-p$pass ";
    }
    $com .= "-e'show databases like \"$prefix$db\"'";

    my $existing = `$com`;
    $existing = "" unless defined($existing);

    # \Q...\E makes the database name match literally rather than as a pattern.
    if ($existing =~ /\Q$db\E/ and !defined($force)) {
        $prob .= "\t$prefix$db\n";
        next;
    }
    elsif (defined($run)) {
        my $cmd = $com_init."-database $db -new_database $prefix$db ";

        print STDERR "Copying $db to $host as $prefix$db\n";
        my $output = `$cmd`;
        $output = "" unless defined($output);

        # Keep what the loader printed in <database>.OUTPUT. Failing to
        # write the log must not stop the remaining copies, so it only warns.
        my $logfile = "$db.OUTPUT";
        if (open(my $out, '>', $logfile)) {
            print {$out} $output;
            close($out) or warn "Could not finish writing $logfile: $!\n";
        }
        else {
            warn "Could not write $logfile: $!\n";
        }
    }
    else {
        $okay .= "\t$db to $host $prefix$db\n";
    }
}
233 
234 
235 
# Final report. In a dry run (no -run) list both the clashes and the copies
# that would be made; after a real run only the skipped databases are listed.
my $have_problems = length($prob) > 1;

if (defined($run)) {
    if ($have_problems) {
        print "Problem with the following databases as they already exist on $host so not copied\n";
        print $prob;
    }
}
else {
    if ($have_problems) {
        print "Problem with the following databases as they already exist on $host\n";
        print $prob;
    }
    if (length($okay) > 1) {
        print "The following would be copied:-\n";
        print $okay;
    }
    print "\nYou need to set the flag -run to actually do the data copy\n";
    print "By default it is not done so that this list can be checked first\n";
}
254 
# Print the help text for this script on STDOUT.
# Takes no arguments; callers decide whether to exit or die afterwards.
sub usage {
    print <<'EOH';
It uses the Registry from the core API to get the species name to pass on to the script
load_database_from_ftp_site.pl.

  load_multiple_databases.pl -root {root} -prefix {prefix} -release {number}
      -species {s1,s2,s3} -groups {type1,type2} -force -cleanup -quiet -help
      -host {host} -port {port} -user {user} -pass {password}
      -mysqltempdir {dir} -run

  -root         Root directory for ftp files

  -prefix       Text put in front of the name of each new database

  -release      Release version of the database to get

  -species      Comma separated list of species to get

  -groups       Comma separated list of database types to get
                ( from core,variation,funcgen,otherfeatures,vega etc or all)

  -user         User name to access database. Must allow writing.

  -pass         Password for user.

  -host         Database host.

  -port         Database port.

  -force        import data even if the checksums do not match
                or the new database already exists.

  -cleanup      remove the downloaded files at the end

  -quiet        No output except for serious error messages

  -mysqltempdir Mysql may not have enough tmp space so this can be set to another directory

  -run          If set will start the download etc else it will just list the databases.
                NOTE: Not default as this script does a lot so we want to make sure everything
                is correct first before starting.

  -help         print this help text



examples:-

1) perl load_multiple_databases.pl -release 54 -groups core -species human -host mysqlhostname
       -user mysqluser -pass mysqlpassword -force -run -prefix "copy_"

This will download the ftp files for the 54 release of the human core database and create a database
called copy_homo_sapiens_core_54_36p.


2) perl load_multiple_databases.pl -release 59 -species mouse -groups all -run
       -host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -cleanup -mysqltempdir /scratch/

Will load the mouse databases for release 59 into the mysql instance on mysqlhostname and use the directory
/scratch/ to use as the tmp directory for mysql.
This will load the databases:-
  mus_musculus_cdna_59_37l
  mus_musculus_core_59_37l
  mus_musculus_funcgen_59_37l
  mus_musculus_otherfeatures_59_37l
  mus_musculus_variation_59_37l
  mus_musculus_vega_59_37l

EOH

    return;
}
usage
public usage()
cleanup
public cleanup()
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
run
public run()