2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
# Registry class name held in a scalar; the registry's class methods are
# invoked through this variable later in the script.
22 my $reg =
"Bio::EnsEMBL::Registry";
24 # download and import the database
35 my $force = undef; #
if set, ignore checksum dies and just write warnings.
36 my $mysqltempdir = undef;
# Parse the command-line switches into their option variables.
# NOTE(review): only part of the option list is visible in this view.
40 GetOptions (
'root=s' => \$root,
41 'prefix=s' => \$prefix,
42 'release=s' => \$release,
43 'species=s' => \$specieslist,
44 'groups=s' => \$grouplist,
47 'cleanup' => \$cleanup,
51 'mysqltempdir=s' => \$mysqltempdir,
# -help prints the usage text and exits with status 0.
54 'help' => sub {
usage(); exit(0);}
# -species is a comma-separated list; split it into @names.
60 if(defined($specieslist)){
61 @names = split(
",",$specieslist);
# NOTE(review): presumably the branch taken when -species was not given;
# the enclosing else is not visible in this view.
65 die
"No species set?\n";
# -groups is handled the same way: split the comma-separated list into @types.
69 if(defined($grouplist)){
70 @types = split(
",",$grouplist);
# NOTE(review): presumably the branch taken when -groups was not given.
74 die
"No groups set?\n";
# Database version to use; stays undefined unless a valid -release is supplied.
77 my $db_version = undef;
80 #connect to latest databases to get species name
# NOTE(review): presumably switches off the registry's version check - confirm
# against the Registry API.
83 $reg->no_version_check(1);
# Load the registry from the public Ensembl MySQL server.
84 $reg->load_registry_from_db(
85 -host =>
"ensembldb.ensembl.org",
# NOTE(review): the version is pinned to 59 here; the original author marked
# this as temporary.
87 -db_version => 59, # comment out later.
# For every requested species, fetch its core slice adaptor and read the
# name of the database behind it.
91 foreach my $sp (@names){
92 my $adap = $reg->get_adaptor($sp,
"core",
"slice");
94 my $name = $adap->dbc->dbname;
# Capture the part of the database name that precedes "_core".
97 if($name =~ /(\S+_\S+)_core/){
# Species that cannot be resolved are reported; the message says they are ignored.
104 print
"Could not find species $sp so ignoring\n";
# An explicit -release must consist only of digits; it then becomes the
# database version to use.
108 if(defined($release)){
109 if($release =~ /^\d+$/){
110 $db_version = $release;
113 die
"release must be an integer\n";
# NOTE(review): presumably the fallback when -release is absent - default to
# the version reported by the registry.
117 $release = $reg->software_version();
# Shell command template that lists matching databases on the public server.
# PORT, SPECIES and RELEASE are placeholders substituted below; TYPE is
# presumably substituted on a line not visible in this view.
128 my $sqltemplate =
'mysql -hensembldb.ensembl.org -uanonymous -PPORT --skip-column-names -e\'show databases like "SPECIES%TYPE%RELEASE%"\'';
130 $sqltemplate =~ s/PORT/$sqlport/;
131 #print $sqltemplate."\n";
# One query per species/group combination, each built from a fresh copy of
# the template.
133 foreach my $sp (@species){
135 foreach my $ty (@types){
137 my $sql = $sqltemplate;
138 $sql =~ s/SPECIES/$sp/;
139 $sql =~ s/RELEASE/$release/;
# Every newline-separated entry of $line (presumably the command's output)
# is appended to @database_list.
149 my @vals = split(/\n/,$line);
150 foreach my $db (@vals){
151 # print "\t".$db."\n";
152 push @database_list, $db;
# A target host and user are mandatory.
156 if(!defined($host) or !defined $user){
158 die
" No host or user\n";
162 # check the mysql instance the data is to be copied to.
# Probe command: a "show databases" query against the target server.
164 my $com =
"mysql -h$host -u$user -P$port ";
168 $com .=
"-e'show databases like \"justatest\"' ";
173 print $com.
" fails\n";
# Any ERROR text in $line (presumably the probe's captured output) aborts the run.
176 if($line =~ /ERROR/){
177 die
"problem with mysql information\n$line\n";
# Base command line for the single-database loader script found in $Bin.
182 my $com_init =
"perl ".$Bin.
"/load_database_from_ftp_site.pl -host $host -user $user ";
# Optional switches are appended one at a time.
# NOTE(review): the conditions guarding -force, -pass, -root and -quiet are
# not visible in this view.
184 $com_init .=
"-force ";
186 if(defined($cleanup)){
187 $com_init .=
"-cleanup ";
# NOTE(review): the password is placed on the command line unquoted - confirm
# this is acceptable for the target environment.
190 $com_init .=
"-pass $pass ";
193 $com_init .=
"-root $root ";
195 if(defined($mysqltempdir)){
196 $com_init .=
"-mysqltempdir $mysqltempdir ";
199 $com_init .=
"-quiet ";
# For each database found, query the target server for "$prefix$db".
205 foreach my $db (@database_list){
206 my $com =
"mysql -h$host -u$user -P$port ";
210 $com .=
"-e'show databases like \"$prefix$db\"'";
# A match without -force is recorded in $prob instead of being copied.
# NOTE(review): $db is interpolated into the pattern unquoted, so any regex
# metacharacters in a database name would be interpreted.
215 if($line =~ /$db/ and !defined($force)){
216 $prob .=
"\t$prefix$db\n";
# With -run set, build the loader command for this database and announce the copy.
219 elsif(defined($run)){
220 my $cmd = $com_init.
"-database $db -new_database $prefix$db ";
222 print STDERR
"Copying $db to $host as $prefix$db\n";
# NOTE(review): two-argument open on a bareword handle with no visible error check.
224 open(OUT,
">$db.OUTPUT");
# Recorded in $okay for the summary printed after the loop.
229 $okay .=
"\t$db to $host $prefix$db\n";
# Summary output: databases recorded in $prob are reported as already existing.
237 if(length($prob) > 1){
238 print
"Problem with the following databases as they already exist on $host\n";
# Listing of what would be copied, with a reminder that -run is required to
# actually perform the copy.
241 if(length($okay) > 1){
242 print
"The following would be copied:-\n";
245 print
"\nYou need to set the flag -run to actually do the data copy\n";
246 print
"By default it is not done so that this list can be checked first\n";
# Variant of the problem report that states the databases were not copied.
249 if(length($prob) > 1){
250 print
"Problem with the following databases as they already exist on $host so not copied\n";
257 It uses the Registry from the core API to get the species name to pass on to the script
258 load_database_from_ftp_site.pl.
260 load_multiple_databases.pl -root {root} -prefix {prefix} -release {number}
261 -species {s1,s2,s3} -groups {type1,type2} -force -
cleanup -quiet -help
262 -host {host} -port {port} -user {user} -pass {password}
263 -mysqltempdir {dir} -list
265 -root Root directory
for ftp files
267 -prefix Prefix prepended to the name of each new database created on the target host
269 -release Release version of the database to get
271 -species Comma separated list of species to get
273 -groups Comma separated list of database types to get
274 ( from core,variation,funcgen,otherfeatures,vega etc or all)
276 -user User name to access database. Must allow writing.
278 -pass Password
for user.
284 -force
import data even
if the checksums
do not match
285 or the
new database already exists.
287 -
cleanup remove the downloaded files at the end
289 -quiet No output except
for serious error messages
291 -mysqltempdir MySQL may not have enough tmp space so
this can be set to another directory
293 -
run If set will start the download etc
else it will just list the databases.
294 NOTE: Not
default as
this script does a lot so we want to make sure everything
295 is correct first before starting.
297 -help print
this help text
303 1) perl load_multiple_databases.pl -release 54 -groups core -species human -host mysqlhostname
304 -user mysqluser -pass mysqlpassword -force -
run -prefix
"copy_"
306 This will download the ftp files
for the 54 release of the human core database and create a database
307 called copy_homo_sapiens_core_54_36p.
310 2) perl load_multiple_databases.pl -release 59 -species mouse -groups all -
run
311 -host mysqlhostname -user mysqluser -pass mysqlpassword -quiet -
cleanup -mysqltempdir /scratch/
313 Will load the mouse databases
for release 59 into the mysql instance on mysqlhostname and use the directory
314 /scratch/ to use as the tmp directory
for mysql.
315 This will load the databases:-
316 mus_musculus_cdna_59_37l
317 mus_musculus_core_59_37l
318 mus_musculus_funcgen_59_37l
319 mus_musculus_otherfeatures_59_37l
320 mus_musculus_variation_59_37l
321 mus_musculus_vega_59_37l