my ( $self, @args ) = @_;
my ( $host, $port, $user,
$pass, $verbose, $db_version,
$wait_timeout, $no_cache, $species, $species_suffix, $db_prefix )
= rearrange( [ 'HOST', 'PORT',
'USER', 'PASS',
'VERBOSE', 'DB_VERSION',
'WAIT_TIMEOUT', 'NO_CACHE',
'SPECIES', 'SPECIES_SUFFIX', 'DB_PREFIX' ],
@args );
my $ignore_multi = 0;
if ( defined($species) ) {
$species = lc($species);
$species =~ tr/ -/__/;
$ignore_multi = 1;
}
if (!defined($species_suffix)) {
$species_suffix = "";
}
if (defined($db_prefix)) {
$db_prefix = $db_prefix . '_';
} else {
$db_prefix = '';
}
if(! defined $db_version) {
# Do checking for the -DB_VERSION flag which can be mis-spelt. Regex assembled using:
# perl -MRegexp::Assemble -e '$r=Regexp::Assemble->new(); $r->add($_) for ("-dbversion","-version","-verion","-verison"); print $r->re, "\n";'
my %hashed_args = @args;
my ($possible_key) = grep { $_ =~ /(?-xism:-(?:ver(?:is?|si)|dbversi)on)/xism } keys %hashed_args;
if($possible_key) {
my $msg = sprintf(q{Detected no -DB_VERSION flag but found '%s'; assuming a mis-spelling. Please fix}, $possible_key);
warning($msg);
$db_version = $hashed_args{$possible_key};
}
}
my $ontology_db;
my $ontology_version;
my $taxonomy_db;
my $taxonomy_db_versioned;
my $ensembl_metadata_db;
my $ensembl_metadata_db_versioned;
my $production_dba_ok =
eval { require Bio::EnsEMBL::Production::DBSQL::DBAdaptor; 1 };
my $production_db;
my $production_version;
my $stable_ids_db;
my $stable_ids_version;
$user ||= "anonymous";
if ( !defined($port) ) {
$port = 3306;
if ( $host eq "ensembldb.ensembl.org" && defined($db_version) && $db_version < 48 ) {
$port = 4306;
}
}
$wait_timeout ||= 0;
my $original_count = $self->get_DBAdaptor_count();
my $err_pattern = 'Cannot %s to the Ensembl MySQL server at %s:%d; check your settings & DBI error message: %s';
my $dbh = DBI->connect( "DBI:mysql:host=$host;port=$port", $user, $pass ) or
throw(sprintf($err_pattern, 'connect', $host, $port, $DBI::errstr));
$dbh->ping() or
throw(sprintf($err_pattern, 'ping', $host, $port, $DBI::errstr));
my $res = $dbh->selectall_arrayref('SHOW DATABASES');
my @dbnames =
map { $_->[0] } @$res;
my %temp;
my $software_version = software_version();
if ( defined($db_version) ) {
$software_version = $db_version;
}
if ($verbose) {
printf( "Will only load v%d databases\n", $software_version );
}
# From the list of all the databases create a temporary hash of those we
# are interested in
for my $db (@dbnames) {
if ( $db =~ /^(\w+_collection_\w+(?:_\d+)?)_((\d+)_\w+)/ )
{ # NEEDS TO BE FIRST TO PICK UP COLLECTION DBS
if ( $3 eq $software_version ) {
$temp{$1} = $2;
}
} elsif ( $db =~ /^(.+)_(userdata)$/x ) {
$temp{$1} = $2;
} elsif (
$db =~ /^(ensembl_compara # compara database
(?:_\w+)*?) # optional ensembl genomes bit
_
(\d+)$/x )
{ # db version
if ( $2 eq $software_version ) {
$temp{$1} = $2;
}
} elsif ( $db =~ /^(ensembl_ancestral(?:_\w+?)*?)_(\d+)$/x ) {
if ( $2 eq $software_version ) {
$temp{$1} = $2;
}
} elsif ( $db =~ /^ensembl(?:genomes)?_ontology_(?:\d+_)?(\d+)/x ) {
if ( $1 eq $software_version ) {
$ontology_db = $db;
$ontology_version = $1;
}
} elsif ( $db =~ /^ncbi_taxonomy$/ ) {
$taxonomy_db = $db;
}
elsif ( $db =~ m{ \A ncbi_taxonomy_(\d+) \z }msx ) {
if ( $1 eq $software_version ) {
$taxonomy_db_versioned = $db;
}
} elsif ( $db =~ /^ensembl_metadata$/ ) {
$ensembl_metadata_db = $db;
}
elsif ( $db =~ m{ \A ensembl_metadata_(\d+) \z }msx ) {
if ( $1 eq $software_version ) {
$ensembl_metadata_db_versioned = $db;
}
} elsif ( $production_dba_ok and $db =~ /^ensembl(?:genomes)?_production(_\d+)?/x ) {
# production db can come with no version (i.e. that on ens-staging1),
# but it's backed up with a release number
my $version = $1;
if ($version) {
$version =~ s/_
if ($software_version and $version eq $software_version) {
$production_db = $db;
$production_version = $version;
}
} else { # this is the default choice
$production_db = $db if $db =~ /^ensembl(?:genomes)?_production$/;
}
} elsif ( $db =~ /^ensembl(?:genomes)?_stable_ids_(?:\d+_)?(\d+)/x ) {
if ( $1 eq $software_version ) {
$stable_ids_db = $db;
$stable_ids_version = $1;
}
} elsif (
$db =~ /^(?:$db_prefix)([a-z]+_[a-z0-9]+(?:_[a-z0-9]+)? # species name e.g. homo_sapiens or canis_lupus_familiaris
_
[a-z]+ # db type
(?:_\d+)?) # optional end bit for ensembl genomes databases
_
(\d+) # database release
_
(\w+)$ # assembly number can have letters too e.g 37c
/x
)
{
# Species specific databases (core, cdna, vega etc.)
my ( $sp_name, $db_rel, $assem ) = ( $1, $2, $3 );
if ($db_prefix) { $sp_name = $db_prefix . $sp_name; }
if ( !defined($species) || $sp_name =~ /^$species/ ) {
if ( $db_rel eq $software_version ) {
$temp{$sp_name} = $db_rel . "_" . $assem;
}
}
} else {
# warn( sprintf( "Skipping database '%s'\n", $db ) );
}
} ## end for my $db (@dbnames)
@dbnames = ();
foreach my $key ( keys %temp ) {
push @dbnames, $key . "_" . $temp{$key};
}
# Register Core like databases
my $core_like_dbs_found = 0;
foreach my $type (qw(core cdna vega vega_update otherfeatures rnaseq ccds)) {
my @dbs = grep { /^(?:$db_prefix)[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)? # species name
_
$type # the database type
_
(?:\d+_)? # optional end bit for ensembl genomes
\d+ # database release
_
/x } @dbnames;
if(@dbs) {
$core_like_dbs_found = 1;
}
foreach my $database (@dbs) {
if ( index( $database, 'collection' ) != -1 ) {
# Skip multi-species databases.
next;
}
my ( $prefix, $species, $num ) =
( $database =~ /(^$db_prefix)([a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?) # species name
_
$type # type
_
(?:\d+_)? # optional endbit for ensembl genomes
(\d+) # databases release
_
/x );
if(!defined($species)){
warn "Cannot extract species name from database '$database'";
}
my $dba =
-group => $type,
-species => $species.$species_suffix,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-dbname => $database,
-wait_timeout => $wait_timeout,
-no_cache => $no_cache );
if ($verbose) {
printf( "Species '%s' loaded from database '%s'\n",
$species, $database );
}
}
}
# Register multi-species databases
my @multi_dbs = grep { /^\w+_collection_core_\w+$/ } @dbnames;
if (!$ignore_multi) {
foreach my $multidb (@multi_dbs) {
my $sth = $dbh->prepare(
sprintf(
"SELECT species_id, meta_value FROM %s.meta "
. "WHERE meta_key = 'species.db_name'",
$dbh->quote_identifier($multidb) ) );
$sth->execute();
my ( $species_id, $species );
$sth->bind_columns( \( $species_id, $species ) );
while ( $sth->fetch() ) {
-group => "core",
-species => $species.$species_suffix,
-species_id => $species_id,
-multispecies_db => 1,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-dbname => $multidb,
-wait_timeout => $wait_timeout,
-no_cache => $no_cache
);
if ($verbose) {
printf( "Species '%s' (id:%d) loaded from database '%s'\n",
$species, $species_id, $multidb );
}
}
} ## end foreach my $multidb (@multi_dbs)
}
if(!$core_like_dbs_found && $verbose) {
print("No core-like databases found. Check your DB_VERSION (used '$software_version')\n");
}
# User upload DBs
my @userupload_dbs = grep { /_userdata$/ } @dbnames;
if (!$ignore_multi) {
for my $userupload_db (@userupload_dbs) {
if ( index( $userupload_db, 'collection' ) != -1 ) {
# Skip multi-species databases.
next;
}
my ($species) = ( $userupload_db =~ /(^.+)_userdata$/ );
my $dba =
-group => "userupload",
-species => $species.$species_suffix,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-wait_timeout => $wait_timeout,
-dbname => $userupload_db,
-no_cache => $no_cache );
if ($verbose) {
printf( "%s loaded\n", $userupload_db );
}
}
}
# Register multi-species userupload databases.
my @userdata_multidbs = grep { /^.+_collection_userdata$/ } @dbnames;
if (!$ignore_multi) {
foreach my $multidb (@userdata_multidbs) {
my $sth = $dbh->prepare(
sprintf(
"SELECT species_id, meta_value FROM %s.meta "
. "WHERE meta_key = 'species.db_name'",
$dbh->quote_identifier($multidb) ) );
$sth->execute();
my ( $species_id, $species );
$sth->bind_columns( \( $species_id, $species ) );
while ( $sth->fetch() ) {
-group => "userupload",
-species => $species.$species_suffix,
-species_id => $species_id,
-multispecies_db => 1,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-dbname => $multidb,
-wait_timeout => $wait_timeout,
-no_cache => $no_cache
);
if ($verbose) {
printf( "Species '%s' (id:%d) loaded from database '%s'\n",
$species, $species_id, $multidb );
}
}
} ## end foreach my $multidb (@userdata_multidbs)
}
# Variation
my $test_eval = eval "require Bio::EnsEMBL::Variation::DBSQL::DBAdaptor"; ## no critic
if ($@or (!$test_eval)) {
# Ignore variation databases, as the code required to handle them is not available
if ($verbose) {
print(
"Bio::EnsEMBL::Variation::DBSQL::DBAdaptor module not found "
. "so variation databases will be ignored if found\n" );
}
}
else {
my @variation_dbs =
grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_variation_(?:\d+_)?\d+_/ } @dbnames;
if(! @variation_dbs && $verbose) {
print("No variation databases found\n");
}
for my $variation_db (@variation_dbs) {
if ( index( $variation_db, 'collection' ) != -1 ) {
# Skip multi-species databases.
next;
}
my ( $species, $num ) =
( $variation_db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?)_variation_(?:\d+_)?(\d+)_/ );
my $dba =
Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(
-group => "variation",
-species => $species.$species_suffix,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-wait_timeout => $wait_timeout,
-dbname => $variation_db,
-no_cache => $no_cache );
if ($verbose) {
printf( "%s loaded\n", $variation_db );
}
}
# Register variation multispecies databases
my @variation_multidbs =
grep { /^\w+_collection_variation_\w+$/ } @dbnames;
if (!$ignore_multi) {
foreach my $multidb (@variation_multidbs) {
my $sth = $dbh->prepare(
sprintf( 'SELECT species_id, meta_value FROM %s.meta ',
$dbh->quote_identifier($multidb) )
. "WHERE meta_key = 'species.db_name'"
);
$sth->execute();
my ( $species_id, $species );
$sth->bind_columns( \( $species_id, $species ) );
while ( $sth->fetch() ) {
my $dba = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(
-group => 'variation',
-species => $species.$species_suffix,
-species_id => $species_id,
-multispecies_db => 1,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-dbname => $multidb,
-wait_timeout => $wait_timeout,
-no_cache => $no_cache
);
if ($verbose) {
printf( "Species '%s' (id:%d) loaded from database '%s'\n",
$species, $species_id, $multidb );
}
}
} ## end foreach my $multidb (@variation_multidbs)
}
}
my $func_eval = eval "require Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor"; ## no critic
if ($@ or (!$func_eval)) {
if ($verbose) {
# Ignore funcgen DBs, as the code required to handle them is not available
print("Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor module not found "
. "so functional genomics databases will be ignored if found\n"
);
}
} else {
my @funcgen_dbs =
grep { /^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?_funcgen_(?:\d+_)?\d+_/ } @dbnames;
if(! @funcgen_dbs && $verbose) {
print("No funcgen databases found\n");
}
for my $funcgen_db (@funcgen_dbs) {
if ( index( $funcgen_db, 'collection' ) != -1 ) {
# Skip multi-species databases.
next;
}
my ( $species, $num ) =
( $funcgen_db =~ /(^[a-z]+_[a-z0-9]+(?:_[a-z0-9]+)?)_funcgen_(?:\d+_)?(\d+)_/ );
my $dba = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
-group => "funcgen",
-species => $species.$species_suffix,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-wait_timeout => $wait_timeout,
-dbname => $funcgen_db,
-no_cache => $no_cache
);
if ($verbose) {
printf( "%s loaded\n", $funcgen_db );
}
}
# Register functional genomics multispecies databases
my @funcgen_multidbs =
grep { /^\w+_collection_funcgen_\w+$/ } @dbnames;
if (!$ignore_multi) {
foreach my $multidb (@funcgen_multidbs) {
my $sth = $dbh->prepare(
sprintf( 'SELECT species_id, meta_value FROM %s.meta ',
$dbh->quote_identifier($multidb) )
. "WHERE meta_key = 'species.db_name'"
);
$sth->execute();
my ( $species_id, $species );
$sth->bind_columns( \( $species_id, $species ) );
while ( $sth->fetch() ) {
my $dba = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
-group => 'funcgen',
-species => $species.$species_suffix,
-species_id => $species_id,
-multispecies_db => 1,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-dbname => $multidb,
-wait_timeout => $wait_timeout,
-no_cache => $no_cache
);
if ($verbose) {
printf( "Species '%s' (id:%d) loaded from database '%s'\n",
$species, $species_id, $multidb );
}
}
} ## end foreach my $multidb (@funcgen_multidbs)
}
} ## end else [ if ($@) ]
# Compara
my @compara_dbs = grep { /^ensembl_compara/ } @dbnames;
if (!$ignore_multi) {
if (@compara_dbs) {
my $comp_eval = eval "require Bio::EnsEMBL::Compara::DBSQL::DBAdaptor"; ## no critic
if ($@ or (!$comp_eval)) {
# Ignore Compara, as the code required to handle it is not available
if ($verbose) {
printf(
"Bio::EnsEMBL::Compara::DBSQL::DBAdaptor "
. "not found so the following compara "
. "databases will be ignored: %s\n",
join( ', ', @compara_dbs ) );
}
} else {
foreach my $compara_db (@compara_dbs) {
# Looking for EnsEMBL Genomes Comparas.
# ensembl_compara_bacteria_2_53 is registered as
# 'bacteria', ensembl_compara_pan_homology_2_53 is
# registered as 'pan_homology', ensembl_compara_53 is
# registered as 'multi', and the alias 'compara' still
# operates.
my ($species) =
$compara_db =~ /^ensembl_compara_(\w+)(?:_\d+){2}$/xm;
$species ||= 'multi';
my $dba = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(
-group => 'compara',
-species => $species.$species_suffix,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-wait_timeout => $wait_timeout,
-dbname => $compara_db,
-no_cache => $no_cache
);
if ($verbose) {
printf( "%s loaded\n", $compara_db );
}
} ## end foreach my $compara_db (@compara_dbs)
} ## end else [ if ($@)
} elsif ($verbose) {
print("No Compara databases found\n");
}
}
# Ancestral sequences
my @ancestral_dbs =
sort grep { /^ensembl_ancestral/ } @dbnames;
if (@ancestral_dbs && !$ignore_multi) {
foreach my $ancestral_db (@ancestral_dbs) {
# Looking for Compara's "ancestral" databases.
# ensembl_ancestral_plants_47_100 is registered with the 'plants'
# prefix, while ensembl_ancestral_100 is not given any prefix for
# backwards compatibility.
# Similarly, contrary to the nomenclature, "Ancestral sequences"
# is the species (production) name and "ancestral_sequences" is
# an alias.
my $alias;
my ($division) = $ancestral_db =~ /^ensembl_ancestral_(\w+)(?:_\d+){2}$/xm;
if ($division) {
$species = (ucfirst $division).' Ancestral sequences'.$species_suffix;
$alias = $division.'_ancestral_sequences'.$species_suffix;
} else {
$species = 'Ancestral sequences'.$species_suffix;
$alias = 'ancestral_sequences'.$species_suffix;
}
-group => 'core',
-species => $species,
-host => $host,
-user => $user,
-pass => $pass,
-port => $port,
-wait_timeout => $wait_timeout,
-dbname => $ancestral_db,
-no_cache => $no_cache
);
-species => $species,
-alias => [$alias],
);
if ($verbose) {
printf( "%s loaded\n", $ancestral_db );
}
}
} elsif ($verbose) {
print("No ancestral database found\n");
}
# Ontology
if ( defined($ontology_version) && $ontology_version != 0 && !$ignore_multi) {
my $dba =
'-species' => 'multi' . $species_suffix,
'-group' => 'ontology',
'-host' => $host,
'-port' => $port,
'-user' => $user,
'-pass' => $pass,
'-dbname' => $ontology_db, );
if ($verbose) {
printf( "%s loaded\n", $ontology_db );
}
}
elsif ($verbose) {
print("No ontology database found\n");
}
# Taxonomy
if ( ( defined $taxonomy_db ) || ( defined $taxonomy_db_versioned ) ) {
my $has_taxonomy = eval {require Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor};
if($@ or (!defined $has_taxonomy)) {
if($verbose) {
print "ensembl_taxonomy API not found - ignoring $taxonomy_db\n";
}
} else {
my $taxonomy_dbname;
# Versioned database has priority over unversioned one.
if ( defined $taxonomy_db_versioned ) {
$taxonomy_dbname = $taxonomy_db_versioned;
}
else {
$taxonomy_dbname = $taxonomy_db;
}
my $dba = Bio::EnsEMBL::Taxonomy::DBSQL::TaxonomyDBAdaptor->new(
'-species' => 'multi' . $species_suffix,
'-group' => 'taxonomy',
'-host' => $host,
'-port' => $port,
'-user' => $user,
'-pass' => $pass,
'-dbname' => $taxonomy_dbname, );
if ($verbose) {
printf( "%s loaded\n", $taxonomy_dbname );
}
}
}
elsif ($verbose) {
print("No taxonomy database found\n");
}
# ensembl_metadata
if ( ( defined $ensembl_metadata_db ) || ( defined $ensembl_metadata_db_versioned ) ) {
my $has_metadata = eval {require Bio::EnsEMBL::MetaData::DBSQL::MetaDataDBAdaptor};
if($@ or (!defined $has_metadata)) {
if($verbose) {
print "ensembl_metadata API not found - ignoring $ensembl_metadata_db\n";
}
} else {
my $metadata_dbname;
# Versioned database has priority over unversioned one.
if ( defined $ensembl_metadata_db_versioned ) {
$metadata_dbname = $ensembl_metadata_db_versioned;
}
else {
$metadata_dbname = $ensembl_metadata_db;
}
my $dba = Bio::EnsEMBL::MetaData::DBSQL::MetaDataDBAdaptor->new(
'-species' => 'multi' . $species_suffix,
'-group' => 'metadata',
'-host' => $host,
'-port' => $port,
'-user' => $user,
'-pass' => $pass,
'-dbname' => $metadata_dbname, );
if ($verbose) {
printf( "%s loaded\n", $metadata_dbname );
}
}
}
elsif ($verbose) {
print("No ensembl_metadata database found\n");
}
# Production
if ( $production_dba_ok and defined($production_db) && !$ignore_multi) {
# require Bio::EnsEMBL::Production::DBSQL::DBAdaptor;
my $dba =
Bio::EnsEMBL::Production::DBSQL::DBAdaptor->new(
'-species' => 'multi' . $species_suffix,
'-group' => 'production',
'-host' => $host,
'-port' => $port,
'-user' => $user,
'-pass' => $pass,
'-dbname' => $production_db, );
if ($verbose) {
printf( "%s loaded\n", $production_db );
}
}
elsif ($verbose) {
print("No production database or adaptor found\n");
}
# Stable IDs
if ( defined($stable_ids_db) && $stable_ids_version != 0 && !$ignore_multi) {
my $dba =
'-species' => 'multi' . $species_suffix,
'-group' => 'stable_ids',
'-host' => $host,
'-port' => $port,
'-user' => $user,
'-pass' => $pass,
'-dbname' => $stable_ids_db, );
if ($verbose) {
printf( "%s loaded\n", $stable_ids_db );
}
}
-species => 'multi'.$species_suffix,
-alias => ['compara'.$species_suffix] );
-species => 'multi'.$species_suffix,
-alias => ['ontology'.$species_suffix] );
$production_dba_ok and
-species => 'multi'.$species_suffix,
-alias => ['production'.$species_suffix] );
-species => 'multi'.$species_suffix,
-alias => ['stable_ids'.$species_suffix] );
# Register aliases as found in adaptor meta tables.
$self->find_and_add_aliases( '-handle' => $dbh,
'-species_suffix' => $species_suffix );
$dbh->disconnect();
my $count = $self->get_DBAdaptor_count() - $original_count;
return $count >= 0 ? $count : 0;