3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::ArrayExpressParser;
22 ## Parsing format looks like (so we extract the species name):
23 # anopheles_gambiae.A-AFFY-102.tsv
# FTP host contacted when building the lookup of available species.
33 my $default_ftp_server =
'ftp.ebi.ac.uk';
# Directory on that host whose listing is read; the text before the first
# '.' of each entry is taken as a species name.
34 my $default_ftp_dir =
'pub/databases/microarray/data/atlas/bioentity_properties/ensembl';
# Main parsing routine: unpacks the argument hashref, resolves the species
# name, obtains a gene adaptor and stores one DIRECT xref per gene stable id.
# NOTE(review): the enclosing sub declaration and several statements are not
# visible in this excerpt, so the notes below cover only what is shown.
38 my ($self, $ref_arg) = @_;
39 my $source_id = $ref_arg->{source_id};
40 my $species_id = $ref_arg->{species_id};
41 my $species_name = $ref_arg->{species};
42 my $file = $ref_arg->{file};
43 my $verbose = $ref_arg->{verbose};
44 my $db = $ref_arg->{dba};
45 my $dbi = $ref_arg->{dbi};
# Fall back to the parser's own handle when the caller did not pass one.
46 $dbi = $self->dbi unless defined $dbi;
# source_id, species_id and file are mandatory.
48 if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){
49 croak
"Need to pass source_id, species_id and file as pairs";
# $file is scanned for "key=>value," settings (project, host, port, dbname,
# pass, user). The statements that store each capture are not visible here.
60 if($file =~ /project[=][>](\S+?)[,]/){
63 if($file =~ /host[=][>](\S+?)[,]/){
66 if($file =~ /port[=][>](\S+?)[,]/){
69 if($file =~ /dbname[=][>](\S+?)[,]/){
72 if($file =~ /pass[=][>](\S+?)[,]/){
75 if($file =~ /user[=][>](\S+?)[,]/){
# Collect the known names for this species id, adding the name passed in by
# the caller (if any) to that list.
79 my %species_id_to_names = $self->species_id2name($dbi);
80 if (defined $species_name) { push @{$species_id_to_names{$species_id}}, $species_name; }
# NOTE(review): no enclosing loop is visible around this 'next' -- confirm
# whether an early return was intended here.
81 if (!defined $species_id_to_names{$species_id}) { next; }
82 my $species_id_to_names = \%species_id_to_names;
83 my $names = $species_id_to_names->{$species_id};
# Fetch the species lookup and test this species' names against it. How
# $active is acted on is not visible in this excerpt.
84 my $species_lookup = $self->_get_species($verbose);
85 my $active = $self->_is_active($species_lookup, $names, $verbose);
# From here on the first entry in the name list is used as the species name.
91 $species_name = $species_id_to_names{$species_id}[0];
93 #get stable_ids from core and create xrefs
# The registry is addressed by class name (class-method calls below).
95 my $registry =
"Bio::EnsEMBL::Registry";
# First branch of the adaptor selection; its opening lines are not visible.
# It supplies -dbname/-species and takes the gene adaptor from $db.
103 '-dbname' => $dbname,
104 '-species' => $species_name,
107 $gene_adaptor = $db->get_GeneAdaptor();
# project 'ensembl': load the registry, then ask it for the core Gene adaptor.
108 } elsif (defined $project && $project eq
'ensembl') {
109 print
"Loading the Registry\n" if $verbose;
110 $registry->load_registry_from_multiple_dbs(
112 '-host' =>
'mysql-ens-sta-1',
117 $gene_adaptor = $registry->
get_adaptor($species_name,
'core',
'Gene');
# project 'ensemblgenomes': same approach, with two hosts listed.
118 } elsif (defined $project && $project eq
'ensemblgenomes') {
119 $registry->load_registry_from_multiple_dbs(
121 '-host' =>
'mysql-eg-staging-1.ebi.ac.uk',
126 '-host' =>
'mysql-eg-staging-2.ebi.ac.uk',
131 $gene_adaptor = $registry->get_adaptor($species_name,
'core',
'Gene');
# No recognised project: use the database adaptor passed in by the caller.
132 } elsif (defined $db) {
133 $gene_adaptor = $db->get_GeneAdaptor();
# Otherwise there is no way to reach a core database, so abort.
135 die(
"Missing or unsupported project value. Supported values: ensembl, ensemblgenomes");
137 print
"Finished loading the registry\n" if $verbose;
# Fetch every gene and keep only its stable id.
139 my @stable_ids =
map { $_->stable_id } @{$gene_adaptor->fetch_all()};
# For each stable id, create an xref whose accession and label are both the
# stable id, then attach it to the gene as a direct xref.
142 foreach my $gene_stable_id (@stable_ids) {
144 my $xref_id = $self->add_xref({ acc => $gene_stable_id,
145 label => $gene_stable_id,
146 source_id => $source_id,
147 species_id => $species_id,
149 info_type =>
"DIRECT"} );
151 $self->add_direct_xref( $xref_id, $gene_stable_id,
'gene',
'', $dbi);
# NOTE(review): $xref_count is presumably incremented inside the loop above on
# a line not visible here -- confirm.
157 print
"Added $xref_count DIRECT xrefs\n" if($verbose);
158 if ( !$xref_count ) {
162 return 0; # successful
# Build a lookup of the species that have files in the default FTP directory.
#
# Logs in anonymously to $default_ftp_server, changes to $default_ftp_dir,
# lists it and takes the text before the first '.' of every entry as a
# species name (entries look like anopheles_gambiae.A-AFFY-102.tsv).
#
# Arguments : $verbose - optional; passed to Net::FTP as its Debug flag
#                        (defaults to 0)
# Returns   : hashref mapping each species name found to 1
# Throws    : confess if the connection, login, directory change or listing
#             fails (an empty listing is treated as a failure)
sub _get_species {
  my ($self, $verbose) = @_;
  $verbose = (defined $verbose) ? $verbose : 0;

  # Only the constructor reports its failure through $@; every later call
  # reports through $ftp->message.
  my $ftp = Net::FTP->new($default_ftp_server, Debug => $verbose)
    or confess "Cannot connect to $default_ftp_server: $@";
  $ftp->login("anonymous", '-anonymous@')
    or confess "Cannot login ", $ftp->message;

  # Check the directory change itself, so that a failed cwd is not reported
  # later as a listing problem (or silently lists the wrong directory).
  $ftp->cwd($default_ftp_dir)
    or confess "Cannot change to $default_ftp_dir: ", $ftp->message;

  my @files = $ftp->ls();
  confess "Cannot list $default_ftp_dir: ", $ftp->message unless @files;

  # The listing is fully in memory, so the session can be closed now.
  $ftp->quit;

  my %species_lookup;
  foreach my $file (@files) {
    my ($species) = split(/\./, $file);
    $species_lookup{$species} = 1;
  }
  return \%species_lookup;
}
185 my ($self, $species_lookup, $names, $verbose) = @_;
186 #Loop through the names and aliases first. If we get a hit then great
188 foreach my $name (@{$names}) {
189 if($species_lookup->{$name}) {
190 printf(
'Found ArrayExpress has declared the name "%s". This was an alias'.
"\n", $name)
if $verbose;