ensembl-hive  2.7.0
ArrayExpressParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::ArrayExpressParser;
21 
22 ## Parsing format looks like (so we extract the species name):
23 # anopheles_gambiae.A-AFFY-102.tsv
24 #
25 
26 use strict;
27 use warnings;
28 use Carp;
29 use base qw( XrefParser::BaseParser );
31 use Net::FTP;
32 
33 my $default_ftp_server = 'ftp.ebi.ac.uk';
34 my $default_ftp_dir = 'pub/databases/microarray/data/atlas/bioentity_properties/ensembl';
35 
# Create one DIRECT ArrayExpress xref per gene stable ID of a species.
#
# Arguments (a single hashref):
#   source_id  - xref source identifier (required)
#   species_id - species identifier (required)
#   file       - despite its name, an option string of "key=>value," pairs;
#                the keys project, host, port, dbname, pass and user are
#                recognised (required)
#   species    - optional species name, appended to the names known for
#                species_id
#   verbose    - optional flag enabling progress messages
#   dba        - optional object providing get_GeneAdaptor(); used when
#                neither a host nor a supported project is given
#   dbi        - optional xref database handle; defaults to $self->dbi
#
# The gene adaptor is chosen in this order: explicit host, project
# 'ensembl', project 'ensemblgenomes', then the supplied dba.
#
# Returns 0 on success and 1 when no xref was added. Returns nothing when
# no name is known for the species or when none of its names is listed by
# ArrayExpress.
# Croaks when a required argument is missing; dies when no gene adaptor
# source can be determined.
sub run_script {

  my ($self, $ref_arg) = @_;
  my $source_id    = $ref_arg->{source_id};
  my $species_id   = $ref_arg->{species_id};
  my $species_name = $ref_arg->{species};
  my $file         = $ref_arg->{file};
  my $verbose      = $ref_arg->{verbose};
  my $db           = $ref_arg->{dba};
  my $dbi          = $ref_arg->{dbi};
  $dbi = $self->dbi unless defined $dbi;

  if ( (!defined $source_id) or (!defined $species_id) or (!defined $file) ) {
    croak "Need to pass source_id, species_id and file as pairs";
  }
  $verbose |= 0;

  # Connection options, with defaults for the ones that have them. Each
  # option is looked up in the option string as "key=>value,".
  my %option = (
    user => "ensro",
    port => 3306,
  );
  foreach my $key (qw(project host port dbname pass user)) {
    if ($file =~ /$key[=][>](\S+?)[,]/) {
      $option{$key} = $1;
    }
  }
  my ($project, $host, $port, $dbname, $pass, $user) =
    @option{qw(project host port dbname pass user)};

  my %species_id_to_names = $self->species_id2name($dbi);
  if (defined $species_name) {
    push @{ $species_id_to_names{$species_id} }, $species_name;
  }

  # Nothing to do when no name is known for this species. This used to be
  # a 'next', which is not valid here because we are not inside a loop.
  if (!defined $species_id_to_names{$species_id}) {
    return;
  }

  my $names          = $species_id_to_names{$species_id};
  my $species_lookup = $self->_get_species($verbose);
  my $active         = $self->_is_active($species_lookup, $names, $verbose);

  if (!$active) {
    return;
  }

  $species_name = $species_id_to_names{$species_id}[0];

  #get stable_ids from core and create xrefs

  my $registry = "Bio::EnsEMBL::Registry";
  my ($gene_adaptor);
  if ($host) {
    # NOTE(review): the line opening this constructor call was missing from
    # the listing; Bio::EnsEMBL::DBSQL::DBAdaptor->new is assumed because the
    # result is used via get_GeneAdaptor() below - confirm, and confirm the
    # class is loaded by this file.
    $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
      '-host'    => $host,
      '-port'    => $port,
      '-user'    => $user,
      '-pass'    => $pass,
      '-dbname'  => $dbname,
      '-species' => $species_name,
      '-group'   => 'core',
    );
    $gene_adaptor = $db->get_GeneAdaptor();
  } elsif (defined $project && $project eq 'ensembl') {
    print "Loading the Registry\n" if $verbose;
    $registry->load_registry_from_multiple_dbs(
      {
        '-host' => 'mysql-ens-sta-1',
        '-port' => 4519,
        '-user' => 'ensro',
      },
    );
    $gene_adaptor = $registry->get_adaptor($species_name, 'core', 'Gene');
  } elsif (defined $project && $project eq 'ensemblgenomes') {
    $registry->load_registry_from_multiple_dbs(
      {
        '-host' => 'mysql-eg-staging-1.ebi.ac.uk',
        '-port' => 4160,
        '-user' => 'ensro',
      },
      {
        '-host' => 'mysql-eg-staging-2.ebi.ac.uk',
        '-port' => 4275,
        '-user' => 'ensro',
      },
    );
    $gene_adaptor = $registry->get_adaptor($species_name, 'core', 'Gene');
  } elsif (defined $db) {
    $gene_adaptor = $db->get_GeneAdaptor();
  } else {
    die("Missing or unsupported project value. Supported values: ensembl, ensemblgenomes");
  }
  print "Finished loading the registry\n" if $verbose;

  my @stable_ids = map { $_->stable_id } @{ $gene_adaptor->fetch_all() };

  my $xref_count = 0;
  foreach my $gene_stable_id (@stable_ids) {

    my $xref_id = $self->add_xref({ acc        => $gene_stable_id,
                                    label      => $gene_stable_id,
                                    source_id  => $source_id,
                                    species_id => $species_id,
                                    dbi        => $dbi,
                                    info_type  => "DIRECT" });

    $self->add_direct_xref( $xref_id, $gene_stable_id, 'gene', '', $dbi);
    if ($xref_id) {
      $xref_count++;
    }
  }

  print "Added $xref_count DIRECT xrefs\n" if($verbose);
  if ( !$xref_count ) {
    return 1; # 1 error
  }

  return 0; # successful

}
165 
# Fetch the set of species names published on the ArrayExpress FTP site.
#
# Logs in anonymously to $default_ftp_server, lists $default_ftp_dir and
# takes the part of each entry before the first dot as a species name
# (entries look like "anopheles_gambiae.A-AFFY-102.tsv").
#
# Arguments:
#   verbose - optional flag, passed to Net::FTP as its Debug setting
#
# Returns a hashref whose keys are the species names found (values are 1).
# Confesses when connecting, logging in, changing directory or listing
# fails, or when the listing is empty.
sub _get_species {
  my ($self, $verbose) = @_;
  $verbose = (defined $verbose) ? $verbose : 0;

  # $@ is only meaningful for a failed constructor; later failures are
  # reported through $ftp->message.
  my $ftp = Net::FTP->new($default_ftp_server, Debug => $verbose)
    or confess "Cannot connect to $default_ftp_server: $@";
  $ftp->login("anonymous", '-anonymous@')
    or confess "Cannot login ", $ftp->message;
  $ftp->cwd($default_ftp_dir)
    or confess "Cannot change to $default_ftp_dir: ", $ftp->message;

  my @files = $ftp->ls();
  if (!@files) {
    confess "Cannot list $default_ftp_dir: ", $ftp->message;
  }
  $ftp->quit;

  my %species_lookup;
  foreach my $file (@files) {
    my ($species) = split(/\./, $file);
    # Skip entries that yield no name (e.g. an empty string).
    next unless defined $species && length $species;
    $species_lookup{$species} = 1;
  }
  return \%species_lookup;
}
183 
# Decide whether ArrayExpress lists any of the given species names.
#
# Arguments:
#   species_lookup - hashref keyed by the species names ArrayExpress declares
#   names          - arrayref of candidate names/aliases, checked in order
#   verbose        - when true, report the first name that matched
#
# Returns 1 as soon as one candidate has a true entry in the lookup,
# otherwise 0.
sub _is_active {
  my ($self, $species_lookup, $names, $verbose) = @_;

  for my $candidate (@{$names}) {
    # Not declared by ArrayExpress; try the next name or alias.
    next unless $species_lookup->{$candidate};

    if ($verbose) {
      printf('Found ArrayExpress has declared the name "%s". This was an alias'."\n", $candidate);
    }
    return 1;
  }

  return 0;
}
197 
198 
199 1;
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
map
public map()
XrefParser::BaseParser
Definition: BaseParser.pm:8
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
Bio::EnsEMBL::DBSQL::DBAdaptor::get_adaptor
public Adaptor get_adaptor()