ensembl-hive  2.8.1
CoreXrefParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::CoreXrefParser;
21 
22 use strict;
23 use warnings;
24 use Carp;
25 use DBI;
26 
27 use base qw( XrefParser::BaseParser );
29 
=head2 run_script

  Arg [1]    : hashref $ref_arg with the keys
                 source_id  - (required) source the parser was invoked for;
                              used to look up the source name that appears
                              in the verbose summary line
                 species_id - (required) species whose core database is read
                 file       - (required) option string made of
                              comma-terminated "key=>value," pairs. Recognised
                              keys: logic_name, biotype, object_type, project,
                              copy_description_from_object
                 verbose    - (optional) true to print progress messages
                 dbi        - (optional) database handle; defaults to
                              $self->dbi
  Description: Loads the Ensembl registry for the requested project, fetches
               the gene/transcript/translation objects selected by biotype
               (or, failing that, by logic_name) and, for every DBEntry
               attached to them, calls add_xref (once per primary id) and
               add_direct_xref (once per object/xref pair).
  Return type: Int; 0 on success, 1 when no xref was added or when an
               external db name has no matching source_id
  Exceptions : croaks when source_id, species_id or file is missing, or when
               no adaptor can be obtained for a requested fetch; dies on a
               missing/unsupported project, on an unsupported object_type,
               and when a biotype is combined with an object_type other than
               gene or transcript

=cut

sub run_script {

  my ($self, $ref_arg) = @_;
  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $file       = $ref_arg->{file};
  my $verbose    = $ref_arg->{verbose};
  my $dbi        = $ref_arg->{dbi};
  $dbi = $self->dbi unless defined $dbi;

  if ( (!defined $source_id) or (!defined $species_id) or (!defined $file) ) {
    croak "Need to pass source_id, species_id and file as pairs";
  }
  $verbose |= 0;

  # The "file" argument is really an option string: "key=>value," pairs.
  # Each value must be followed by a comma to be recognised; keys that are
  # absent are left undefined.
  my %option;
  foreach my $key (qw(logic_name biotype object_type project copy_description_from_object)) {
    ($option{$key}) = $file =~ /\Q$key\E=>(\S+?),/;
  }
  my $logic_name                   = $option{logic_name};
  my $biotype                      = $option{biotype};
  my $object_type                  = $option{object_type};
  my $project                      = $option{project};
  my $copy_description_from_object = $option{copy_description_from_object};

  my $external_db_name = $self->get_source_name_for_source_id($source_id, $dbi);

  # copy object xrefs from core

  # Database servers the registry is loaded from, per supported project.
  my %registry_servers_for = (
    ensembl => [
      {
        '-host' => 'mysql-ensembl-mirror.ebi.ac.uk',
        '-port' => 4240,
        '-user' => 'ensro',
      },
    ],
    ensemblgenomes => [
      {
        '-host' => 'mysql-eg-staging-1.ebi.ac.uk',
        '-port' => 4160,
        '-user' => 'ensro',
      },
      {
        '-host' => 'mysql-eg-staging-2.ebi.ac.uk',
        '-port' => 4275,
        '-user' => 'ensro',
      },
    ],
  );

  # Test definedness first so that a missing project option produces the
  # intended error rather than "uninitialized value" warnings.
  if ( !defined $project or !exists $registry_servers_for{$project} ) {
    die("Missing or unsupported project value. Supported values: ensembl, ensemblgenomes");
  }

  my $registry = "Bio::EnsEMBL::Registry";

  # Class methods are called on the registry below, so make sure the module
  # is loaded; this is a no-op when it already is.
  require Bio::EnsEMBL::Registry;

  $registry->load_registry_from_multiple_dbs( @{ $registry_servers_for{$project} } );

  # get the species name
  my %id2name      = $self->species_id2name($dbi);
  my $species_name = $id2name{$species_id}[0];

  if (!$object_type) {
    $object_type = 'gene';
  }

  # Accepted spellings of the object type, mapped to the name handed to
  # add_direct_xref.
  my %valid_object_types = (
    gene        => 'Gene',
    transcript  => 'Transcript',
    translation => 'Translation',
    Gene        => 'Gene',
    Transcript  => 'Transcript',
    Translation => 'Translation',
  );

  if ( !exists $valid_object_types{$object_type} ) {
    die("Unsupported object type value. Supported values: ", join(',', sort keys %valid_object_types) );
  }

  # Compare case-insensitively: 'Gene' and 'Transcript' are accepted
  # spellings above and must not be rejected here.
  if ( $biotype && lc($object_type) ne 'gene' && lc($object_type) ne 'transcript' ) {
    die("Incorrect parser argument values: expecting gene or transcript object type when biotype provided.\n");
  }

  my $object_adaptor = $registry->get_adaptor($species_name, 'core', $object_type);

  # Only the biotype/logic_name fetches below use the adaptor; fail with a
  # clear message instead of "Can't call method ... on an undefined value".
  if ( ($biotype || $logic_name) && !defined $object_adaptor ) {
    croak "Could not get a core $object_type adaptor for species_id $species_id";
  }

  my @genes;

  if ($verbose) {
    print STDERR "fetching genes...\n";
  }

  if ($biotype) {
    @genes = @{ $object_adaptor->fetch_all_by_biotype($biotype) };
    if ($biotype eq "tRNA") {
      # Fetch also all tRNA_pseudogene genes
      push @genes, @{ $object_adaptor->fetch_all_by_biotype('tRNA_pseudogene') };
    }
  }
  elsif ($logic_name) {

    if ($verbose) {
      print STDERR "Fetching by logic_name, $logic_name\n";
    }

    # This way we get all ncRNA genes (rRNAs, tRNAs, and all ncRNAs which can be under multiple biotypes)
    @genes = @{ $object_adaptor->fetch_all_by_logic_name($logic_name) };
  }

  my %added_xref;          # primary id => xref id returned by add_xref
  my $direct_count = 0;

  print STDERR "Fetched " . @genes . " genes\n";

  foreach my $object (@genes) {

    # All DBEntries are fetched rather than only those of $external_db_name,
    # as we use a generic ncRNA source, which maps to multiple external_db_id
    my @xrefs = @{ $object->get_all_DBEntries() };

    if (@xrefs == 0) {
      print STDERR "No xrefs for gene, " . $object->stable_id() . "!\n";
    }

    foreach my $xref (@xrefs) {

      my $db_name   = $xref->dbname();
      my $accession = $xref->primary_id();

      # $source_id maps to nCRNA_EG
      # but we need to attach them specifically
      # to RNAmmer, tRNAScan or RFAM
      # so get the source based on the db_name from the core db
      my $external_source_id = $self->get_source_id_for_source_name($db_name, undef, $dbi);

      if ( !defined $external_source_id ) {
        warn("can't get a source_id for external_db, $db_name!\n");
        return 1;
      }

      # Each primary id is added as an xref only once; later occurrences
      # reuse the id recorded the first time.
      if ( !exists $added_xref{$accession} ) {

        my $description = $xref->description();

        if ( $copy_description_from_object && !$description ) {

          my $object_description = $object->description();

          if ($object_description) {
            # populate xref description with object description, keeping
            # only the text that precedes the "[Source: .." part
            ($description) = $object_description =~ /\A([^\[]+)/;
            # trim trailing spaces
            $description =~ s/\s+$// if defined $description;
          }
        }

        $added_xref{$accession} = $self->add_xref({
          acc        => $accession,
          version    => $xref->version(),
          label      => $xref->display_id(),
          desc       => $description,
          source_id  => $external_source_id,
          species_id => $species_id,
          dbi        => $dbi,
          info_type  => "DIRECT",
        });
      }

      my $xref_id = $added_xref{$accession};

      $self->add_direct_xref($xref_id, $object->stable_id(), $valid_object_types{$object_type}, "", $dbi);
      $direct_count++;
    }
  }

  my $xref_count = scalar(keys %added_xref);

  print "Added $xref_count $external_db_name xrefs and $direct_count $object_type direct xrefs\n" if ($verbose);

  if ( !$xref_count ) {
    return 1;    # 1 error
  }

  return 0;      # successful
}
232 
233 1;
transcript
public transcript()
XrefParser::BaseParser
Definition: BaseParser.pm:8
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::Registry::load_registry_from_multiple_dbs
public Int load_registry_from_multiple_dbs()