3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::CoreXrefParser;
# Parser routine body: reads the xrefs already attached to core-database
# objects and registers each one through $self->add_xref (info_type "DIRECT")
# and $self->add_direct_xref.
# NOTE(review): this listing appears to be missing lines (the enclosing sub
# declaration, several assignments and the closing braces are not shown); the
# comments below describe only the statements that are visible.
#
# Arguments, read from the $ref_arg hash reference:
#   source_id, species_id, file - required; croaks if any of them is undefined
#   verbose                     - when true, the final summary line is printed
#   dbi                         - optional; $self->dbi is used when not passed
# Returns 0 at the end of the visible code path.
32 my ($self, $ref_arg) = @_;
33 my $source_id = $ref_arg->{source_id};
34 my $species_id = $ref_arg->{species_id};
35 my $file = $ref_arg->{file};
36 my $verbose = $ref_arg->{verbose};
37 my $dbi = $ref_arg->{dbi};
# Fall back to the parser's own database handle when the caller passed none.
38 $dbi = $self->dbi unless defined $dbi;
40 if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){
41 croak
"Need to pass source_id, species_id and file as pairs";
49 my $copy_description_from_object;
# $file is not opened here; it is scanned as a string of "key=>value," options.
# Each value is a run of non-whitespace characters and must be followed by a
# comma for the pattern to match.
51 if($file =~ /logic_name[=][>](\S+?)[,]/){
54 if($file =~ /biotype[=][>](\S+?)[,]/){
57 if($file =~ /object_type[=][>](\S+?)[,]/){
60 if($file =~ /project[=][>](\S+?)[,]/){
63 if($file =~ /copy_description_from_object[=][>](\S+?)[,]/){
64 $copy_description_from_object = $1;
# Source name for the given source_id; only used in the verbose summary below.
67 my $external_db_name = $self->get_source_name_for_source_id($source_id, $dbi);
69 #copy object xrefs from core
# The registry is used through its class name (class-method calls).
71 my $registry =
"Bio::EnsEMBL::Registry";
# The project option selects which database hosts are loaded into the registry;
# any value other than 'ensembl' or 'ensemblgenomes' ends in the die() below.
73 if ($project eq
'ensembl') {
76 '-host' =>
'mysql-ensembl-mirror.ebi.ac.uk',
81 } elsif ($project eq
'ensemblgenomes') {
83 $registry->load_registry_from_multiple_dbs(
85 '-host' =>
'mysql-eg-staging-1.ebi.ac.uk',
90 '-host' =>
'mysql-eg-staging-2.ebi.ac.uk',
97 die(
"Missing or unsupported project value. Supported values: ensembl, ensemblgenomes");
100 #get the species name
# The first entry of the list stored for this species_id is used as the name.
101 my %id2name = $self->species_id2name($dbi);
102 my $species_name = $id2name{$species_id}[0];
# 'gene' is assigned as the object type; the guarding condition is not visible
# here - presumably this is the default when no object_type option was given
# (TODO confirm).
105 $object_type =
'gene';
# Accepted object_type option values mapped to the type string that is later
# passed to add_direct_xref. The visible keys include both lower-case and
# capitalised spellings.
108 my %valid_object_types = (
112 translation =>
'Translation',
114 Transcript =>
'Transcript',
115 Translation =>
'Translation',
118 if (!exists($valid_object_types{$object_type}) ) {
120 die(
"Unsupported object type value. Supported values: ", join(
',', keys %valid_object_types) );
# A biotype filter is only accepted together with a gene or transcript object
# type.
# NOTE(review): the comparison is against the lower-case spellings only, so a
# capitalised object type that passed the table check above (e.g. 'Transcript')
# is rejected here when a biotype is given - confirm that this is intended.
123 if ($biotype && $object_type ne
'gene' && $object_type ne
'transcript') {
124 die(
"Incorrect parser argument values: expecting gene or transcript object type when biotype provided.\n");
127 my $object_adaptor = $registry->get_adaptor($species_name,
'core', $object_type);
132 print STDERR
"fetching genes...\n";
# Objects are fetched by biotype here, or by logic_name in the elsif branch
# below (the opening condition of this branch is not visible).
136 @genes = @{$object_adaptor->fetch_all_by_biotype($biotype)};
137 if ($biotype eq
"tRNA") {
138 # Fetch also all tRNA_pseudogene genes
139 push (@genes, @{$object_adaptor->fetch_all_by_biotype(
'tRNA_pseudogene')});
141 } elsif ($logic_name) {
144 print STDERR
"Fetching by logic_name, $logic_name\n";
147 # This way we get all ncRNA genes (rRNAs, tRNAs, and all ncRNAs which can be under multiple biotypes)
149 @genes = @{$object_adaptor->fetch_all_by_logic_name($logic_name)};
# Counter reported in the summary line; its increment is not visible here.
153 my $direct_count = 0;
155 print STDERR
"Fetched " . @genes .
" genes\n";
157 foreach my $object (@genes) {
159 #my @xrefs = @{$object->get_all_DBEntries($external_db_name)};
160 # as we use a generic ncRNA source, which maps to multiple external_db_id
161 my @xrefs = @{$object->get_all_DBEntries()};
163 # print STDERR "processing " . @xrefs . " xrefs\n";
166 print STDERR
"No xrefs for gene, " . $object->stable_id() .
"!\n";
169 foreach my $xref (@xrefs) {
172 my $db_name = $xref->dbname();
174 # $source_id maps to nCRNA_EG
175 # but we need to attach them specifically
176 # to RNAmmer, tRNAScan or RFAM
177 # so get the source based on the db_name from the core db
178 my $external_source_id = $self->get_source_id_for_source_name($db_name, undef, $dbi);
180 if (! defined $external_source_id) {
181 warn (
"can't get a source_id for external_db, $db_name!\n");
# %added_xref remembers the xref_id created for each primary_id so that an
# accession seen on several objects is passed to add_xref only once.
# NOTE(review): the key is the primary_id alone, while add_xref also receives
# the per-xref $external_source_id; the same accession coming from two
# different external dbs would reuse the first xref_id - confirm this is
# acceptable.
185 if (!exists($added_xref{$xref->primary_id()})) {
187 my $description = $xref->description();
# Optionally fall back to the object's own description when the xref has none.
189 if ($copy_description_from_object && !$description) {
191 if ($object->description()) {
192 #populate xref description with object description stripping the [Source: .. part
# The capture keeps everything up to (not including) the first '['.
193 ($description) = $object->description() =~ /([^\[]+)/;
194 #trim trailing spaces
195 $description =~ s/\s+$
199 $xref_id = $self->add_xref({ acc => $xref->primary_id(),
200 version => $xref->version(),
201 label => $xref->display_id(),
202 desc => $description,
203 source_id => $external_source_id,
204 species_id => $species_id,
206 info_type =>
"DIRECT"} );
209 $added_xref{$xref->primary_id()} = $xref_id;
# Accession already registered: reuse the stored xref_id.
213 $xref_id = $added_xref{$xref->primary_id()};
# Link the xref to this object's stable id, using the mapped object type.
216 $self->add_direct_xref($xref_id, $object->stable_id(), $valid_object_types{$object_type},
"", $dbi);
# Number of distinct primary_ids recorded in %added_xref.
221 my $xref_count = scalar(keys %added_xref);
223 print
"Added $xref_count $external_db_name xrefs and $direct_count $object_type direct xrefs\n" if($verbose);
# What happens when no xref was recorded is not visible in this listing.
224 if ( !$xref_count ) {
228 return 0; # successful