3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::PHIbaseParser;
25 use POSIX qw(strftime);
# Body of the PHIbase parser routine. The enclosing sub declaration, the
# closing braces and several statements are not present in this excerpt, so
# the notes below describe only what the visible statements do.
#
# Overall flow:
#   1. Unpack and validate the arguments.
#   2. Parse the PHI-base XML file and build %phi_mapping, keyed by UniProt
#      accession, holding the PHI ids (-phi_ids) and a taxonomy id (-tax_id).
#   3. Look up the "PHIbase" source id, the existing dependent mappings and
#      the known UniProt xrefs for the species.
#   4. For every mapped UniProt accession whose taxonomy id is configured for
#      the species, add one dependent xref per (master xref, PHI id) pair.
#
# Arguments: $self is the parser object; $ref_arg is a hash reference read
# for the keys source_id, species_id, files and verbose.
35 my ($self, $ref_arg) = @_;
36 my $source_id = $ref_arg->{source_id};
37 my $species_id = $ref_arg->{species_id};
38 my $files = $ref_arg->{files};
39 my $verbose = $ref_arg->{verbose};
# source_id, species_id and files are mandatory: croak if any is undefined.
# Note that $source_id is overwritten further down with the id looked up for
# the source name "PHIbase".
41 if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
42 croak
"Need to pass source_id, species_id and file as pairs";
# Only the first entry of the files array reference is used.
46 my $phi_xml_file = @{$files}[0];
# Diagnostic on STDERR, emitted only when verbose is true.
48 print STDERR
"PhiBase file to parse, $phi_xml_file\n" if($verbose);
# Load the whole XML document with XML::LibXML.
56 my $phi_parser = XML::LibXML->new();
57 my $phi_doc = $phi_parser->parse_file($phi_xml_file);
# Counter reported in the "Parsed ... concepts" summary below; the statement
# that increments it is not visible in this excerpt.
59 my $concepts_count = 0;
# Walk every <concept> node selected by the XPath expression.
61 foreach my $concept ($phi_doc->findnodes(
'ondex:ondexdata/ondexdataseq/concepts/concept')) {
# The text of the concept's <pid> child is used as the PHI id.
62 my ($pid_node) = $concept->findnodes(
'./pid');
63 my $pid = $pid_node->to_literal;
# findnodes is called in scalar context here and its result is dereferenced
# as an array in the loop below.
64 my $concept_accessions_aref = $concept->findnodes(
'./coaccessions/concept_accession');
65 my $uniprot_acc = undef;
67 foreach my $concept_accession (@$concept_accessions_aref) {
69 # get the one associated with UniProt
70 my ($elementOf) = $concept_accession->findnodes(
'./elementOf');
# NOTE(review): if a concept_accession has no <elementOf> child, $elementOf
# is undef and the method call below would die - TODO confirm the input
# always provides it.
72 if ($elementOf->to_literal =~ /UPROT/) {
73 my ($accession) = $concept_accession->findnodes(
'./accession');
74 $uniprot_acc = $accession->to_literal;
# Concepts without a UniProt accession: the only statement visible for this
# branch is a commented-out diagnostic.
78 if (!defined $uniprot_acc) {
79 # print STDERR "phi id, $pid, no uniprot mapping found in xml file!\n";
# First time this UniProt accession is seen, a new %phi_mapping entry is
# assigned; the assigned value is not visible in this excerpt.
82 if (!defined $phi_mapping{$uniprot_acc}) {
83 $phi_mapping{$uniprot_acc} =
# Fetch the existing entry and its -phi_ids array reference (presumably to
# append $pid; that statement is not visible here - TODO confirm).
90 my $phi_href = $phi_mapping{$uniprot_acc};
91 my $aref = $phi_href->{-phi_ids};
# Scan the concept's <concept_gds> children for the taxonomy id.
98 my $concept_gds_aref = $concept->findnodes(
'./cogds/concept_gds');
101 foreach my $concept_gds (@$concept_gds_aref) {
103 # get the one associated with Taxid
104 my ($attrname) = $concept_gds->findnodes(
'./attrname');
# NOTE(review): as with $elementOf above, $attrname is undef when the
# <attrname> child is missing - TODO confirm the input always provides it.
105 if ($attrname->to_literal =~ /TAXID/) {
106 my ($value) = $concept_gds->findnodes(
'./value');
# $taxId is assigned here; its declaration is not visible in this excerpt.
107 $taxId = $value->to_literal;
# %taxIds is consulted for this taxonomy id; the body of this test is not
# visible. The hash is only used for the "different taxIds" count below.
# NOTE(review): $taxId may be undefined if no TAXID attribute matched -
# TODO confirm.
110 if (! defined $taxIds{$taxId}) {
# Record the taxonomy id on the mapping entry of the UniProt accession.
114 if (defined $uniprot_acc) {
115 my $phi_href = $phi_mapping{$uniprot_acc};
116 $phi_href->{-tax_id} = $taxId;
# Summary on STDOUT. @phis and keys(%taxIds) are concatenated, i.e. used in
# scalar context, so the messages show element counts.
123 my @phis = keys (%phi_mapping);
125 print
"Parsed $concepts_count concepts\n";
126 print
"Found " . @phis .
" with UniProt mapping!\n";
128 print
"Found " . keys (%taxIds) .
" different taxIds\n";
130 #get the "main" PHIbase source id.
131 $source_id = $self->get_source_id_for_source_name(
"PHIbase");
134 #get the mapping that are already there so that we don't get lots of duplicates.
135 # stored in the global hash xref_dependent_mapped.
136 $self->get_dependent_mappings($source_id);
138 #if(!defined($species_id)){
139 # $species_id = $self->get_species_id_for_filename($phi_xml_file);
# %swiss is a copy of the hash returned by get_valid_codes; it is used below
# as UniProt accession => array reference of master xref ids.
143 my (%swiss) = %{$self->get_valid_codes(
"uniprot/", $species_id)};
145 print
"got " . keys (%swiss) .
" Uniprot entries\n";
147 print
"species_id, source_id: $species_id, $source_id\n";
149 # Don't check only the species_id, but all taxIds specified in xref_config.ini
151 my %species2tax = $self->species_id2taxonomy();
# NOTE(review): this dereference assumes %species2tax has an array reference
# for $species_id - TODO confirm what happens for an unknown species.
152 my @tax_ids = @{$species2tax{$species_id}};
154 print
"tax_ids from xref_config.ini file: " . join (
', ', @tax_ids) .
"\n";
# Create the dependent xrefs: only accessions whose recorded taxonomy id is
# string-equal to one of @tax_ids are processed.
158 foreach my $uniprot_acc (keys (%phi_mapping)) {
159 my $phis_href = $phi_mapping{$uniprot_acc};
160 my $taxId = $phis_href->{-tax_id};
# NOTE(review): -tax_id may be undefined if it was never set for this entry,
# in which case the eq comparison would warn - TODO confirm.
161 if (grep {$_ eq $taxId} @tax_ids) {
162 # Get the master_xref_id
165 if(!defined($swiss{$uniprot_acc})){
# NOTE(review): "master_xref_if" in the message below looks like a typo for
# "master_xref_id"; it is a runtime string and is left unchanged here.
166 print STDERR
"failed to get the master_xref_if for UniProt, $uniprot_acc!\n";
167 # one reason it happens is that the UniProt identifier is attached to a different tax node in Phibase that it is in UniProt
# One add_dependent_xref call per (master xref id, PHI id) pair.
170 foreach my $master_xref_id (@{$swiss{$uniprot_acc}}){
171 # print STDERR "master_xref_id, $master_xref_id\n";
173 my $phis_aref = $phis_href->{-phi_ids};
174 foreach my $phibase_id (@$phis_aref) {
175 # print STDERR "Adding xrefs for phibase id, $phibase_id\n";
# Arguments visible here: master_xref_id, label, source_id and species_id;
# any further keys of the original call are not present in this excerpt.
176 $self->add_dependent_xref({ master_xref_id => $master_xref_id,
178 label => $phibase_id,
179 source_id => $source_id,
180 species_id => $species_id });
# Final report on STDOUT; the declaration and update of $added are not
# visible in this excerpt.
188 print
"Added $added PHIbase xrefs\n";