See the NOTICE file distributed with this work for additional information
regarding copyright ownership.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
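# Parse UniProt (SwissProt & SPTrEMBL) records to create xrefs.
#
# NOTE: the code in this module reads the xrefs via SQL from the
# xref_source database handle rather than from flat files.
#
# The data contain both types of xref, distinguished by the ID line:
#
#   ID   CYC_PIG        Reviewed;     104 AA.    (Swissprot)
#   ID   Q3ASY8_CHLCH   Unreviewed; 36805 AA.    (SPTrEMBL)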
29 package XrefParser::UniProtDatabaseParser;
34 use POSIX qw(strftime);
# Body of the parser entry point. The enclosing `sub` line, several `my`
# declarations (@source_ids, @xrefs, $xref, %direct, %dep) and all closing
# braces are not visible in this excerpt.
#
# Arguments (read from the $ref_arg hashref):
#   source_id   - required (only checked for definedness in the visible code)
#   species_id  - required; used to filter the xref rows that are read
#   dbi         - handle passed on to the helper methods called on $self
#   xref_source - required; handle on which all SELECT statements are prepared
#
# Croaks when source_id, species_id or xref_source is undefined.
# Returns 0 at the end of the visible code.
43 my ($self, $ref_arg) = @_;
44 my $source_id = $ref_arg->{source_id};
45 my $species_id = $ref_arg->{species_id};
46 my $dbi = $ref_arg->{dbi};
47 my $xref_source = $ref_arg->{xref_source};
# NOTE(review): $dbi is not part of this check although it is passed to the
# helper calls below - confirm that an undefined $dbi is acceptable there.
49 if((!defined $source_id) or (!defined $species_id) or (!defined $xref_source)){
50 croak
"Need to pass source_id, species_id and xref_source";
# Collect the six source ids whose xrefs will be read: SWISSPROT and SPTREMBL
# in their 'sequence_mapped' and 'direct' variants, SPTREMBL
# 'protein_evidence_gt_2', and Uniprot_isoform.
54 my $sp_source_id = $self->get_source_id_for_source_name(
'Uniprot/SWISSPROT',
'sequence_mapped', $dbi);
55 push @source_ids, $sp_source_id;
56 my $sptr_source_id = $self->get_source_id_for_source_name(
'Uniprot/SPTREMBL',
'sequence_mapped', $dbi);
57 push @source_ids, $sptr_source_id;
58 my $sptr_non_display_source_id = $self->get_source_id_for_source_name(
'Uniprot/SPTREMBL',
'protein_evidence_gt_2', $dbi);
59 push @source_ids, $sptr_non_display_source_id;
60 my $sp_direct_source_id = $self->get_source_id_for_source_name(
'Uniprot/SWISSPROT',
'direct', $dbi);
61 push @source_ids, $sp_direct_source_id;
62 my $sptr_direct_source_id = $self->get_source_id_for_source_name(
'Uniprot/SPTREMBL',
'direct', $dbi);
63 push @source_ids, $sptr_direct_source_id;
# NOTE(review): unlike the five calls above, this one passes neither a second
# argument nor $dbi - confirm the helper copes with both being omitted.
64 my $isoform_source_id = $self->get_source_id_for_source_name(
'Uniprot_isoform');
65 push @source_ids, $isoform_source_id;
# Prepare every statement once, on $xref_source, so the loops below only
# re-execute them with new placeholder values.
# Top-level xrefs for one species/source pair.
67 my $get_xref_sql =
"SELECT xref_id, accession, version, label, description, info_type ".
68 "FROM xref WHERE species_id = ? AND source_id = ?";
69 my $get_xref_sth = $xref_source->prepare($get_xref_sql);
# Xrefs that depend on a given master xref (xref joined to dependent_xref).
70 my $get_dependent_sql =
"SELECT x.xref_id, x.accession, x.version, x.label, x.description, x.source_id, x.species_id, dx.linkage_source_id FROM xref x, dependent_xref dx ".
71 "WHERE dx.dependent_xref_id = x.xref_id and dx.master_xref_id = ?";
72 my $get_dependent_sth = $xref_source->prepare($get_dependent_sql);
# Sequence rows stored for an xref.
73 my $get_sequence_sql =
"SELECT sequence, sequence_type, status FROM primary_xref WHERE xref_id = ?";
74 my $get_sequence_sth = $xref_source->prepare($get_sequence_sql);
# Synonyms of an xref; this handle is reused for dependent xrefs further down.
75 my $get_synonym_sql =
"SELECT synonym FROM synonym WHERE xref_id = ?";
76 my $get_synonym_sth = $xref_source->prepare($get_synonym_sql);
# Direct links from an xref to stable ids in translation_direct_xref.
77 my $get_direct_sql =
"SELECT ensembl_stable_id, linkage_xref FROM translation_direct_xref WHERE general_xref_id = ?";
78 my $get_direct_sth = $xref_source->prepare($get_direct_sql);
# Scalars that the bind_columns calls below write into on every fetch.
79 my ($xref_id, $accession, $version, $label, $description, $info_type, $parsed_seq, $type, $status, $dep_xref_id, $dep_accession, $dep_version, $dep_label, $dep_description, $dep_source_id, $dep_species_id, $linkage_source_id, $synonym, $stable_id, $linkage_xref);
# Outer loop: one pass per collected source id.
84 foreach my $xref_source_id (@source_ids) {
85 $get_xref_sth->execute($species_id, $xref_source_id);
86 $get_xref_sth->bind_columns(\$xref_id, \$accession, \$version, \$label, \$description, \$info_type);
87 while ($get_xref_sth->fetch) {
# Copy the fetched columns into the $xref record.
# NOTE(review): the creation of $xref is not visible in this excerpt.
90 $xref->{ACCESSION} = $accession;
91 $xref->{LABEL} = $label;
92 $xref->{VERSION} = $version;
93 $xref->{SPECIES_ID} = $species_id;
94 $xref->{INFO_TYPE} = $info_type;
95 $xref->{SOURCE_ID} = $xref_source_id;
96 $xref->{DESCRIPTION} = $description;
98 # Add sequence if there is some
99 $get_sequence_sth->execute($xref_id);
100 $get_sequence_sth->bind_columns(\$parsed_seq, \$type, \$status);
# Each fetched row assigns the same three keys, so if several rows come back
# the values of the last row are the ones kept.
101 while ($get_sequence_sth->fetch) {
102 $xref->{SEQUENCE_TYPE} = $type;
103 $xref->{STATUS} = $status;
104 $xref->{SEQUENCE} = $parsed_seq;
# Append every synonym of this xref to its SYNONYMS list.
108 $get_synonym_sth->execute($xref_id);
109 $get_synonym_sth->bind_columns(\$synonym);
110 while ($get_synonym_sth->fetch) {
111 push (@{$xref->{SYNONYMS} }, $synonym);
114 # Look for direct xref
115 $get_direct_sth->execute($xref_id);
116 $get_direct_sth->bind_columns(\$stable_id, \$linkage_xref);
117 while ($get_direct_sth->fetch) {
# NOTE(review): the declaration of %direct is not visible here; it has to be a
# fresh lexical per iteration, otherwise every reference pushed below would
# point at the same hash.
120 $direct{STABLE_ID} = $stable_id;
# Rows come from translation_direct_xref, so the type is fixed.
121 $direct{ENSEMBL_TYPE} =
'Translation';
122 $direct{LINKAGE_TYPE} = $linkage_xref;
123 $direct{SOURCE_ID} = $xref_source_id;
124 push (@{$xref->{DIRECT_XREFS}}, \%direct);
127 #Add any dependent xrefs
128 $get_dependent_sth->execute($xref_id);
129 $get_dependent_sth->bind_columns(\$dep_xref_id, \$dep_accession, \$dep_version, \$dep_label, \$dep_description, \$dep_source_id, \$dep_species_id, \$linkage_source_id);
130 while ($get_dependent_sth->fetch) {
# Skip dependent xrefs that belong to a different species.
# NOTE(review): numeric comparison; a NULL species_id in the row would raise an
# "uninitialized value" warning - confirm the column is never NULL.
131 if ($dep_species_id != $species_id) { next; }
# NOTE(review): as with %direct, %dep must be a fresh hash per iteration
# (its declaration is not visible in this excerpt).
133 $dep{ACCESSION} = $dep_accession;
134 $dep{LABEL} = $dep_label;
135 $dep{VERSION} = $dep_version;
136 $dep{DESCRIPTION} = $dep_description;
137 $dep{SOURCE_ID} = $dep_source_id;
138 $dep{LINKAGE_SOURCE_ID} = $linkage_source_id;
# Re-execute the synonym statement, this time for the dependent xref.
139 $get_synonym_sth->execute($dep_xref_id);
140 $get_synonym_sth->bind_columns(\$synonym);
141 while ($get_synonym_sth->fetch) {
142 push (@{$dep{SYNONYMS} }, $synonym);
144 push @{$xref->{DEPENDENT_XREFS}}, \%dep;
# NOTE(review): the condition guarding this upload and any reset of @xrefs
# afterwards are not visible in this excerpt; presumably an intermediate
# flush of accumulated records - confirm against the full file.
150 $self->upload_xref_object_graphs( \@xrefs, $dbi );
# Upload whatever is still held in @xrefs; skipped when the list is empty.
157 $self->upload_xref_object_graphs(\@xrefs, $dbi)
if scalar(@xrefs) > 0;
158 return 0; # successful