3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::ReactomeParser;
30 # Parse file of Reactome records and assign direct xrefs
33 # --------------------------------------------------------------------------------
# NOTE(review): this listing looks like an extraction in which the original
# file's line numbers are fused onto each line, long lines are re-wrapped and
# some lines are absent (the numbering jumps, e.g. 33 -> 37, 48 -> 52,
# 59 -> 61). The enclosing sub header, the closing braces, and the
# declarations/updates of $release, $is_uniprot, $type, $parsed_count and
# $err_count are not visible here, so the comments below describe only what
# the visible statements do.
#
# Unpack the argument hash reference: source_id, species_id, files, rel_file,
# verbose, dbi and species.
37 my ($self, $ref_arg) = @_;
38 my $source_id = $ref_arg->{source_id};
39 my $species_id = $ref_arg->{species_id};
40 my $files = $ref_arg->{files};
41 my $release_file = $ref_arg->{rel_file};
42 my $verbose = $ref_arg->{verbose};
43 my $dbi = $ref_arg->{dbi};
44 my $species_name = $ref_arg->{species};
# Fall back to the object's own dbi when the caller did not supply one.
45 $dbi = $self->dbi unless defined $dbi;
# source_id, species_id and files are mandatory; croak when any is undefined.
47 if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
48 croak
"Needs to pass source_id, species_id and files as pairs";
# The second element of the files list is kept as $file_desc; it is not
# referenced again in the visible lines.
52 my $file_desc = @{$files}[1];
54 if ( defined $release_file ) {
56 # Parse and set release information from $release_file.
57 my $release_io = $self->get_filehandle($release_file);
58 while ( defined( my $line = $release_io->getline() ) ) {
# NOTE(review): /([0-9]*)/ can match the empty string, so this test succeeds
# on every line, including lines that contain no digits at all; [0-9]+ may
# have been intended -- confirm against the release file format.
59 if ( $line =~ /([0-9]*)/ ) {
61 print
"Reactome release is '$release'\n" if($verbose);
67 croak
"Could not find release using $release_file\n";
70 $self->set_release( $source_id, $release, $dbi );
73 # Create a hash of all valid names for this species
74 my %species2alias = $self->species_id2name($dbi);
# The species name passed by the caller, if any, is appended as a further alias.
75 if (defined $species_name) { push @{$species2alias{$species_id}}, $species_name; }
# NOTE(review): no enclosing loop is visible around this `next`. If there is
# none, `next` would leave the subroutine with an "Exiting subroutine via next"
# warning; an explicit `return` would state the intent (stop when the species
# has no known names) -- confirm.
76 if (!defined $species2alias{$species_id}) { next; }
77 my @aliases = @{$species2alias{$species_id}};
# Lookup table keyed by alias; consulted below with the lower-cased species
# column of each record.
78 my %alias2species_id =
map {$_, 1} @aliases;
# Resolve the source ids this parser writes to: "reactome" (looked up twice,
# with second argument "direct" and "uniprot"), "reactome_transcript" and
# "reactome_gene".
85 my $reactome_source_id = $self->get_source_id_for_source_name(
"reactome",
"direct", $dbi);
86 my $transcript_reactome_source_id = $self->get_source_id_for_source_name(
"reactome_transcript", undef, $dbi);
87 my $gene_reactome_source_id = $self->get_source_id_for_source_name(
"reactome_gene", undef, $dbi);
88 my $reactome_uniprot_source_id = $self->get_source_id_for_source_name(
"reactome",
"uniprot", $dbi);
# A source id below 1 is treated as "not found" and is fatal.
89 if($reactome_source_id < 1 || $transcript_reactome_source_id < 1 || $gene_reactome_source_id < 1){
90 die
"Could not find source id for reactome sources???\n";
93 print
"Source_id = $reactome_source_id\n";
94 print
"Transcript_source_id = $transcript_reactome_source_id\n";
95 print
"Gene_source_id = $gene_reactome_source_id\n";
# Same "below 1 is fatal" check for the uniprot-flavoured reactome source.
98 if($reactome_uniprot_source_id < 1){
99 die
"Could not find source id for reactome uniprot???\n";
102 print
"Source_id = $reactome_uniprot_source_id\n";
# Result of get_valid_codes("uniprot/", ...) for this species. Each value is
# dereferenced below as an array of xref ids, keyed by the first column of the
# input record.
105 my (%uniprot) = %{$self->get_valid_codes(
"uniprot/",$species_id, $dbi)};
# Process every input file; a file whose name contains "UniProt" sets
# $is_uniprot.
108 foreach my $file (@$files) {
109 my $reactome_io = $self->get_filehandle($file);
110 if ($file =~ /UniProt/) { $is_uniprot = 1; }
112 # ENSG00000138685 REACT_111045 http://www.reactome.org/PathwayBrowser/#REACT_111045 Developmental Biology TAS Homo sapiens
113 while (my $line = $reactome_io->getline() ) {
# Columns: stable id, Reactome id, URL, description, evidence, species.
# NOTE(review): splitting on /\t+/ treats a run of tabs as one separator, so an
# empty column would shift every later field one position to the left --
# confirm the input never contains empty columns.
117 my ($ensembl_stable_id, $reactome_id, $url, $description, $evidence, $species) = split /\t+/,$line;
# Skip records whose description contains a character outside the allowed set.
118 if ($description!~ /^[A-Za-z0-9_,\(\)\/\-\.:\+
'&;"\/\?%>\s\[\]]+$/) { next; }
# The species column is lower-cased before the alias lookup; only records for
# an alias of the requested species are processed.
121 $species = lc($species);
122 if ( $alias2species_id{$species} ){
125 # Attempt to guess the object_type based on the stable id
126 # Some entries just don't match on stable
id, so warn but
do not die
128 # 00000074047 REACT_268323 http:
# Default to a DIRECT xref; switched to DEPENDENT below when the id is found
# in %uniprot.
130 my $info_type =
'DIRECT';
132 if (defined($uniprot{$ensembl_stable_id})) {
133 # First check if it is a uniprot id
134 foreach my $xref_id (@{$uniprot{$ensembl_stable_id}}){
135 $self->add_dependent_xref({ master_xref_id => $xref_id,
137 label => $reactome_id,
138 desc => $description,
139 source_id => $reactome_uniprot_source_id,
141 species_id => $species_id} );
143 $info_type =
'DEPENDENT';
# NOTE(review): $reactome_source_id is overwritten in the gene and transcript
# branches and is not reset anywhere in the visible lines, so a later record
# that takes the translation branch would reuse whichever gene/transcript
# source id was assigned last -- confirm whether a per-record variable was
# intended.
# NOTE(review): the digits in /G[0-9]*$/, /T[0-9]*$/ and /P[0-9]*$/ are
# optional, so any id that merely ends in that letter also matches.
146 elsif ($ensembl_stable_id =~ /G[0-9]*$/) {
148 $reactome_source_id = $gene_reactome_source_id;
150 elsif ($ensembl_stable_id =~ /T[0-9]*$/) {
151 $type =
'transcript';
152 $reactome_source_id = $transcript_reactome_source_id;
154 elsif ($ensembl_stable_id =~ /P[0-9]*$/) { $type =
'translation'; }
156 # Is not in Uniprot and does not match Ensembl stable id format
157 print STDERR
"Could not find type for $ensembl_stable_id\n";
162 # Add new entry for reactome xref
163 # as well as direct xref to ensembl stable id
164 my $xref_id = $self->add_xref({ acc => $reactome_id,
165 label => $reactome_id,
166 desc => $description,
167 info_type => $info_type,
168 source_id => $reactome_source_id,
170 species_id => $species_id} );
# The direct xref is only added when an object type was determined above.
172 $self->add_direct_xref($xref_id, $ensembl_stable_id, $type, $dbi)
if $type;
# Summary counters; the statements that update them are not visible in this
# listing.
177 print
"$parsed_count entries processed\n$err_count not found\n";