ensembl-hive  2.7.0
ReactomeParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::ReactomeParser;
21 
22 use strict;
23 use warnings;
24 use Carp;
25 use DBI;
26 
27 use base qw( XrefParser::BaseParser );
29 
30 # Parse file of Reactome records and assign direct xrefs
31 
32 
33 # --------------------------------------------------------------------------------
34 
# Parse Reactome mapping files and store the resulting xrefs.
#
# Takes a single hash reference with the keys:
#   source_id  - required; source id the release information is recorded against
#   species_id - required; only records whose species column matches one of
#                this species' names are loaded
#   files      - required; array reference of files to parse. A file whose
#                name contains "UniProt" is treated as keyed on UniProt
#                accessions, any other file as keyed on Ensembl stable ids
#   rel_file   - optional; file from which the release number is read
#   verbose    - optional; print the release when true
#   dbi        - optional; database handle, defaults to $self->dbi
#   species    - optional; extra name accepted for this species
#
# Returns 0 when finished (also when no species names are known and the
# files are therefore skipped). Croaks on missing mandatory arguments, on an
# unreadable file or release, and dies when a Reactome source id is missing.
sub run {

  my ($self, $ref_arg) = @_;
  my $source_id    = $ref_arg->{source_id};
  my $species_id   = $ref_arg->{species_id};
  my $files        = $ref_arg->{files};
  my $release_file = $ref_arg->{rel_file};
  my $verbose      = $ref_arg->{verbose};
  my $dbi          = $ref_arg->{dbi};
  my $species_name = $ref_arg->{species};
  $dbi = $self->dbi unless defined $dbi;

  if ( (!defined $source_id) or (!defined $species_id) or (!defined $files) ) {
    croak "Needs to pass source_id, species_id and files as pairs";
  }
  $verbose ||= 0;

  if ( defined $release_file ) {
    my $release;
    # Parse and set release information from $release_file.
    my $release_io = $self->get_filehandle($release_file);
    # get_filehandle is defined elsewhere; guard in case it hands back undef
    croak "Could not open release file $release_file" unless defined $release_io;

    while ( defined( my $line = $release_io->getline() ) ) {
      # Require at least one digit: an empty match must not end the search
      if ( $line =~ /([0-9]+)/ ) {
        $release = $1;
        print "Reactome release is '$release'\n" if ($verbose);
        last;
      }
    }

    if (!$release) {
      croak "Could not find release using $release_file\n";
    }

    $self->set_release( $source_id, $release, $dbi );
  }

  # Create a hash of all valid names for this species
  my %species2alias = $self->species_id2name($dbi);
  if (defined $species_name) { push @{ $species2alias{$species_id} }, $species_name; }
  if (!defined $species2alias{$species_id}) {
    # No name to match the species column against: nothing can be loaded.
    # (A bare 'next' is not valid here because we are not inside a loop.)
    print STDERR "No species names found for species_id $species_id, skipping Reactome files\n";
    return 0;
  }
  my %alias2species_id = map { $_ => 1 } @{ $species2alias{$species_id} };

  my $parsed_count = 0;
  my $err_count    = 0;

  my $reactome_source_id            = $self->get_source_id_for_source_name("reactome", "direct", $dbi);
  my $transcript_reactome_source_id = $self->get_source_id_for_source_name("reactome_transcript", undef, $dbi);
  my $gene_reactome_source_id       = $self->get_source_id_for_source_name("reactome_gene", undef, $dbi);
  my $reactome_uniprot_source_id    = $self->get_source_id_for_source_name("reactome", "uniprot", $dbi);
  if ($reactome_source_id < 1 || $transcript_reactome_source_id < 1 || $gene_reactome_source_id < 1) {
    die "Could not find source id for reactome sources???\n";
  }
  else {
    print "Source_id = $reactome_source_id\n";
    print "Transcript_source_id = $transcript_reactome_source_id\n";
    print "Gene_source_id = $gene_reactome_source_id\n";
  }

  if ($reactome_uniprot_source_id < 1) {
    die "Could not find source id for reactome uniprot???\n";
  }
  else {
    print "Source_id = $reactome_uniprot_source_id\n";
  }

  # Lookup from accession to a list of xref ids, used as masters for dependent xrefs
  my %uniprot = %{ $self->get_valid_codes("uniprot/", $species_id, $dbi) };

  foreach my $file (@$files) {
    my $reactome_io = $self->get_filehandle($file);
    croak "Could not open Reactome file $file" unless defined $reactome_io;

    # Decided per file, so a UniProt file does not affect the files after it
    my $is_uniprot = ( $file =~ /UniProt/ ) ? 1 : 0;

    # Example line:
    # ENSG00000138685 REACT_111045 http://www.reactome.org/PathwayBrowser/#REACT_111045 Developmental Biology TAS Homo sapiens
    LINE:
    while ( defined( my $line = $reactome_io->getline() ) ) {
      chomp $line;
      $line =~ s/\r//g;

      # Columns: id, Reactome id, url (unused), description, evidence (unused), species
      my ($ensembl_stable_id, $reactome_id, undef, $description, undef, $species) = split /\t+/, $line;

      # Blank or truncated lines can never match a species, skip them quietly
      next LINE if !defined $species;
      next LINE if $description !~ /^[A-Za-z0-9_,\(\)\/\-\.:\+'&;"\/\?%>\s\[\]]+$/;

      # Normalise e.g. "Homo sapiens" to "homo_sapiens" (every whitespace character)
      $species =~ s/\s/_/g;
      $species = lc $species;
      next LINE if !$alias2species_id{$species};

      $parsed_count++;

      # Attempt to guess the object_type based on the stable id
      # Some entries just don't match on stable id, so warn but do not die
      # For example:
      # 00000074047 REACT_268323 http://www.reactome.org/PathwayBrowser/#REACT_268323 Hedgehog 'off' state TAS Homo sapiens
      my $type;
      my $info_type = 'DIRECT';
      # Per-record source id, so a gene/transcript choice never leaks into later records
      my $xref_source_id = $reactome_source_id;

      if ($is_uniprot) {
        if ( defined $uniprot{$ensembl_stable_id} ) {
          # The id is a known UniProt accession: hang the Reactome xref off each UniProt xref
          foreach my $master_xref_id ( @{ $uniprot{$ensembl_stable_id} } ) {
            $self->add_dependent_xref({ master_xref_id => $master_xref_id,
                                        acc            => $reactome_id,
                                        label          => $reactome_id,
                                        desc           => $description,
                                        source_id      => $reactome_uniprot_source_id,
                                        dbi            => $dbi,
                                        species_id     => $species_id } );
          }
          $info_type = 'DEPENDENT';
        }
      }
      elsif ($ensembl_stable_id =~ /G[0-9]*$/) {
        $type           = 'gene';
        $xref_source_id = $gene_reactome_source_id;
      }
      elsif ($ensembl_stable_id =~ /T[0-9]*$/) {
        $type           = 'transcript';
        $xref_source_id = $transcript_reactome_source_id;
      }
      elsif ($ensembl_stable_id =~ /P[0-9]*$/) {
        $type = 'translation';
      }
      else {
        # Is not in Uniprot and does not match Ensembl stable id format
        print STDERR "Could not find type for $ensembl_stable_id\n";
        $err_count++;
        next LINE;
      }

      # Add new entry for reactome xref
      # as well as direct xref to ensembl stable id
      # NOTE(review): records from UniProt files also get this xref, with no
      # direct link because $type stays undefined - confirm that is intended
      my $xref_id = $self->add_xref({ acc        => $reactome_id,
                                      label      => $reactome_id,
                                      desc       => $description,
                                      info_type  => $info_type,
                                      source_id  => $xref_source_id,
                                      dbi        => $dbi,
                                      species_id => $species_id } );

      $self->add_direct_xref($xref_id, $ensembl_stable_id, $type, $dbi) if $type;
    }
  }

  print "$parsed_count entries processed\n$err_count not found\n";
  return 0;
}
180 
181 
182 1;
map
public map()
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()
XrefParser::Database
Definition: Database.pm:8