ensembl-hive  2.7.0
UniProtVarSplicParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::UniProtVarSplicParser;
21 
22 # Parse UniProt alternative splice files
23 
24 use strict;
25 use warnings;
26 use Carp;
27 use File::Basename;
28 
29 use base qw( XrefParser::BaseParser );
30 
31 # UniProtVarSplic file format: fasta, e.g.
32 
33 #>P48347-2|14310_ARATH Isoform 2 of P48347 - Arabidopsis thaliana (Mouse-ear cress)
34 #MENEREKQVYLAKLSEQTERYDEMVEAMKKVAQLDVELTVEERNLVSVGYKNVIGARRAS
35 #WRILSSIEQKEESKGNDENVKRLKNYRKRVEDELAKVCNDILSVIDKHLIPSSNAVESTV
36 #FFYKMKGDYYRYLAEFSSGAERKEAADQSLEAYKAAVAAAENGLAPTHPVRLGLALNFSV
37 #FYYEILNSPESACQLAKQAFDDAIAELDSLNEESYKDSTLIMQLLRDNLTLWTSDLNEEG
38 #DERTKGADEPQDEV
39 
# Parse a UniProt alternative-splice (VarSplic) FASTA file and upload one
# xref per isoform whose parent accession is among the valid UniProt codes
# returned by get_valid_codes() for the species.
#
# Arguments (a single hash reference):
#   source_id  - source id stored on every xref that is built (required)
#   species_id - species id stored on every xref that is built (required)
#   files      - array reference; only its first element is parsed (required)
#   rel_file   - file scanned for the "UniProtKB/Swiss-Prot Release" line
#                (required)
#   verbose    - true to print progress messages to STDOUT (optional)
#
# Returns 0 on success, 1 if the FASTA file or the release file could not
# be opened. Croaks if any required argument is missing.
sub run {
  my ($self, $ref_arg) = @_;

  my $source_id    = $ref_arg->{source_id};
  my $species_id   = $ref_arg->{species_id};
  my $files        = $ref_arg->{files};
  my $release_file = $ref_arg->{rel_file};
  my $verbose      = $ref_arg->{verbose};

  if ( !defined $source_id
    or !defined $species_id
    or !defined $files
    or !defined $release_file )
  {
    croak "Need to pass source_id, species_id, files and rel_file as pairs";
  }
  $verbose |= 0;

  my $file = $files->[0];

  my $file_io = $self->get_filehandle($file);
  if ( !defined $file_io ) {
    print STDERR "ERROR: Could not open $file\n";
    return 1;    # 1 error
  }

  my %swiss = %{ $self->get_valid_codes( "uniprot", $species_id ) };

  # keys() gives the entry count on every Perl version; a bare hash in
  # scalar context reports bucket usage (e.g. "3/8") before Perl 5.26.
  print scalar( keys %swiss ) . " uniprot entries will be used as tests\n"
    if ($verbose);

  my @xrefs;
  my $missed = 0;

  {
    # Read one FASTA record per getline() call. The separator is localised
    # to this block so that the release file below is read line by line.
    local $/ = "\n>";

  RECORD:
    while ( defined( my $record = $file_io->getline() ) ) {

      # The leading '>' is optional because every record after the first
      # has had it consumed as part of the previous record's separator.
      my ( $header, $sequence ) = $record =~ /^>?(.+?)\n([^>]*)/s;
      if ( !defined $header ) {
        warn("Can't parse FASTA entry: $record\n");
        next RECORD;    # nothing usable in this record
      }

      # deconstruct header: "<accession>|<description...>"
      my ( $accession, @description ) = split /\|/, $header;
      my $description = join( " ", @description );

      # "P48347-2" -> "P48347": the isoform is only kept if its parent
      # accession is one of the valid UniProt codes.
      my ($original) = defined $accession ? split( /-/, $accession ) : ();

      if ( !defined $original or !defined $swiss{$original} ) {
        $missed++;
        next RECORD;
      }

      # make sequence into one long string
      $sequence =~ s/\n//g;

      # build the xref object and store it
      push @xrefs,
        {
        ACCESSION     => $accession,
        LABEL         => $accession,
        DESCRIPTION   => $description,
        SEQUENCE      => $sequence,
        SOURCE_ID     => $source_id,
        SPECIES_ID    => $species_id,
        SEQUENCE_TYPE => 'peptide',
        STATUS        => 'experimental',
        };
    }
  }

  $file_io->close();

  print $missed . " ignored as original uniprot not found in database\n"
    if ($verbose);
  print scalar(@xrefs) . " UniProtVarSplic xrefs succesfully parsed\n"
    if ($verbose);

  $self->upload_xref_object_graphs( \@xrefs );

  # Parse and apply the Swiss-Prot release info from $release_file.
  # ($release_file is always defined here; the argument check above croaks
  # otherwise.)
  my $release_io = $self->get_filehandle($release_file);
  if ( !defined $release_io ) {
    print STDERR "ERROR: Could not open $release_file\n";
    return 1;    # 1 error
  }

  while ( defined( my $line = $release_io->getline() ) ) {
    if ( $line =~ m#(UniProtKB/Swiss-Prot Release .*)# ) {
      my $release = $1;
      print "Swiss-Prot release is '$release'\n" if ($verbose);
      $self->set_release( $source_id, $release );
    }
  }
  $release_io->close();

  return 0;
}
125 
126 1;
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()