3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::UniProtVarSplicParser;
22 # Parse UniProt alternative splice files
31 # UniProtVarSplic file format: fasta, e.g.
33 #>P48347-2|14310_ARATH Isoform 2 of P48347 - Arabidopsis thaliana (Mouse-ear cress)
34 #MENEREKQVYLAKLSEQTERYDEMVEAMKKVAQLDVELTVEERNLVSVGYKNVIGARRAS
35 #WRILSSIEQKEESKGNDENVKRLKNYRKRVEDELAKVCNDILSVIDKHLIPSSNAVESTV
36 #FFYKMKGDYYRYLAEFSSGAERKEAADQSLEAYKAAVAAAENGLAPTHPVRLGLALNFSV
37 #FYYEILNSPESACQLAKQAFDDAIAELDSLNEESYKDSTLIMQLLRDNLTLWTSDLNEEG
# Body of the parser's entry routine. The enclosing sub declaration, the
# closing braces and a number of statements are not visible in this excerpt,
# so the notes below describe only what the visible lines do.
#
# Visible flow:
#   1. unpack and validate the argument hash,
#   2. open the first input file and read FASTA records from it,
#   3. build one xref hash per isoform whose parent accession is a known
#      "uniprot" code,
#   4. hand the collected xrefs to upload_xref_object_graphs(),
#   5. scan the release file for the Swiss-Prot release line and record it
#      with set_release().
#
# Arguments: $self, plus a hash ref with the keys source_id, species_id,
# files (array ref), rel_file and verbose.
41 my ($self, $ref_arg) = @_;
42 my $source_id = $ref_arg->{source_id};
43 my $species_id = $ref_arg->{species_id};
44 my $files = $ref_arg->{files};
45 my $release_file = $ref_arg->{rel_file};
46 my $verbose = $ref_arg->{verbose};
# source_id, species_id, files and rel_file are all mandatory; verbose is
# optional and only gates the progress messages printed below.
48 if((!defined $source_id) or (!defined $species_id) or (!defined $files) or (!defined $release_file)){
49 croak
"Need to pass source_id, species_id, files and rel_file as pairs";
# Only the first entry of the files list is parsed; any further entries are
# not read by the visible code.
53 my $file = @{$files}[0];
59 my $file_io = $self->get_filehandle($file);
# An undefined handle is treated as "could not open"; the error goes to STDERR.
# What happens next (e.g. an early return) is on a line not shown here.
61 if ( !defined $file_io ) {
62 print STDERR
"ERROR: Could not open $file\n";
# Lookup table used to decide which isoforms to keep: it is indexed below by
# the parent accession. Presumably keyed by UniProt accession -- verify
# against get_valid_codes().
66 my %swiss = %{ $self->get_valid_codes(
"uniprot", $species_id ) };
# NOTE(review): a hash in scalar context only yields the number of keys on
# Perl 5.26 and later; older perls give a "used/allocated buckets" string
# such as "3/8". scalar(keys %swiss) would report the count everywhere.
68 print scalar(%swiss).
" uniprot entries will be used as tests\n" if($verbose);
# Each getline() result is handled as one whole FASTA record (header line
# plus all sequence lines). That presumably relies on the input record
# separator being changed on a line not shown in this excerpt -- TODO confirm.
70 while ( $_ = $file_io->getline() ) {
# Header = text after an optional leading '>' up to the first newline;
# sequence = everything after that up to the next '>'.
# NOTE(review): a record that does not match only triggers the warning; no
# "next" is visible here, so the code below would run with $header undefined.
73 my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn(
"Can't parse FASTA entry: $_\n");
# The header is '|'-separated: the first field is the isoform accession and
# the remaining fields are re-joined with spaces to form the description.
76 my ($accession, @description) = split /\|/, $header;
77 my $description = join(
" ", @description);
# Isoform accessions look like "P48347-2": the part before '-' is the parent
# entry. $extension is not used by any visible line.
79 my ($original, $extension) = split/-/, $accession;
# Keep the isoform only when its parent accession is in the lookup table.
81 if(defined($swiss{$original})){
82 # make sequence into one long string
85 # build the xref object and store it
# The isoform accession doubles as the display label. The initialisation of
# $xref and its storage into @xrefs are not visible in this excerpt.
86 $xref->{ACCESSION} = $accession;
87 $xref->{LABEL} = $accession;
88 $xref->{DESCRIPTION} = $description;
89 $xref->{SEQUENCE} = $sequence;
90 $xref->{SOURCE_ID} = $source_id;
91 $xref->{SPECIES_ID} = $species_id;
92 $xref->{SEQUENCE_TYPE} =
'peptide';
93 $xref->{STATUS} =
'experimental';
# Summary counters for verbose runs. $missed and @xrefs are declared and
# updated on lines not shown here.
104 print $missed.
" ignored as original uniprot not found in database\n" if($verbose);
105 print scalar(@xrefs) .
" UniProtVarSplic xrefs succesfully parsed\n" if($verbose);
107 $self->upload_xref_object_graphs(\@xrefs);
# NOTE(review): $release_file was already required to be defined by the
# argument check at the top, so this condition looks always-true whenever
# that check is fatal.
109 if ( defined $release_file ) {
110 # Parse and apply the Swiss-Prot release info
111 # from $release_file.
# NOTE(review): unlike $file_io above, this handle is not tested for undef
# before getline() is called on it.
112 my $release_io = $self->get_filehandle($release_file);
113 while ( defined( my $line = $release_io->getline() ) ) {
# Captures from "UniProtKB/Swiss-Prot Release" to the end of the line and
# stores it as the release string for this source. No "last" is visible, so
# every matching line would call set_release() again -- confirm against the
# full file.
114 if ( $line =~ m#(UniProtKB/Swiss-Prot Release .*)# ) {
115 print
"Swiss-Prot release is '$1'\n" if($verbose);
116 $self->set_release( $source_id, $1 );
119 $release_io->close();