See the NOTICE file distributed with this work for additional information
regarding copyright ownership.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
A parser class to parse the Xenbase source file.
28 -species = xenopus_tropicalis
32 -columns = [acc label desc stable_id]
41 files => [
"xenopusjamboree.txt"],
46 package XrefParser::XenopusJamboreeParser;
65 my ( $self, $ref_arg ) = @_;
66 my $source_id = $ref_arg->{source_id};
67 my $species_id = $ref_arg->{species_id};
68 my $files = $ref_arg->{files};
69 my $verbose = $ref_arg->{verbose}
70 my $dbi = $ref_arg->{dbi}
72 if ( ( !defined $source_id )
73 or ( !defined $species_id )
74 or ( !defined $files ) )
76 confess
'Need to pass source_id, species_id and files as pairs';
79 my $file = @{$files}[0];
81 my $file_io = $self->get_filehandle($file);
82 if ( !defined $file_io ) {
83 confess
"Could not open $file\n";
86 my $input_file = Text::CSV->new({
91 }) || confess
"Cannot use file $file: " . Text::CSV->error_diag();
94 while ( my $data = $input_file->getline($file_io) ) {
97 my ( $accession, $label, $desc, $stable_id ) = @{$data};
99 # If there is a description, trim it a bit
100 if ( defined $desc ) {
101 $desc = parse_description( $desc );
104 if ( $label eq
'unnamed' ) {
108 $self->add_to_direct_xrefs({
109 stable_id => $stable_id,
115 source_id => $source_id,
116 species_id => $species_id,
122 || confess
"Error parsing file $file: " . $input_file->error_diag();
126 print $count .
" XenopusJamboreeParser xrefs succesfully parsed\n";
=head2 parse_description

Description: Extract description information from the Xenopus downloaded file
141 sub parse_description {
144 # Remove some provenance information encoded in the description
145 $desc =~ s{ \s* \[ .* \] }{}msx;
147 # Remove labels of type 5 of 14 from the description
148 $desc =~ s{ , \s+\d+\s+ of \s+\d+ }{}msx;