ensembl-hive  2.8.1
XenopusJamboreeParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 =head1 NAME
21 
22 XrefParser::XenopusJamboreeParser
23 
24 =head1 DESCRIPTION
25 
26 A parser class to parse the Xenbase source file.
27 
28 -species = xenopus_tropicalis
29 -species_id = 8364
30 -data_uri = ftp://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping.txt
31 -file_format = TSV
32 -columns = [acc label desc stable_id]
33 
34 
35 =head1 SYNOPSIS
36 
37  my $parser = XrefParser::XenopusJamboreeParser->new($db->dbh);
38  $parser->run({
39  source_id => 150,
40  species_id => 8364,
41  files => ["xenopusjamboree.txt"],
42  });
43 
44 =cut
45 
46 package XrefParser::XenopusJamboreeParser;
47 
48 use strict;
49 use warnings;
50 
51 use Carp;
52 use Text::CSV;
53 
54 use parent qw( XrefParser::BaseParser );
55 
56 
57 =head2 run
58  Description: Runs the XenopusJamboreeParser
59  Return type: Integer (0 once the file has been parsed)
60  Caller : internal
61 
62 =cut
63 
sub run {
  # Read the tab-separated Xenbase mapping file and register one direct
  # gene xref per row via add_to_direct_xrefs.
  #
  # Arguments (single hashref):
  #   source_id  - required, passed through to add_to_direct_xrefs
  #   species_id - required, passed through to add_to_direct_xrefs
  #   files      - required, arrayref; only the first entry is read
  #   verbose    - optional (default 0); print a summary line when true
  #   dbi        - optional (default $self->dbi)
  #
  # Returns 0 once the whole file has been consumed. Confesses when a
  # required argument is missing, the file cannot be opened, or the
  # parser stops before reaching end of file.
  my ( $self, $ref_arg ) = @_;
  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $files      = $ref_arg->{files};
  my $verbose    = $ref_arg->{verbose} // 0;
  my $dbi        = $ref_arg->{dbi} // $self->dbi;

  if ( ( !defined $source_id )
    or ( !defined $species_id )
    or ( !defined $files ) )
  {
    confess 'Need to pass source_id, species_id and files as pairs';
  }

  # Only the first file of the list is processed.
  my $file = $files->[0];

  my $file_io = $self->get_filehandle($file);
  if ( !defined $file_io ) {
    confess "Could not open $file\n";
  }

  my $input_file = Text::CSV->new({
    sep_char       => "\t",
    empty_is_undef => 1,
    binary         => 1,
    verbatim       => 1
  }) || confess "Cannot use file $file: " . Text::CSV->error_diag();

  my $count = 0;
  while ( my $data = $input_file->getline($file_io) ) {
    # empty_is_undef turns empty columns into undef, so strip the line
    # terminators only from fields that actually hold a value; this
    # avoids "uninitialized value" warnings on sparse rows.
    for my $field ( @{$data} ) {
      next if !defined $field;
      $field =~ tr/\r\n//d;
    }

    my ( $accession, $label, $desc, $stable_id ) = @{$data};

    # If there is a description, trim it a bit
    if ( defined $desc ) {
      $desc = parse_description( $desc );
    }

    # A missing label gets the same fallback as the 'unnamed'
    # placeholder: use the accession as the display label.
    if ( !defined $label or $label eq 'unnamed' ) {
      $label = $accession;
    }

    $self->add_to_direct_xrefs({
      stable_id  => $stable_id,
      type       => 'gene',
      acc        => $accession,
      label      => $label,
      desc       => $desc,
      dbi        => $dbi,
      source_id  => $source_id,
      species_id => $species_id,
    });
    $count++;
  }

  # getline returns false both at end of file and on a parse error;
  # only the former is acceptable.
  $input_file->eof
    or confess "Error parsing file $file: " . $input_file->error_diag();
  $file_io->close();

  if ($verbose) {
    print $count . " XenopusJamboreeParser xrefs succesfully parsed\n";
  }

  return 0;
} ## end sub run
131 
132 
133 =head2 parse_description
134  Description: Extract description information from
135  Xenopus downloaded file
136  Return type: String (the trimmed description)
137  Caller : internal
138 
139 =cut
140 
sub parse_description {
  # Strip annotation noise from a description string and return the
  # cleaned copy. The argument is taken by value, so the caller's
  # variable is left untouched.
  my ( $desc ) = @_;

  # Applied in order, each at most once:
  #  1. optional whitespace followed by a bracketed section (provenance
  #     information); the match is greedy, so it runs from the first
  #     '[' to the last ']'
  #  2. a ", N of M" label such as ", 5 of 14"
  my @unwanted_parts = (
    qr{ \s* \[ .* \] }msx,
    qr{ , \s+\d+\s+ of \s+\d+ }msx,
  );

  for my $unwanted (@unwanted_parts) {
    $desc =~ s{$unwanted}{};
  }

  return $desc;
}
152 
153 
154 1;
XrefParser::XenopusJamboreeParser
Definition: XenopusJamboreeParser.pm:27
XrefParser::BaseParser
Definition: BaseParser.pm:8
XrefParser::XenopusJamboreeParser::run
public N run()
run
public run()