ensembl-hive  2.7.0
ZFINDescParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =head1 CONTACT
19 
20  Please email comments or questions to the public Ensembl
21  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
22 
23  Questions may also be sent to the Ensembl help desk at
24  <http://www.ensembl.org/Help/Contact>.
25 
26 =head1 NAME
27 
28 XrefParser::ZFINDescParser
29 
30 =head1 DESCRIPTION
31 
32 A parser class to parse the ZFIN file for descriptions.
33 
34 -species = danio_rerio
35 -species_id = 7955
36 -data_uri = ftp://zfin.org/pub/transfer/MEOW/zfin_genes.txt
37 -file_format = TSV
38 -columns = [acc desc label ignored ignored]
39 
40 =head1 SYNOPSIS
41 
42  my $parser = XrefParser::ZFINDescParser->new(
43  source_id => 149,
44  species_id => 7955,
45  files => ['zfin_genes.txt'],
46  xref_dba => $xref_dba
47  );
48 
49  $parser->run();
50 
51 =cut
52 
53 
54 
55 package XrefParser::ZFINDescParser;
56 
57 use strict;
58 use warnings;
59 use Carp;
60 use Text::CSV;
61 
62 use parent qw( XrefParser::BaseParser );
63 
=head2 run

  Arg [1]    : HashRef of named arguments:
                 source_id  - passed through to add_xref for every entry (required)
                 species_id - passed through to add_xref for every entry (required)
                 files      - ArrayRef of file names; only the first is parsed (required)
                 verbose    - when true, print a summary line at the end (default 0)
  Description: Runs the ZFINDescParser. Reads the tab-separated ZFIN file and
               calls add_xref for every row whose label does not start with
               'WITHDRAWN:'; withdrawn rows are counted and skipped.
  Return type: Int, 0 once the whole file has been parsed
  Exceptions : confess if source_id, species_id or files is missing,
               if no file handle can be obtained for the file,
               if the Text::CSV parser cannot be created,
               or if parsing stops before the end of the file
  Caller     : internal

=cut

sub run {
  my ($self, $ref_arg) = @_;

  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $files      = $ref_arg->{files};
  my $verbose    = $ref_arg->{verbose} // 0;

  if ( (!defined $source_id) || (!defined $species_id) || (!defined $files) ) {
    confess "Need to pass source_id, species_id and files as pairs";
  }

  # Only the first entry of the files list is parsed.
  my $file = $files->[0];

  # Example rows (tab-separated):
  #ZDB-GENE-050102-6  WITHDRAWN:zgc:92147  WITHDRAWN:zgc:92147  0
  #ZDB-GENE-060824-3  apobec1 complementation factor  a1cf  0
  #ZDB-GENE-090212-1  alpha-2-macroglobulin-like  a2ml  15  ZDB-PUB-030703-1

  my $count     = 0;
  my $withdrawn = 0;

  my $file_io = $self->get_filehandle($file);

  if ( !defined $file_io ) {
    confess "Can't open ZFINDesc file '$file'\n";
  }

  my $input_file = Text::CSV->new({
    sep_char       => "\t",
    empty_is_undef => 1,
    binary         => 1
  }) or confess "Cannot use file '$file': " . Text::CSV->error_diag();

  # Only the first three columns are named; the 2 extra columns are ignored.
  $input_file->column_names( [ 'zfin', 'desc', 'label' ] );

  while ( my $data = $input_file->getline_hr( $file_io ) ) {
    my $label = $data->{'label'};

    # skip if WITHDRAWN: this precedes both desc and label.
    # empty_is_undef turns an empty label column into undef, so check
    # definedness first to avoid an "uninitialized value" warning; such
    # rows are not withdrawn and are still stored.
    if ( defined $label && $label =~ /\A WITHDRAWN:/xms ) {
      $withdrawn++;
    }
    else {
      $self->add_xref({
        acc        => $data->{'zfin'},
        label      => $label,
        desc       => $data->{'desc'},
        source_id  => $source_id,
        species_id => $species_id,
        info_type  => "MISC"
      });
      $count++;
    }
  }

  # getline_hr returns false both at end of file and on a parse error;
  # only the former is acceptable.
  $input_file->eof or confess "Error parsing file $file: " . $input_file->error_diag();
  $file_io->close();

  if ($verbose) {
    print "$count ZFINDesc xrefs added, $withdrawn withdrawn entries ignored\n";
  }

  return 0;
}
136 
137 1;
XrefParser::ZFINDescParser
Definition: ZFINDescParser.pm:28
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()