ensembl-hive  2.7.0
PomBaseParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::PomBaseParser;
21 
22 use strict;
23 use warnings;
24 use Carp;
25 use POSIX qw(strftime);
26 use File::Basename;
27 
28 use base qw( XrefParser::BaseParser );
29 
30 # --------------------------------------------------------------------------------
31 # Parser entry point: read a tab-separated PomBase dump and store its gene and transcript xrefs
32 
# Load PomBase gene and transcript entries from a tab-separated dump as
# direct xrefs, and register any synonyms listed alongside them.
#
# Arguments (one hashref):
#   source_id  - required; only checked for presence in this method
#   species_id - required; stored with every xref and synonym
#   files      - required; array reference, only the first entry is read
#   verbose    - optional; when true, progress is written to STDERR/STDOUT
#   dbi        - optional; falls back to $self->dbi
#
# Input format: exactly eight tab-separated columns per line
#   accession, label, info type, biotype, external db source, description,
#   Ensembl object type, comma-separated synonyms
# The first two columns must be non-empty; lines that do not fit are skipped
# (and reported when verbose).
#
# Returns 0 on success and 1 when the file cannot be opened.
# Croaks when a required argument is missing.
sub run {
    my ( $self, $ref_arg ) = @_;

    my $source_id  = $ref_arg->{source_id};
    my $species_id = $ref_arg->{species_id};
    my $files      = $ref_arg->{files};
    my $verbose    = $ref_arg->{verbose};
    my $dbi        = $ref_arg->{dbi};
    $dbi = $self->dbi unless defined $dbi;

    if ( !defined $source_id or !defined $species_id or !defined $files ) {
        croak "Need to pass source_id, species_id and files as pairs";
    }
    $verbose |= 0;    # undef becomes 0 without a warning

    my $file = $files->[0];

    # Only rows whose object type appears here are stored as xrefs.
    my %source_id_for = (
        Gene       => $self->get_source_id_for_source_name( "PomBase_GENE",       undef, $dbi ),
        Transcript => $self->get_source_id_for_source_name( "PomBase_TRANSCRIPT", undef, $dbi ),
    );
    my $gene_source_id = $source_id_for{Gene};

    my $pombase_io = $self->get_filehandle($file);

    if ( !defined $pombase_io ) {
        print STDERR "ERROR: Could not open $file\n";
        return 1;    # 1 is an error
    }

    my $xref_count = 0;
    my $syn_count  = 0;

    # Eight columns; the captures are used directly so that empty trailing
    # columns come back as '' instead of being dropped (as split would do).
    my $line_re = qr/^([^\t]+)\t([^\t]+)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t]*)$/;

  LINE:
    while ( defined( my $line = $pombase_io->getline() ) ) {
        chomp $line;

        my @fields = $line =~ $line_re;
        if ( !@fields ) {
            print STDERR "failed to parse line, $line\n\n" if $verbose;
            next LINE;
        }

        # Columns 4 and 5 (biotype, external db source) are not used.
        my ( $pombase_id, $name, $info_type, undef, undef, $desc, $ensembl_object_type, $synonyms ) = @fields;

        # Keep gene and transcript rows; anything else (e.g. CDS) is not
        # stored and therefore not counted.
        if ( exists $source_id_for{$ensembl_object_type} ) {
            my $ensembl_xref_id = $self->add_xref(
                {   acc        => $pombase_id,
                    label      => $name,
                    desc       => $desc,
                    source_id  => $source_id_for{$ensembl_object_type},
                    species_id => $species_id,
                    dbi        => $dbi,
                    info_type  => $info_type,
                }
            );

            $self->add_direct_xref( $ensembl_xref_id, $pombase_id, $ensembl_object_type, $info_type, $dbi );
            $xref_count++;
        }

        next LINE unless $synonyms;

        # NOTE(review): synonyms are attached using the gene source id for
        # every parsed row, whatever its object type - confirm this is the
        # intended behaviour for transcript and skipped rows.
        foreach my $synonym ( split( /,/, $synonyms ) ) {
            print STDERR "adding synonym, $synonym\n" if $verbose;
            $self->add_to_syn( $pombase_id, $gene_source_id, $synonym, $species_id, $dbi );
            $syn_count++;
        }
    }

    $pombase_io->close();

    print $xref_count . " PomBase Xrefs added with $syn_count synonyms\n" if ($verbose);
    return 0;    # successful
}
132 
133 1;
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()