ensembl-hive  2.8.1
MGIParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 =head1 NAME
21 
22 XrefParser::MGIParser
23 
24 =head1 DESCRIPTION
25 
26 A parser class to parse the MGI (official) source,
27 creating a DIRECT xref between MGI accession and ensembl mouse gene stable id ENSMUSG*
28 
29 -species = mus_musculus
30 -species_id = 10090
31 -data_uri = http://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt
32 -file_format = TSV
33 -columns = [accession symbol name position chrom ens_gene_stableid] ##ignore other columns
34 
35 =head1 SYNOPSIS
36 
37  my $parser = XrefParser::MGIParser->new($db->dbh);
38  $parser->run({
39  source_id => 55,
40  species_id => 10090,
41  files => ["MRK_ENSEMBL.rpt"],
42  });
43 =cut
44 
45 package XrefParser::MGIParser;
46 
47 use strict;
48 use warnings;
49 use Carp;
50 use Text::CSV;
51 
52 use parent qw( XrefParser::BaseParser );
53 
=head2 run

 Arg [1]    : HashRef standard list of arguments from ParseSource.
              Required keys: source_id, species_id, files (ArrayRef; only
              the first entry is parsed).
              Optional keys: verbose (defaults to 0), dbi (defaults to
              $self->dbi).
 Example    : $mgi_parser->run({ ... });
 Description: Runs the MGIParser. Reads the first file as tab-separated
              values and, for every row, adds a DIRECT xref for the
              accession in column 0 (label from column 1, description from
              column 2) and links it to the gene stable id in column 5.
              Synonyms returned by get_ext_synonyms('MGI') for the
              accession are attached as well.
 Return type: 0 on success
 Exceptions : throws on all processing errors (missing arguments, no file
              name given, unreadable file, TSV parse errors)
 Caller     : ParseSource in the xref pipeline

=cut

sub run {

  my ( $self, $ref_arg ) = @_;
  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $files      = $ref_arg->{files};
  my $verbose    = $ref_arg->{verbose} // 0;
  my $dbi        = $ref_arg->{dbi} // $self->dbi;

  if ( ( !defined $source_id )
    or ( !defined $species_id )
    or ( !defined $files ) )
  {
    confess 'Need to pass source_id, species_id and files as pairs';
  }

  # Only the first file of the list is parsed. Fail early with a clear
  # message when the list is empty instead of handing undef to
  # get_filehandle() and interpolating it into the error text below.
  my $file = $files->[0];
  if ( !defined $file ) {
    confess 'Need at least one file name in files';
  }

  my $file_io = $self->get_filehandle($file);
  if ( !defined $file_io ) {
    confess "Could not open $file\n";
  }

  #synonyms; move this to SynonymAdaptor?!
  my $syn_hash = $self->get_ext_synonyms( 'MGI', $dbi );

  #Init input file
  my $input_file = Text::CSV->new(
    {
      sep_char           => "\t",
      empty_is_undef     => 1,
      strict             => 1,
      allow_loose_quotes => 1,
    }
  ) or confess "Cannot use file $file: " . Text::CSV->error_diag();

  my $count     = 0;
  my $syn_count = 0;

  while ( my $data = $input_file->getline($file_io) ) {

    # Columns used: 0 = accession, 1 = label, 2 = description,
    # 5 = gene stable id. Any other columns are ignored.
    my $acc   = $data->[0];
    my $ensid = $data->[5];

    my $xref_id = $self->add_xref(
      {
        acc        => $acc,
        version    => 0,
        label      => $data->[1],
        desc       => $data->[2],
        source_id  => $source_id,
        species_id => $species_id,
        info_type  => 'DIRECT',
        dbi        => $dbi,
      }
    );

    $self->add_direct_xref( $xref_id, $ensid, 'Gene', undef, $dbi );

    if ( exists $syn_hash->{$acc} ) {
      foreach my $syn ( @{ $syn_hash->{$acc} } ) {
        $self->add_to_syn( $acc, $source_id, $syn, $species_id, $dbi );
        $syn_count += 1;
      }
    }
    $count += 1;

  }

  # getline() returns false both at end of file and on a parse error;
  # eof() tells the two apart.
  $input_file->eof
    || confess "Error parsing file $file: " . $input_file->error_diag();
  $file_io->close();

  if ($verbose) {
    print "$count direct MGI xrefs added\n";
    print $syn_count. " synonyms added\n";
  }
  return 0;

}
140 
141 1;
accession
public accession()
XrefParser::BaseParser
Definition: BaseParser.pm:8
XrefParser::MGIParser
Definition: MGIParser.pm:26
run
public run()
XrefParser::MGIParser::run
public run()