ensembl-hive  2.8.1
ChecksumParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::ChecksumParser;
21 
22 # Input format looks like:
23 #
24 # UPI0001B45C00 71B80D7A684B1F2DEDDA7B5AEE1D029E
25 # UPI0002473BEA 4542D97F3AB3F7B656ABB941AED3F2BB
26 # UPI00024743AF A69E7EEE820CA54100AD43E86BE823E4
27 
28 use strict;
29 use warnings;
30 use Carp;
31 use English qw( -no_match_vars );
32 use IO::File;
33 use base qw( XrefParser::BaseParser );
34 
35 my $TABLE_NAME = 'checksum_xref';
36 
# Parser entry point.
#
# Takes a hash reference with the keys:
#   source_id  - required; written into every output row and used to select
#                which existing rows of the checksum table are replaced
#   species_id - required (validated here, not otherwise used in this sub)
#   files      - required; array reference, only the first entry is read
#   verbose    - optional; true enables progress messages on STDOUT
#
# The first input file is rewritten into a tab-separated sibling file named
# "<input>.mysqlinput", which is then bulk-loaded by _load_table().
# Croaks on missing arguments or on failure to open/close the output file.
# Returns nothing.
sub run {
  my ($self, $ref_arg) = @_;

  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $files      = $ref_arg->{files};
  my $verbose    = $ref_arg->{verbose};

  if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
    croak "Need to pass source_id, species_id and files as pairs";
  }
  $verbose ||=0;

  # Without this guard an empty file list would silently yield a target
  # called '.mysqlinput' and an undefined input file name.
  my $input_file = $files->[0];
  if(!defined $input_file) {
    croak "Need at least one input file in 'files'";
  }

  # FIXME: this will fail if the input file is in a read-only directory (ENSCORESW-3197)
  my $target_file = $input_file.'.mysqlinput';
  my $input_fh = $self->get_filehandle($input_file);
  if(-f $target_file) {
    print "Target file '${target_file}' already exists; removing\n" if $verbose;
    # Result deliberately not checked: opening in 'w' mode below truncates
    # the file anyway, and reports the error if it cannot be written.
    unlink $target_file;
  }
  my $output_fh = IO::File->new($target_file, 'w')
    || croak "Failed to open ${target_file} for writing: ${OS_ERROR}";

  $self->_transfer_contents($input_fh, $output_fh, $source_id);

  close($input_fh);
  # Buffered write errors (disk full, quota, ...) only surface at close time;
  # loading a silently truncated file would leave the table incomplete.
  close($output_fh)
    or croak "Failed to close ${target_file} after writing: ${OS_ERROR}";

  $self->_load_table($target_file, $verbose, $source_id);

  return;
}
69 
# Copies "<accession> <checksum>" lines from $input_fh to $output_fh as
# tab-separated rows: id, source_id, accession, checksum.
#
# Ids continue from the current maximum checksum_xref_id in the table, so the
# first row written gets max+1 (or 1 when the table is empty).
# Blank lines are skipped; lines lacking a checksum are skipped with a
# warning instead of producing rows with empty columns.
# Croaks if a row cannot be written. Returns nothing.
sub _transfer_contents {
  my ($self, $input_fh, $output_fh, $source_id) = @_;
  my $dbh = $self->dbi();
  my ($counter) = $dbh->selectrow_array('select max(checksum_xref_id) from '.$TABLE_NAME );
  # MAX() over an empty table comes back as undef; start counting from zero.
  $counter = 0 if !defined $counter;

  while(my $line = <$input_fh>) {
    chomp $line;
    next if $line !~ /\S/;

    # split ' ' (unlike /\s+/) ignores leading whitespace, so an indented
    # line does not produce an empty first field.
    my ($upi, $checksum) = split(' ', $line);
    if(!defined $checksum) {
      carp "Skipping malformed checksum line '${line}'";
      next;
    }

    # Use an ID one higher than the last: increment BEFORE using the counter.
    $counter += 1;

    print {$output_fh} join("\t", $counter, $source_id, $upi, $checksum), "\n"
      or croak "Failed to write checksum row: ${OS_ERROR}";
  }
  return;
}
89 
# Replaces the rows of the checksum table belonging to $source_id with the
# contents of $file.
#
#   $file      - path of the tab-separated file to bulk-load
#   $verbose   - true enables progress messages on STDOUT
#   $source_id - rows with this source_id are deleted before loading
#
# Croaks if the delete or the load statement fails. Returns nothing.
sub _load_table {
  my ($self, $file, $verbose, $source_id) = @_;
  my $dbh = $self->dbi();

  # The source id is passed as a bind value rather than spliced into the SQL.
  my ($count) = $dbh->selectrow_array(
    'select count(*) from '.$TABLE_NAME.' WHERE source_id = ?',
    undef, $source_id
  );
  if($count) {
    print "'$TABLE_NAME' has rows for $source_id; deleting\n" if $verbose;
    $dbh->do('delete from '.$TABLE_NAME.' WHERE source_id = ?', undef, $source_id)
      or croak "Failed to delete rows for source ${source_id} from '${TABLE_NAME}': ".$dbh->errstr;
  }

  print "Loading data into '$TABLE_NAME' from '$file'\n" if $verbose;
  # quote() adds the surrounding quotes and escapes the path, so file names
  # containing quotes or backslashes cannot break the statement.
  my $load = sprintf(q{LOAD DATA LOCAL INFILE %s INTO TABLE %s}, $dbh->quote($file), $TABLE_NAME);
  # do() returns undef on failure and the true value "0E0" for zero rows.
  $dbh->do($load)
    or croak "Failed to load '${file}' into '${TABLE_NAME}': ".$dbh->errstr;
  print "Finished loading data into '$TABLE_NAME'\n" if $verbose;
  return;
}
104 
105 1;
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()