# See the NOTICE file distributed with this work for additional information
# regarding copyright ownership.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
package XrefParser::ChecksumParser;

# Input format looks like:
#
# UPI0001B45C00 71B80D7A684B1F2DEDDA7B5AEE1D029E
# UPI0002473BEA 4542D97F3AB3F7B656ABB941AED3F2BB
# UPI00024743AF A69E7EEE820CA54100AD43E86BE823E4
#
# i.e. one record per line, two whitespace-separated fields which the parser
# reads as ($upi, $checksum).

use strict;
use warnings;

use Carp;                            # croak()
use English qw( -no_match_vars );    # $OS_ERROR
use IO::File;                        # IO::File->new() for the intermediate dump

# NOTE(review): dbi() and get_filehandle() are called on $self but are not
# defined in the code visible here; presumably they are inherited. Confirm the
# base-class declaration is present in the full file.

# Table that receives the parsed (id, source_id, UPI, checksum) rows.
my $TABLE_NAME = 'checksum_xref';
# Entry point of the parser: rewrites the first input file as a tab-separated
# dump next to it ("<input>.mysqlinput") and hands that dump to _load_table().
#
# NOTE(review): the `sub` line of this routine was not visible in the reviewed
# listing. The name `run` is an assumption - confirm it against the full file
# before applying.
#
# Arguments: $ref_arg, a hash reference with the keys
#   source_id  - required; passed on to _transfer_contents() and _load_table()
#   species_id - required; only checked for definedness here
#   files      - required; array reference, only the first element is used
#   verbose    - optional; true enables progress messages on STDOUT
#
# Croaks if a required key is missing or the dump file cannot be written.
sub run {
  my ($self, $ref_arg) = @_;

  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $files      = $ref_arg->{files};
  my $verbose    = $ref_arg->{verbose};

  if ((!defined $source_id) or (!defined $species_id) or (!defined $files)) {
    croak "Need to pass source_id, species_id and files as pairs";
  }

  # FIXME: this will fail if the input file is in a read-only directory (ENSCORESW-3197)
  my $target_file = $files->[0] . '.mysqlinput';
  my $input_fh    = $self->get_filehandle($files->[0]);

  if (-f $target_file) {
    print "Target file '${target_file}' already exists; removing\n" if $verbose;
    # Best effort only: opening with mode 'w' below truncates a leftover file
    # anyway, and reports the error if the file really is unusable.
    unlink $target_file;
  }

  my $output_fh = IO::File->new($target_file, 'w')
    or croak "Failed to open ${target_file} for writing: ${OS_ERROR}";

  $self->_transfer_contents($input_fh, $output_fh, $source_id);

  # The dump is re-read by file name during the load, so everything still
  # sitting in the output buffer must be on disk first. Buffered write errors
  # also only surface at close time.
  close $input_fh;
  close $output_fh
    or croak "Failed to finish writing ${target_file}: ${OS_ERROR}";

  $self->_load_table($target_file, $verbose, $source_id);

  return;
}
# Copies the records read from $input_fh to $output_fh as tab-separated rows
# of (checksum_xref_id, source_id, UPI, checksum), one row per input record.
#
# Arguments:
#   $input_fh  - readable handle; each line holds a UPI and a checksum
#                separated by whitespace
#   $output_fh - writable handle receiving the rows
#   $source_id - value written into the second column of every row
#
# Row IDs continue from the current max(checksum_xref_id) in the table.
# Returns nothing.
sub _transfer_contents {
  my ($self, $input_fh, $output_fh, $source_id) = @_;

  my $dbh = $self->dbi();
  my ($counter) = $dbh->selectrow_array(
    'select max(checksum_xref_id) from ' . $TABLE_NAME
  );

  # max() over an empty table is NULL, which arrives here as undef.
  $counter = 0 if !defined $counter;

  while (my $line = <$input_fh>) {
    # A blank line carries no record; do not emit a row of empty fields for it.
    next if $line !~ /\S/;

    # split ' ' ignores leading whitespace and the trailing newline.
    my ($upi, $checksum) = split(' ', $line);

    # Use an ID one higher than the last. The increment has to happen before
    # the row is written, otherwise the first new row would reuse the ID that
    # is already the maximum in the table.
    $counter++;

    print {$output_fh} join("\t", $counter, $source_id, $upi, $checksum), "\n";
  }

  return;
}
# Replaces the rows stored for $source_id in the checksum table with the
# contents of $file, using LOAD DATA LOCAL INFILE.
#
# Arguments:
#   $file      - path of the tab-separated dump to load
#   $verbose   - true enables progress messages on STDOUT
#   $source_id - source whose existing rows are removed before loading
#
# Croaks if the delete or the load statement fails. Returns nothing.
sub _load_table {
  my ($self, $file, $verbose, $source_id) = @_;

  my $dbh = $self->dbi();

  # Bind $source_id instead of concatenating it into the statement text.
  my ($count) = $dbh->selectrow_array(
    'select count(*) from ' . $TABLE_NAME . ' WHERE source_id = ?',
    undef, $source_id
  );

  if ($count) {
    print "'$TABLE_NAME' has rows for $source_id; deleting\n" if $verbose;
    $dbh->do('delete from ' . $TABLE_NAME . ' WHERE source_id = ?', undef, $source_id)
      or croak "Failed to delete rows for source ${source_id} from '${TABLE_NAME}': "
               . $dbh->errstr;
  }

  print "Loading data into '$TABLE_NAME' from '$file'\n" if $verbose;

  # quote() returns the path as a complete, escaped SQL string literal, so a
  # file name containing a quote or backslash cannot break the statement.
  my $load = sprintf(q{LOAD DATA LOCAL INFILE %s INTO TABLE %s},
                     $dbh->quote($file), $TABLE_NAME);

  # do() returns undef on failure; a successful load of zero rows is "0E0",
  # which is still true.
  $dbh->do($load)
    or croak "Failed to load '${file}' into '${TABLE_NAME}': " . $dbh->errstr;

  print "Finished loading data into '$TABLE_NAME'\n" if $verbose;

  return;
}