# See the NOTICE file distributed with this work for additional information
# regarding copyright ownership.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): the leading integers on the code lines of this file (such as
# the 20 and 30 below) look like line-number residue from an extraction step
# and are not valid Perl; confirm against the upstream copy before use.
#
# Package whose visible methods read sequences in batches, hand each batch to
# perform_mapping(), and can compute an MD5 hex digest of a sequence.
20 package XrefMapper::Methods::ChecksumBasic;
# Fallback batch size, applied when the constructor is given no batch_size
# or a false one.
30 my $DEFAULT_BATCH_SIZE = 1000;
# Constructor body. The enclosing `sub` line and the final return are not
# visible in this view.
#
# Named arguments are pulled out of @args by rearrange():
#   mapper     - required; a false value raises 'No -MAPPER given'.
#   batch_size - optional; falls back to $DEFAULT_BATCH_SIZE.
# NOTE(review): the error text suggests callers pass `-MAPPER => $obj` style
# arguments; rearrange() and throw() are defined outside this view, so confirm
# their exact semantics against their source.
33 my ($class, @args) = @_;
# Start from an empty hash-based object blessed into the invoking class.
34 my $self = bless({}, $class);
36 my ($mapper, $batch_size) = rearrange([qw(mapper batch_size)], @args);
# The test is for truthiness, not definedness.
38 throw 'No -MAPPER given' unless $mapper;
# Any false batch_size, including an explicit 0, is replaced by the default.
39 $batch_size = $DEFAULT_BATCH_SIZE unless $batch_size;
# Populate the object through its accessors rather than writing the hash directly.
41 $self->mapper($mapper);
42 $self->batch_size($batch_size);
# Combined getter/setter for the mapper object. The enclosing `sub` line is
# not visible in this view.
#
# With a defined argument, stores it in $self->{mapper}. Always returns the
# value currently stored. Because the guard is `defined`, passing undef reads
# the value without clearing it. No validation of the argument is done here.
47 my ($self, $_mapper) = @_;
48 $self->{mapper} = $_mapper
if defined $_mapper;
49 return $self->{mapper};
# Combined getter/setter for the batch size. The enclosing `sub` line is not
# visible in this view.
#
# With a defined argument, stores it in $self->{batch_size}. Always returns
# the value currently stored. The guard is `defined`, so 0 would be stored as
# given; no numeric validation is done here.
53 my ($self, $batch_size) = @_;
54 $self->{batch_size} = $batch_size
if defined $batch_size;
55 return $self->{batch_size};
# Body of the batch-driving method. The enclosing `sub` line, the declarations
# of @results / @tmp_list / $count, the code that updates $count, the closing
# braces and the return are not visible in this view.
#
# Visible behaviour: obtains a sequence reader for $target, accumulates the
# sequences it yields in @tmp_list, and hands the list to perform_mapping()
# together with $source_id, $object_type and $db_url, appending each returned
# array reference's elements to @results.
# NOTE(review): presumably $count is incremented per sequence and @tmp_list
# is emptied after each batch on lines missing here - confirm against the
# upstream file.
59 my ($self, $target, $source_id, $object_type, $db_url) = @_;
# Raises if $target is not an existing plain file (see _get_sequence_parser).
61 my $reader = $self->_get_sequence_parser($target);
64 my $batch_size = $self->batch_size();
# The loop ends when next_seq() returns a false value.
66 while ( my $sequence = $reader->next_seq() ) {
67 push(@tmp_list, $sequence);
# Flush whenever the running count is a multiple of the batch size.
69 if( ($count % $batch_size) == 0) {
# perform_mapping() must return an array reference; it is dereferenced below.
70 my $res = $self->perform_mapping(\@tmp_list, $source_id, $object_type, $db_url);
71 push(@results, @{$res});
# NOTE(review): log_progress() is given a printf-style template plus a value;
# presumably it formats them itself - confirm in the mapper class.
72 $self->mapper()->log_progress(
"Finished batch mapping of %d sequences\n", $batch_size);
78 #Final mapping if there were some left over
# NOTE(review): the guard that decides whether anything is left over is not
# visible here. "progess" in the message below looks like a typo for
# "progress"; it is left untouched because it is runtime output.
80 $self->mapper()->log_progress(
"Finishing progess\n");
81 my $res = $self->perform_mapping(\@tmp_list, $source_id, $object_type, $db_url);
82 push(@results, @{$res});
# Placeholder body that always throws; per its message, it is meant to be
# overridden with the mapping that is actually required. The enclosing `sub`
# line is not visible in this view.
#
# Only $sequences is named here, but the visible callers also pass
# $source_id, $object_type and $db_url after the array reference, and they
# dereference the return value as an array reference - an override should
# accept and return accordingly.
91 my ($self, $sequences) = @_;
92 throw(
'Override to perform the mapping you require');
# Builds a FASTA reader for the file at $target.
#
# Throws "Cannot find the file '...'" unless $target names an existing plain
# file (the -f test).
# NOTE(review): the end of this sub is not visible in this view; the caller
# uses the result as a reader and calls next_seq() on it, so presumably
# $reader is returned - confirm against the upstream file.
95 sub _get_sequence_parser {
96 my ($self, $target) = @_;
97 throw "Cannot find the file '${target}'" unless -f $target;
98 my $reader = Bio::SeqIO->new(-FILE => $target, -FORMAT =>
'fasta');
# Returns the MD5 digest, as a hexadecimal string, of whatever
# $sequence->seq() returns. The enclosing `sub` line is not visible in this
# view. $self is unpacked but not otherwise used.
# NOTE(review): $sequence is presumably one of the objects yielded by the
# Bio::SeqIO reader; the value is digested as-is, with no case or whitespace
# normalisation visible here.
103 my ($self, $sequence) = @_;
104 my $digest = Digest::MD5->new();
105 $digest->add($sequence->seq());
106 return $digest->hexdigest();