my $self = shift;
my $matrix = shift;
my $mappings = shift;
my $cache_file = shift; # base name, extension '.ser' will be added
# argument checks
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
unless ($mappings and
$mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
}
throw('Need a cache file name.') unless ($cache_file);
my $dump_path = path_append($self->conf->param('basedir'), 'matrix');
$cache_file .= '.ser';
-DUMP_PATH => $dump_path,
-CACHE_FILE => $cache_file,
-AUTO_LOAD => 1,
);
# if we already found a saved matrix, just return it
if ($shrinked_matrix->loaded) {
$self->logger->info("Read existing scoring matrix from $cache_file.\n");
} else {
# create lookup hashes for sources and targets in the MappingList
my %sources = ();
my %targets = ();
foreach my $entry (@{ $mappings->get_all_Entries }) {
$sources{$entry->source} = 1;
$targets{$entry->target} = 1;
}
# add all entries to shrinked matrix which are not in the MappingList
foreach my $entry (@{ $matrix->get_all_Entries }) {
unless ($sources{$entry->source} or $targets{$entry->target}) {
}
}
}
# log shrinking stats
$self->logger->info('Sources '.$matrix->get_source_count.' --> '.
$shrinked_matrix->get_source_count."\n");
$self->logger->info('Targets '.$matrix->get_target_count.' --> '.
$shrinked_matrix->get_target_count."\n");
$self->logger->info('Entries '.$matrix->get_entry_count.' --> '.
$shrinked_matrix->get_entry_count."\n");
$self->logger->info('New mappings: '.$mappings->get_entry_count."\n\n");
return $shrinked_matrix;
}