my $self = shift;
my $matrix = shift;
my $mapping_name = shift;
# argument checks
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
throw('Need a name for serialising the mapping.') unless ($mapping_name);
# Create a new MappingList object. Specify AUTO_LOAD to load serialised
# existing mappings if found
my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
-DUMP_PATH => $dump_path,
-CACHE_FILE => "${mapping_name}.ser",
-AUTO_LOAD => 1,
);
# checkpoint test: return a previously stored MappingList
if ($mappings->loaded) {
$self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
return $mappings;
}
my $sources_done = {};
my $targets_done = {};
# sort scoring matrix entries by descending score; ties are broken by
# ascending source, then ascending target
my @sorted_entries = sort { $b->score <=> $a->score ||
$a->source <=> $b->source || $a->target <=> $b->target }
while (my $entry = shift(@sorted_entries)) {
# $self->logger->debug("\nxxx4 ".$entry->to_string." ");
# skip this entry if a mapping was already found for either its source or
# its target
next if ($sources_done->{$entry->source} or
$targets_done->{$entry->target});
#$self->logger->debug('d');
my $other_sources = [];
my $other_targets = [];
my %source_transcripts = ();
my %target_transcripts = ();
if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
#$self->logger->debug('a');
$other_sources = $self->filter_sources($other_sources, $sources_done);
$other_targets = $self->filter_targets($other_targets, $targets_done);
$source_transcripts{$self->cache->get_by_key('transcripts_by_exon_id',
'source', $entry->source)} = 1;
$target_transcripts{$self->cache->get_by_key('transcripts_by_exon_id',
'target', $entry->target)} = 1;
foreach my $other_source (@{ $other_sources }) {
$source_transcripts{$self->cache->get_by_key('transcripts_by_exon_id',
'source', $other_source)} = 1;
}
foreach my $other_target (@{ $other_targets }) {
$target_transcripts{$self->cache->get_by_key('transcripts_by_exon_id',
'target', $other_target)} = 1;
}
# only add the mapping if all lookups above collapsed to exactly one source
# key and one target key (keys are the 'transcripts_by_exon_id' cache values)
if (scalar(keys %source_transcripts) == 1 and scalar(keys %target_transcripts) == 1) {
#$self->logger->debug('O');
$mappings->add_Entry($entry);
}
} else {
#$self->logger->debug('A');
# this is the best mapping, add it
$mappings->add_Entry($entry);
}
$sources_done->{$entry->source} = 1;
$targets_done->{$entry->target} = 1;
}
# create checkpoint
$mappings->write_to_file;
return $mappings;
}