# Assign stable IDs to every cached target object of the given type, record
# the corresponding stable_id_event entries, and write the results and
# statistics to file.
#
# Arguments (shifted off @_ in this order):
#   $self     - the invocant
#   $mappings - a Bio::EnsEMBL::IdMapping::MappingList whose entries provide
#               source(), target() and score()
#   $type     - object type name; it is pluralised as "${type}s" for cache
#               and file names. No stable_id_event entries are created for
#               'exon'; for 'gene' and 'transcript' the lost source objects
#               are additionally logged to "${type}s_lost.txt".
#
# Throws if $mappings is missing or is not a MappingList.
# Returns early (bare return) if no source objects of this type are cached.
my $self     = shift;
my $mappings = shift;
my $type     = shift;

unless ($mappings and
        $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
  throw("Need a Bio::EnsEMBL::IdMapping::MappingList of ${type}s.");
}

# generate a new mapping_session and write all mapping_session data to a file
$self->generate_mapping_session;

$self->logger->info("== Stable ID mapping for $type...\n\n", 0, 'stamped');

# check if there are any objects of this type at all
# (only the source side is tested; an empty target set is not an early exit)
my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') };
my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') };

unless (scalar(keys %all_sources)) {
  $self->logger->info("No cached ${type}s found.\n\n");
  return;
}

my %stats = map { $_ => 0 } qw(mapped new lost);

# create some lookup hashes from the mappings
my %sources_mapped   = ();    # source ID => target ID
my %targets_mapped   = ();    # target ID => source ID
my %scores_by_target = ();    # target ID => mapping score

foreach my $e (@{ $mappings->get_all_Entries }) {
  $sources_mapped{$e->source}   = $e->target;
  $targets_mapped{$e->target}   = $e->source;
  $scores_by_target{$e->target} = $e->score;
}

# determine starting stable ID for new assignments
my $new_stable_id = $self->stable_id_generator->initial_stable_id($type);

#
# assign mapped and new stable IDs
#
foreach my $tid (keys %all_targets) {

  my $t_obj = $all_targets{$tid};

  # a mapping exists, assign stable ID accordingly
  if (my $sid = $targets_mapped{$tid}) {

    my $s_obj = $all_sources{$sid};

    # set target's stable ID and created_date
    $t_obj->stable_id($s_obj->stable_id);
    $t_obj->created_date($s_obj->created_date);

    # calculate and set version
    my $old_version = $s_obj->version();
    my $new_version =
      $self->stable_id_generator->calculate_version($s_obj, $t_obj);
    $t_obj->version($new_version);

    # change modified_date if version changed
    if ($old_version == $new_version) {
      $t_obj->modified_date($s_obj->modified_date);
    } else {
      $t_obj->modified_date($self->mapping_session_date);

      # If version changed, score cannot be 1
      if ($scores_by_target{$tid} == 1) {
        $scores_by_target{$tid} = 0.99;
      }
    }

    # create a stable_id_event entry (not for exons)
    unless ( $type eq 'exon' ) {
      # Only add events when something changed.
      if ( !( $s_obj->stable_id eq $t_obj->stable_id &&
              $s_obj->version == $t_obj->version &&
              $scores_by_target{$tid} > 0.9999 ) )
      {
        my $key = join( "\t",
                        $s_obj->stable_id, $s_obj->version,
                        $t_obj->stable_id, $t_obj->version,
                        $self->mapping_session_id, $type,
                        $scores_by_target{$tid} );
        $self->add_stable_id_event( 'new', $key );
      }
    }

    # add to debug hash
    # NOTE(review): %debug_mappings is not declared in this block;
    # presumably it is a file-level variable -- verify.
    push @{ $debug_mappings{$type} }, [ $sid, $tid, $t_obj->stable_id ];

    # stats
    $stats{'mapped'}++;

  # no mapping was found, assign a new stable ID
  } else {

    $t_obj->stable_id($new_stable_id);
    $t_obj->version(1);
    $t_obj->created_date($self->mapping_session_date);
    $t_obj->modified_date($self->mapping_session_date);

    # create a stable_id_event entry (not for exons);
    # the old stable ID / version columns are written as '\N' and 0
    unless ($type eq 'exon') {
      my $key = join("\t",
                     '\N',
                     0,
                     $t_obj->stable_id,
                     $t_obj->version,
                     $self->mapping_session_id,
                     $type,
                     0
      );
      $self->add_stable_id_event('new', $key);
    }

    # increment the stable Id (to be assigned to the next unmapped object)
    $new_stable_id = $self->stable_id_generator->increment_stable_id(
      $new_stable_id);

    # stats
    $stats{'new'}++;
  }

}

#
# deletion events for lost sources
#
my $fh;
if ($type eq 'gene' or $type eq 'transcript') {
  $fh = $self->get_filehandle("${type}s_lost.txt", 'debug');
}

foreach my $sid (keys %all_sources) {

  my $s_obj = $all_sources{$sid};

  # no mapping exists, add deletion event
  unless ($sources_mapped{$sid}) {

    # the new stable ID / version columns are written as '\N' and 0
    unless ($type eq 'exon') {
      my $key = join("\t",
                     $s_obj->stable_id,
                     $s_obj->version,
                     '\N',
                     0,
                     $self->mapping_session_id,
                     $type,
                     0
      );
      $self->add_stable_id_event('new', $key);
    }

    # stats
    $stats{'lost'}++;

    # log lost genes and transcripts (for debug purposes)
    #
    # The Java app did this with a separate method
    # (StableIdMapper.dumpLostGeneAndTranscripts()) which also claims to log
    # losses due to merge. Since at that point this data isn't available yet
    # the logging can be done much more efficiently here.
    #
    # No status is computed in this routine, so the second column is written
    # empty. This keeps the file layout (stable ID, tab, newline) identical
    # to what was emitted before, without interpolating an undefined
    # variable and triggering an "uninitialized value" warning per line.
    if ($type eq 'gene' or $type eq 'transcript') {
      print $fh $s_obj->stable_id, "\t\n";
    }
  }
}

close($fh) if (defined($fh));

#
# write stable IDs to file
#
$self->write_stable_ids_to_file($type, \%all_targets);

# also generate and write stats to file
$self->generate_mapping_stats($type, \%stats);

$self->logger->info("Done.\n\n");
}