3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
24 Please email comments or questions to the public Ensembl
25 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
27 Questions may also be sent to the Ensembl help desk at
<http://www.ensembl.org/Help/Contact>.
42 package Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
# 'uninitialized' warnings are switched off for this package, so undefined
# values (e.g. an undefined score) are used in comparisons and arithmetic
# without any warning being emitted.
46 no warnings
'uninitialized';
# Threshold for the relative difference between two scores: the code that
# reads this constant takes the absolute difference of the two scores,
# doubles it, divides by their sum and compares the result against it.
55 # scores are considered the same if (2.0 * |s1-s2|)/(s1 + s2) < this
56 use constant SIMILAR_SCORE_RATIO => 0.01;
# basic_mapping -- NOTE(review): the `sub` line and a number of other lines
# of this routine are not visible in this excerpt; the notes below describe
# only what the visible lines establish.
#
#   - A mapping name is taken from the argument list; the routine throws
#     unless the matrix is a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#     and a mapping name was supplied.
#   - The mapping list is configured with a dump path built from the
#     'basedir' configuration parameter plus 'mapping', and a cache file
#     named "<mapping_name>.ser".
#   - Entries are taken off the front of @sorted_entries one at a time.  An
#     entry is added to $mappings once it gets past the already-mapped,
#     higher-score and ambiguity checks; its source and target are then
#     recorded in $sources_done / $targets_done.
#   - $mappings is written to file after the loop.
#
# NOTE(review): the return value is not visible here -- confirm against the
# full file.
59 # find the highest unambiguous score for all sources and targets in a scoring matrix
65 my $mapping_name = shift;
69 and $matrix->isa(
'Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
71 throw(
'Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
74 throw(
'Need a name for serialising the mapping.')
75 unless ($mapping_name);
77 # Create a new MappingList object. Specify AUTO_LOAD to load
78 # serialised existing mappings if found
80 path_append( $self->conf->param(
'basedir'),
'mapping' );
84 -DUMP_PATH => $dump_path,
85 -CACHE_FILE =>
"${mapping_name}.ser",
88 # checkpoint test: return a previously stored MappingList
89 if ( $mappings->loaded ) {
91 "Read existing mappings from ${mapping_name}.ser.\n");
# Hash refs keyed by source / target; a true value marks an id that has
# already been given a mapping in the loop below.
95 my $sources_done = {};
96 my $targets_done = {};
98 # sort scoring matrix entries by descending score
103 #my $idx = substr($mapping_name, -1);
105 while ( my $entry = shift(@sorted_entries) ) {
107 #$self->logger->debug("\nxxx$idx ".$entry->to_string." ");
# NOTE(review): the statement this condition modifies is not visible;
# presumably `next`, i.e. the entry is skipped -- confirm.
109 # we already found a mapping for either source or target
111 if ( $sources_done->{ $entry->source }
112 or $targets_done->{ $entry->target } );
114 #$self->logger->debug('d');
# NOTE(review): as above, the statement guarded by this condition is not
# visible; presumably the entry is skipped -- confirm.
116 # there's a better mapping for either source or target
118 if ( $self->higher_score_exists(
119 $entry, $matrix, $sources_done, $targets_done
122 #$self->logger->debug('h');
# Fresh array refs for each entry; ambiguous_mapping() pushes the competing
# sources and targets it finds onto them.
124 # check for ambiguous mappings; they are dealt with later
125 my $other_sources = [];
126 my $other_targets = [];
128 if ( $self->ambiguous_mapping( $entry, $matrix,
129 $other_sources, $other_targets ) )
131 #$self->logger->debug('a');
# The return values of the filter calls are discarded and the same array
# refs are tested right afterwards, so the filters presumably prune the
# lists in place (TODO confirm).  The entry is skipped only if at least one
# competitor is left after filtering against the done hashes.
134 $self->filter_sources( $other_sources, $sources_done );
136 $self->filter_targets( $other_targets, $targets_done );
138 next
if ( scalar(@$other_sources) or scalar(@$other_targets) );
141 #$self->logger->debug('A');
143 # this is the best mapping, add it
144 $mappings->add_Entry($entry);
146 $sources_done->{ $entry->source } = 1;
147 $targets_done->{ $entry->target } = 1;
148 } ## end
while ( my $entry = shift...)
151 $mappings->write_to_file;
154 } ## end sub basic_mapping
# Looks for a competitor that beats $entry: another source for the entry's
# target, or another target for the entry's source, that is not yet marked
# as done and whose score in $matrix is strictly greater than the entry's
# own score.
#
# Arguments:
#   $entry        - provides ->source, ->target and ->score
#   $matrix       - queried for the competing sources/targets and scores
#   $sources_done - hashref; a true value marks a source already mapped
#   $targets_done - hashref; a true value marks a target already mapped
#
# NOTE(review): the loop keywords, braces and return statements are not
# visible in this excerpt.  Presumably returns true as soon as a competitor
# is found and false otherwise -- confirm against the full file.
156 sub higher_score_exists {
157 my ( $self, $entry, $matrix, $sources_done, $targets_done ) = @_;
159 my $source = $entry->source;
160 my $target = $entry->target;
161 my $score = $entry->score;
# First pass: every source the matrix lists for this entry's target.
164 my $other_source ( @{ $matrix->get_sources_for_target($target) } )
# Ids are compared numerically (!=); already-mapped sources are ignored.
166 if ( $other_source != $source
167 and !$sources_done->{$other_source}
168 and $score < $matrix->get_score( $other_source, $target ) )
# Second pass: every target the matrix lists for this entry's source.
175 my $other_target ( @{ $matrix->get_targets_for_source($source) } )
177 if ( $other_target != $target
178 and !$targets_done->{$other_target}
179 and $score < $matrix->get_score( $source, $other_target ) )
186 } ## end sub higher_score_exists
189 # find ambiguous mappings (see scores_similar() for definition)
#
# Collects the competitors that make $entry ambiguous.  A competitor is a
# different source for the entry's target (or a different target for the
# entry's source) whose matrix score is either similar to the entry's score
# according to scores_similar(), or greater than it.
#
# Arguments:
#   $entry         - provides ->source, ->target and ->score
#   $matrix        - queried for the competing sources/targets and scores
#   $other_sources - arrayref; competing sources are pushed onto it
#   $other_targets - arrayref; competing targets are pushed onto it
#
# Unlike higher_score_exists(), no "done" hashes are consulted here.
#
# NOTE(review): the loop keywords, braces and the return statement are not
# visible in this excerpt.  The caller tests the result as a boolean, so it
# presumably is true when any competitor was found -- confirm.
191 sub ambiguous_mapping {
192 my ( $self, $entry, $matrix, $other_sources, $other_targets ) = @_;
194 my $source = $entry->source;
195 my $target = $entry->target;
196 my $score = $entry->score;
# First pass: every source the matrix lists for this entry's target.
201 my $other_source ( @{ $matrix->get_sources_for_target($target) } )
203 my $other_score = $matrix->get_score( $other_source, $target );
# Ids are compared numerically (!=) so the entry itself is excluded.
205 if ( $other_source != $source
206 and ( $self->scores_similar( $score, $other_score )
207 or $score < $other_score ) )
210 push @{$other_sources}, $other_source;
# Second pass: every target the matrix lists for this entry's source.
215 my $other_target ( @{ $matrix->get_targets_for_source($source) } )
217 my $other_score = $matrix->get_score( $source, $other_target );
219 if ( $other_target != $target
220 and ( $self->scores_similar( $score, $other_score )
221 or $score < $other_score ) )
224 push @{$other_targets}, $other_target;
229 } ## end sub ambiguous_mapping
# rule for similarity taken from java code...
#
# scores_similar( $s1, $s2 )
#
# Decides whether two scores are close enough to be treated as the same.
#
# Arguments:
#   $s1 - score of the entry under consideration
#   $s2 - score it is being compared against
#
# Returns a true value when the relative difference
# 2 * |s1 - s2| / (s1 + s2) is below SIMILAR_SCORE_RATIO, a false value
# otherwise.  The test is deliberately not symmetric: when $s1 is exactly 1
# and $s2 is lower, the scores are never reported as similar.
sub scores_similar {
  my ( $self, $s1, $s2 ) = @_;

  # always give priority to exact matches over very similar ones
  return 0 if ( $s1 == 1 and $s2 < 1 );

  my $diff = abs( $s1 - $s2 );
  my $sum  = $s1 + $s2;

  # A zero sum (e.g. both scores 0 or undefined) would make the ratio below
  # die with "Illegal division by zero".  Identical scores count as similar;
  # anything else with a zero sum does not.
  if ( $sum == 0 ) {
    return ( $diff == 0 ) ? 1 : 0;
  }

  my $pc = 2*$diff/$sum;

  return ( $pc < SIMILAR_SCORE_RATIO );
}
# filter_sources -- NOTE(review): the `sub` line, the declaration of @tmp
# and the end of this routine are not visible in this excerpt.
#
# Arguments:
#   $other_sources - arrayref of candidate sources
#   $sources_done  - hashref; a true value marks a source already mapped
#
# Visible behaviour: when either argument is empty, $other_sources is
# returned unchanged.  Otherwise the candidates that are not marked in
# $sources_done are collected into @tmp.
# NOTE(review): what happens to @tmp afterwards is not visible here;
# presumably it replaces the contents of $other_sources -- confirm.
249 my ( $self, $other_sources, $sources_done ) = @_;
# Nothing to filter if there are no candidates or nothing is done yet.
251 unless ( scalar( @{$other_sources} )
252 and scalar( keys %{$sources_done} ) )
254 return $other_sources;
259 foreach my $e ( @{$other_sources} ) {
260 push @tmp, $e unless ( $sources_done->{$e} );
267 my ( $self, $other_targets, $targets_done ) = @_;
269 unless ( scalar( @{$other_targets} )
270 and scalar( keys %{$targets_done} ) )
272 return $other_targets;
277 foreach my $e ( @{$other_targets} ) {
278 push @tmp, $e unless ( $targets_done->{$e} );