3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
41 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric;
45 no warnings
'uninitialized';
# Fragment of the basic exon mapping step; the enclosing sub header and
# several statements are not visible in this view.
# NOTE(review): the leading number on each line looks like a line number left
# behind by an extraction, and the gaps between them suggest missing lines --
# confirm against the original module.
#
# What the visible lines do: shift $exon_scores off the argument list, log the
# start of the step, call basic_mapping() on the scores under the name
# "exon_mappings$num", ask $esb to create a shrinked matrix from the scores
# and the resulting mappings, and return the list ($new_scores, $mappings).
62 my $exon_scores = shift;
64 $self->logger->info(
"Basic exon mapping...\n", 0,
'stamped');
# the mapping name embeds $num
66 $mappings = $self->basic_mapping($exon_scores,
"exon_mappings$num");
# the remaining argument(s) of this call are on a line not visible here
68 my $new_scores = $esb->create_shrinked_matrix($exon_scores, $mappings,
71 return ($new_scores, $mappings);
76 # reduce the score of exon mappings whose exons do not belong to mapped transcripts
78 # (i.e. where the source exon's transcript does not map to the target exon's transcript)
# Exon mapping step that rescores the exon scores against transcript mappings
# before mapping.
# NOTE(review): the leading number on each line looks like a line number left
# behind by an extraction, and the gaps between them suggest missing lines
# (e.g. further argument shifts and closing braces) -- confirm against the
# original module.
#
# Visible arguments (others may be shifted on lines not shown):
#   $exon_scores         - exon scores object; must support loaded() and
#                          write_to_file()
#   $transcript_mappings - handed to $esb->non_mapped_transcript_rescore()
# Returns: the list ($new_scores, $mappings).
80 sub mapped_transcript {
85 my $exon_scores = shift;
86 my $transcript_mappings = shift;
88 $self->logger->info(
"Exons in mapped transcript...\n", 0,
'stamped');
# rescore and write the scores out only when $exon_scores->loaded is false
# (presumably: the scores were not read from a previously written file --
# TODO confirm)
90 unless ($exon_scores->loaded) {
91 $esb->non_mapped_transcript_rescore($exon_scores, $transcript_mappings);
92 $exon_scores->write_to_file;
# map using the (possibly rescored) scores; the mapping name embeds $num
95 $mappings = $self->basic_mapping($exon_scores,
"exon_mappings$num");
# the remaining argument(s) of this call are on a line not visible here
97 my $new_scores = $esb->create_shrinked_matrix($exon_scores, $mappings,
100 return ($new_scores, $mappings);
# Exon mapping step that maps via same_transcript_exon_mapping() instead of
# basic_mapping().
# NOTE(review): the leading number on each line looks like a line number left
# behind by an extraction, and the gaps between them suggest missing lines
# (e.g. further argument shifts and closing braces) -- confirm against the
# original module.
#
# Visible arguments (others may be shifted on lines not shown):
#   $mappings    - incoming mappings; overwritten below with the new result
#   $exon_scores - exon scores object; must support loaded() and
#                  write_to_file()
# Returns: the list ($new_scores, $mappings).
103 sub single_transcript {
107 my $mappings = shift;
108 my $exon_scores = shift;
110 $self->logger->info(
"Exons in single transcript...\n", 0,
'stamped');
# no rescoring call is visible here: the scores are only written out when
# $exon_scores->loaded is false
112 unless ($exon_scores->loaded) {
113 $exon_scores->write_to_file;
# the mapping name embeds $num
116 $mappings = $self->same_transcript_exon_mapping($exon_scores,
"exon_mappings$num");
# the remaining argument(s) of this call are on a line not visible here
118 my $new_scores = $esb->create_shrinked_matrix($exon_scores, $mappings,
121 return ($new_scores, $mappings);
# Build a list of exon mappings from a scored matrix, walking the entries from
# highest to lowest score and accepting at most one mapping per source and per
# target.
# NOTE(review): the leading number on each line looks like a line number left
# behind by an extraction, and the gaps between them suggest missing lines
# (argument shifts, the MappingList constructor call, the list being sorted,
# else/closing braces, the final return) -- confirm against the original
# module.
#
# Visible arguments (others may be shifted on lines not shown):
#   $matrix       - checked with isa() against
#                   Bio::EnsEMBL::IdMapping::ScoredMappingMatrix; a throw()
#                   follows that check
#   $mapping_name - required (throws if false); used to build the cache file
#                   name "${mapping_name}.ser"
# Side effects: logs via $self->logger and calls $mappings->write_to_file.
# Return value: not visible in this view.
124 sub same_transcript_exon_mapping {
127 my $mapping_name = shift;
# argument checks
131 $matrix->isa(
'Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
132 throw(
'Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
135 throw(
'Need a name for serialising the mapping.') unless ($mapping_name);
137 # Create a new MappingList object. Specify AUTO_LOAD to load serialised
138 # existing mappings if found.
# The dump path is the 'basedir' configuration parameter with 'mapping'
# appended via path_append().
139 my $dump_path = path_append($self->conf->param(
'basedir'),
'mapping');
142 -DUMP_PATH => $dump_path,
143 -CACHE_FILE =>
"${mapping_name}.ser",
147 # checkpoint test: return a previously stored MappingList
# (the return statement itself is on a line not visible here)
148 if ($mappings->loaded) {
149 $self->logger->info(
"Read existing mappings from ${mapping_name}.ser.\n");
# sources and targets that have already been handled, keyed by the values of
# $entry->source / $entry->target
153 my $sources_done = {};
154 my $targets_done = {};
156 # sort scoring matrix entries by descending score; ties are broken by
# ascending source, then ascending target (all compared numerically)
157 my @sorted_entries = sort { $b->score <=> $a->score ||
158 $a->source <=> $b->source || $a->target <=> $b->target }
# consume the sorted entries from the front, i.e. best score first
161 while (my $entry = shift(@sorted_entries)) {
163 # $self->logger->debug("\nxxx4 ".$entry->to_string." ");
165 # skip this entry if its source or its target has already been handled
166 next
if ($sources_done->{$entry->source} or
167 $targets_done->{$entry->target});
169 #$self->logger->debug('d');
# $other_sources / $other_targets are passed to ambiguous_mapping() as array
# refs (presumably filled in by it -- TODO confirm); the two hashes collect
# the transcript lookups for every exon involved
171 my $other_sources = [];
172 my $other_targets = [];
173 my %source_transcripts = ();
174 my %target_transcripts = ();
176 if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
177 #$self->logger->debug('a');
# narrow the competing exons using the done-sets
179 $other_sources = $self->filter_sources($other_sources, $sources_done);
180 $other_targets = $self->filter_targets($other_targets, $targets_done);
# record the 'transcripts_by_exon_id' cache lookup for this entry's own exons;
# the lookup result is used as a hash key, so equal results collapse into one
182 $source_transcripts{$self->cache->get_by_key(
'transcripts_by_exon_id',
183 'source', $entry->source)} = 1;
184 $target_transcripts{$self->cache->get_by_key(
'transcripts_by_exon_id',
185 'target', $entry->target)} = 1;
# ... and for every remaining competing source and target exon
187 foreach my $other_source (@{ $other_sources }) {
188 $source_transcripts{$self->cache->get_by_key(
'transcripts_by_exon_id',
189 'source', $other_source)} = 1;
192 foreach my $other_target (@{ $other_targets }) {
193 $target_transcripts{$self->cache->get_by_key(
'transcripts_by_exon_id',
194 'target', $other_target)} = 1;
197 # only add the mapping if all exons involved resolve to exactly one distinct
# source transcript lookup and one distinct target transcript lookup
# (the original comment said "gene", but the code counts transcript lookups)
198 if (scalar(keys %source_transcripts) == 1 and scalar(keys %target_transcripts) == 1) {
199 #$self->logger->debug('O');
200 $mappings->add_Entry($entry);
204 #$self->logger->debug('A');
206 # this is the best mapping, add it
# (presumably the non-ambiguous branch; the 'else' is on a line not visible
# here -- TODO confirm)
207 $mappings->add_Entry($entry);
# mark both ends of this entry as handled so later (lower-scoring) entries
# touching them are skipped
210 $sources_done->{$entry->source} = 1;
211 $targets_done->{$entry->target} = 1;
# write the resulting mapping list out
215 $mappings->write_to_file;
222 # selectively rescore by penalising scores between exons with
223 # different internalIDs
# Fragment of the internalID disambiguation step; the enclosing sub header and
# several statements are not visible in this view.
# NOTE(review): the leading number on each line looks like a line number left
# behind by an extraction, and the gaps between them suggest missing lines --
# confirm against the original module.
#
# What the visible lines do: shift $mappings and $exon_scores off the argument
# list, log the start of the step, apply $esb->internal_id_rescore() to the
# scores and write them out when $exon_scores->loaded() is false, call
# basic_mapping() under the name "exon_mappings$num", ask $esb to create a
# shrinked matrix, and return the list ( $new_scores, $mappings ).
229 my $mappings = shift;
230 my $exon_scores = shift;
# the remaining argument(s) of this logging call are on a line not visible here
232 $self->logger->info(
"Retry with internalID disambiguation...\n",
# rescore and write the scores out only when loaded() is false
235 if ( !$exon_scores->loaded() ) {
236 $esb->internal_id_rescore($exon_scores);
237 $exon_scores->write_to_file();
# the incoming $mappings is overwritten with the new result
240 $mappings = $self->basic_mapping( $exon_scores,
"exon_mappings$num" );
# the assignment target and the remaining argument(s) of this call are on
# lines not visible here
243 $esb->create_shrinked_matrix( $exon_scores, $mappings,
246 return ( $new_scores, $mappings );