3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 Questions may also be sent to the Ensembl help desk at
<http://www.ensembl.org/Help/Contact>.
42 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric;
46 no warnings
'uninitialized';
63 my $transcript_scores = shift;
65 $self->logger->info(
"Basic transcript mapping...\n", 0,
'stamped');
67 $mappings = $self->basic_mapping($transcript_scores,
68 "transcript_mappings$num");
70 my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
71 "transcript_matrix$num");
73 return ($new_scores, $mappings);
78 # handle cases with exact match but different translation
80 sub non_exact_translation {
85 my $transcript_scores = shift;
87 $self->logger->info(
"Exact Transcript non-exact Translation...\n", 0,
'stamped');
89 unless ($transcript_scores->loaded) {
90 $tsb->different_translation_rescore($transcript_scores);
91 $transcript_scores->write_to_file;
94 $mappings = $self->basic_mapping($transcript_scores,
95 "transcript_mappings$num");
97 my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
98 "transcript_matrix$num");
100 return ($new_scores, $mappings);
105 # reduce score for mappings of transcripts which do not belong to mapped genes
112 my $mappings = shift;
113 my $transcript_scores = shift;
114 my $gene_mappings = shift;
116 $self->logger->info(
"Transcripts in mapped genes...\n", 0,
'stamped');
118 unless ($transcript_scores->loaded) {
119 $tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
120 $transcript_scores->write_to_file;
123 $mappings = $self->basic_mapping($transcript_scores,
124 "transcript_mappings$num");
126 my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
127 "transcript_matrix$num");
129 return ($new_scores, $mappings);
133 # rescore by penalising scores between transcripts with different biotypes
139 my $mappings = shift;
140 my $transcript_scores = shift;
142 $self->logger->info(
"Retry with biotype disambiguation...\n",
145 unless ( $transcript_scores->loaded() ) {
146 $tsb->biotype_transcript_rescore($transcript_scores);
147 $transcript_scores->write_to_file();
150 my $new_mappings = $self->basic_mapping( $transcript_scores,
151 "transcript_mappings$num" );
154 $tsb->create_shrinked_matrix( $transcript_scores, $new_mappings,
155 "transcript_matrix$num" );
157 return ( $new_scores, $new_mappings );
161 # selectively rescore by penalising scores between transcripts with
162 # different internalIDs
168 my $mappings = shift;
169 my $transcript_scores = shift;
171 $self->logger->info(
"Retry with internalID disambiguation...\n", 0,
'stamped');
173 unless ($transcript_scores->loaded) {
174 $tsb->internal_id_rescore($transcript_scores);
175 $transcript_scores->write_to_file;
178 $mappings = $self->basic_mapping($transcript_scores,
179 "transcript_mappings$num");
181 my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
182 "transcript_matrix$num");
184 return ($new_scores, $mappings);
189 # handle ambiguities between transcripts in single genes
195 my $mappings = shift;
196 my $transcript_scores = shift;
198 $self->logger->info(
"Transcripts in single genes...\n", 0,
'stamped');
200 unless ($transcript_scores->loaded) {
201 $transcript_scores->write_to_file;
204 $mappings = $self->same_gene_transcript_mapping($transcript_scores,
205 "transcript_mappings$num");
207 my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
208 "transcript_matrix$num");
210 return ($new_scores, $mappings);
215 # modified basic mapper that maps transcripts that are ambiguous within one gene
217 sub same_gene_transcript_mapping {
220 my $mapping_name = shift;
224 $matrix->isa(
'Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
225 throw(
'Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
228 throw(
'Need a name for serialising the mapping.') unless ($mapping_name);
230 # Create a new MappingList object. Specify AUTO_LOAD to load serialised
231 # existing mappings if found
232 my $dump_path = path_append($self->conf->param(
'basedir'),
'mapping');
235 -DUMP_PATH => $dump_path,
236 -CACHE_FILE =>
"${mapping_name}.ser",
240 # checkpoint test: return a previously stored MappingList
241 if ($mappings->loaded) {
242 $self->logger->info(
"Read existing mappings from ${mapping_name}.ser.\n");
246 my $sources_done = {};
247 my $targets_done = {};
249 # sort scoring matrix entries by descending score
250 my @sorted_entries = sort { $b->score <=> $a->score ||
251 $a->source <=> $b->source || $a->target <=> $b->target }
254 while (my $entry = shift(@sorted_entries)) {
256 # $self->logger->debug("\nxxx4 ".$entry->to_string." ");
258 # skip this entry: a mapping has already been found for its source or its target
259 next
if ($sources_done->{$entry->source} or
260 $targets_done->{$entry->target});
262 #$self->logger->debug('d');
264 my $other_sources = [];
265 my $other_targets = [];
266 my %source_genes = ();
267 my %target_genes = ();
269 if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
270 #$self->logger->debug('a');
272 $other_sources = $self->filter_sources($other_sources, $sources_done);
273 $other_targets = $self->filter_targets($other_targets, $targets_done);
275 $source_genes{$self->cache->get_by_key(
'genes_by_transcript_id',
276 'source', $entry->source)} = 1;
277 $target_genes{$self->cache->get_by_key(
'genes_by_transcript_id',
278 'target', $entry->target)} = 1;
280 foreach my $other_source (@{ $other_sources }) {
281 $source_genes{$self->cache->get_by_key(
'genes_by_transcript_id',
282 'source', $other_source)} = 1;
285 foreach my $other_target (@{ $other_targets }) {
286 $target_genes{$self->cache->get_by_key(
'genes_by_transcript_id',
287 'target', $other_target)} = 1;
290 # only add mapping if only one source and target gene involved
291 if (scalar(keys %source_genes) == 1 and scalar(keys %target_genes) == 1) {
292 #$self->logger->debug('O');
293 $mappings->add_Entry($entry);
297 #$self->logger->debug('A');
299 # this is the best mapping, add it
300 $mappings->add_Entry($entry);
303 $sources_done->{$entry->source} = 1;
304 $targets_done->{$entry->target} = 1;
308 $mappings->write_to_file;