ensembl-hive  2.7.0
EnsemblExonGeneric.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
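=head1 NAME

Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric

=head1 SYNOPSIS

=head1 DESCRIPTION

Exon-level mapping steps: a basic mapping (init_basic), a mapping after
rescoring exons whose transcripts are not mapped (mapped_transcript), a
mapping restricted to unambiguous or single-transcript cases
(single_transcript, same_transcript_exon_mapping) and a mapping after
internal ID rescoring (internal_id).

=head1 METHODS

=cut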
40 
41 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblExonGeneric;
42 
43 use strict;
44 use warnings;
45 no warnings 'uninitialized';
46 
49 
50 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
51 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
52 
53 
54 #
55 # basic mapping
56 #
# Run the basic exon mapping step.
#
# Arguments (positional):
#   $num         - suffix used to name the serialised mapping
#                  ("exon_mappings$num"); the incremented value names the
#                  resulting matrix ("exon_matrix$num")
#   $esb         - object providing create_shrinked_matrix()
#   $mappings    - accepted for a uniform call signature; its incoming value
#                  is replaced by the result of basic_mapping()
#   $exon_scores - score matrix passed on to basic_mapping()
#
# Returns the list ($new_scores, $mappings).
sub init_basic {
  my ($self, $num, $esb, $mappings, $exon_scores) = @_;

  $self->logger->info("Basic exon mapping...\n", 0, 'stamped');

  $mappings = $self->basic_mapping($exon_scores, 'exon_mappings' . $num);

  # the shrinked matrix is named after the next step number
  my $matrix_name = 'exon_matrix' . ++$num;
  my $new_scores =
    $esb->create_shrinked_matrix($exon_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
73 
74 
75 #
76 # reduce score for mappings of exons which do not belong to mapped
77 # transcripts
78 # (ie where source exon transcript does not map target exon transcript)
79 #
# Map exons after rescoring against the transcript mappings.
#
# If the score matrix does not report itself as loaded, $esb is asked to
# rescore it via non_mapped_transcript_rescore() and the matrix is written to
# file. A basic mapping is then run on the matrix.
#
# Arguments (positional):
#   $num                 - suffix for "exon_mappings$num"; the incremented
#                          value names the resulting "exon_matrix$num"
#   $esb                 - object providing non_mapped_transcript_rescore()
#                          and create_shrinked_matrix()
#   $mappings            - incoming value is replaced by basic_mapping()
#   $exon_scores         - score matrix
#   $transcript_mappings - passed through to the rescoring call
#
# Returns the list ($new_scores, $mappings).
sub mapped_transcript {
  my ($self, $num, $esb, $mappings, $exon_scores, $transcript_mappings) = @_;

  $self->logger->info("Exons in mapped transcript...\n", 0, 'stamped');

  # only rescore (and checkpoint) a matrix that was not loaded
  if (!$exon_scores->loaded) {
    $esb->non_mapped_transcript_rescore($exon_scores, $transcript_mappings);
    $exon_scores->write_to_file;
  }

  $mappings = $self->basic_mapping($exon_scores, 'exon_mappings' . $num);

  my $matrix_name = 'exon_matrix' . ++$num;
  my $new_scores =
    $esb->create_shrinked_matrix($exon_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
102 
# Map exons using same_transcript_exon_mapping().
#
# If the score matrix does not report itself as loaded it is written to file
# first (no rescoring is done in this step).
#
# Arguments (positional):
#   $num         - suffix for "exon_mappings$num"; the incremented value
#                  names the resulting "exon_matrix$num"
#   $esb         - object providing create_shrinked_matrix()
#   $mappings    - incoming value is replaced by the new mapping
#   $exon_scores - score matrix
#
# Returns the list ($new_scores, $mappings).
sub single_transcript {
  my ($self, $num, $esb, $mappings, $exon_scores) = @_;

  $self->logger->info("Exons in single transcript...\n", 0, 'stamped');

  # checkpoint the matrix if it was not loaded
  $exon_scores->write_to_file if (!$exon_scores->loaded);

  $mappings = $self->same_transcript_exon_mapping($exon_scores,
    'exon_mappings' . $num);

  my $matrix_name = 'exon_matrix' . ++$num;
  my $new_scores =
    $esb->create_shrinked_matrix($exon_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
123 
# Build an exon MappingList from a scoring matrix, accepting ambiguous
# best-scoring entries only when a single transcript key is involved on each
# side.
#
# Arguments (positional):
#   $matrix       - a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#   $mapping_name - name used for the serialised mapping file
#                   ("${mapping_name}.ser")
#
# Returns the MappingList. If the list reports itself as loaded right after
# construction (AUTO_LOAD), it is returned as is; otherwise it is filled and
# written to file before being returned.
#
# Throws if $matrix is missing or of the wrong class, or if $mapping_name is
# false.
#
# Entries are visited from highest to lowest score (ties broken by ascending
# source, then ascending target). An entry whose source or target was already
# handled is skipped. Source and target of every visited entry are marked as
# handled whether or not the entry was added to the mapping.
sub same_transcript_exon_mapping {
  my ($self, $matrix, $mapping_name) = @_;

  # argument checks
  unless ($matrix and
          $matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }

  throw('Need a name for serialising the mapping.') unless ($mapping_name);

  # Create a new MappingList object. Specify AUTO_LOAD to load serialised
  # existing mappings if found.
  # NOTE(review): Bio::EnsEMBL::IdMapping::MappingList is not loaded in the
  # visible part of this file; presumably it is loaded via the base mapper
  # class - confirm.
  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

  my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => "${mapping_name}.ser",
    -AUTO_LOAD  => 1,
  );

  # checkpoint test: return a previously stored MappingList
  if ($mappings->loaded) {
    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
    return $mappings;
  }

  my $sources_done = {};
  my $targets_done = {};

  # sort scoring matrix entries by descending score
  my @sorted_entries = sort { $b->score <=> $a->score ||
    $a->source <=> $b->source || $a->target <=> $b->target }
    @{ $matrix->get_all_Entries };

  foreach my $entry (@sorted_entries) {

    # skip entries whose source or target has already been handled
    next if ($sources_done->{$entry->source} or
             $targets_done->{$entry->target});

    # filled in by ambiguous_mapping()
    my $other_sources = [];
    my $other_targets = [];

    if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {

      $other_sources = $self->filter_sources($other_sources, $sources_done);
      $other_targets = $self->filter_targets($other_targets, $targets_done);

      # Collect the transcript lookup result for this entry and for every
      # remaining competitor, separately for source and target.
      # NOTE(review): the value returned by the cache is used directly as a
      # hash key; if it is a reference it is stringified - confirm this is
      # the intended notion of "same transcript".
      my %source_transcripts = ();
      my %target_transcripts = ();

      foreach my $source ($entry->source, @{ $other_sources }) {
        $source_transcripts{$self->cache->get_by_key('transcripts_by_exon_id',
          'source', $source)} = 1;
      }

      foreach my $target ($entry->target, @{ $other_targets }) {
        $target_transcripts{$self->cache->get_by_key('transcripts_by_exon_id',
          'target', $target)} = 1;
      }

      # only add the mapping if exactly one source transcript key and one
      # target transcript key are involved
      if (scalar(keys %source_transcripts) == 1 and
          scalar(keys %target_transcripts) == 1) {
        $mappings->add_Entry($entry);
      }

    } else {

      # this is the best mapping, add it
      $mappings->add_Entry($entry);
    }

    $sources_done->{$entry->source} = 1;
    $targets_done->{$entry->target} = 1;
  }

  # create checkpoint
  $mappings->write_to_file;

  return $mappings;
}
219 
220 
221 #
222 # selectively rescore by penalising scores between exons with
223 # different internalIDs
224 #
# Retry the exon mapping after internal ID rescoring.
#
# Unless the score matrix reports itself as loaded, $esb is asked to rescore
# it via internal_id_rescore() and the matrix is written to file. A basic
# mapping is then run on the matrix.
#
# Arguments (positional):
#   $num         - suffix for "exon_mappings$num"; the incremented value
#                  names the resulting "exon_matrix$num"
#   $esb         - object providing internal_id_rescore() and
#                  create_shrinked_matrix()
#   $mappings    - incoming value is replaced by basic_mapping()
#   $exon_scores - score matrix
#
# Returns the list ($new_scores, $mappings).
sub internal_id {
  my ($self, $num, $esb, $mappings, $exon_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n",
    0, 'stamped');

  # only rescore (and checkpoint) a matrix that was not loaded
  unless ($exon_scores->loaded()) {
    $esb->internal_id_rescore($exon_scores);
    $exon_scores->write_to_file();
  }

  $mappings = $self->basic_mapping($exon_scores, 'exon_mappings' . $num);

  my $matrix_name = 'exon_matrix' . ++$num;
  my $new_scores =
    $esb->create_shrinked_matrix($exon_scores, $mappings, $matrix_name);

  return ($new_scores, $mappings);
}
248 
249 
250 1;
251 
Bio::EnsEMBL::Utils::ScriptUtils
Definition: ScriptUtils.pm:11
Bio::EnsEMBL::IdMapping::MappingList::new
public Bio::EnsEMBL::IdMapping::MappingList new()
Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper
Definition: BaseMapper.pm:17
Bio::EnsEMBL::IdMapping::MappingList::get_all_Entries
public Arrayref get_all_Entries()
Bio::EnsEMBL::IdMapping::MappingList
Definition: MappingList.pm:38
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68