ensembl-hive  2.7.0
EnsemblGeneGeneric.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric - InternalIdMapper implementation for genes
35 
36 =head1 SYNOPSIS
37 
38 =head1 DESCRIPTION
39 
40 =head1 METHODS
41 
42 =cut
43 
44 package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric;
45 
46 use strict;
47 use warnings;
48 no warnings 'uninitialized';
49 
52 
53 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
54 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
55 
56 
57 #
58 # basic mapping
59 #
# init_basic($num, $gsb, $mappings, $gene_scores)
#
# First mapping step. Logs a stamped progress message, calls
# $self->basic_mapping() on $gene_scores using the name "gene_mappings$num",
# increments $num, and then calls $gsb->create_shrinked_matrix() with the
# name "gene_matrix$num".
#
# The $mappings argument is accepted positionally but is overwritten by the
# result of basic_mapping() before it is ever read.
#
# Returns the list ($new_scores, $mappings).
sub init_basic {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Basic gene mapping...\n", 0, 'stamped');

  my $mappings_name = "gene_mappings$num";
  $mappings = $self->basic_mapping($gene_scores, $mappings_name);

  # the shrunken matrix is stored under the incremented step number
  $num++;
  my $matrix_name = "gene_matrix$num";
  my $shrunk_scores =
    $gsb->create_shrinked_matrix($gene_scores, $mappings, $matrix_name);

  return ($shrunk_scores, $mappings);
}
76 
77 
78 #
79 # build the synteny from unambiguous mappings
80 #
# synteny($num, $gsb, $mappings, $gene_scores)
#
# Synteny-assisted mapping step.
#
# If $gene_scores->loaded is false, a
# Bio::EnsEMBL::IdMapping::SyntenyFramework is constructed (dump path is
# <basedir>/mapping, cache file 'synteny_framework.ser'), build_synteny() is
# called on it with $mappings, $gene_scores is replaced by the result of
# rescore_gene_matrix_lsf(), and the rescored matrix is written to file as a
# checkpoint. If $gene_scores->loaded is true that whole stage is skipped.
#
# In both cases basic_mapping() is then run on the scores under the name
# "gene_mappings$num", $num is incremented, and $gsb->create_shrinked_matrix()
# is called with the name "gene_matrix$num".
#
# Returns the list ($new_scores, $new_mappings).
sub synteny {
  my $self = shift;
  my $num = shift;
  my $gsb = shift;
  my $mappings = shift;
  my $gene_scores = shift;

  unless ($gene_scores->loaded) {
    $self->logger->info("Synteny Framework building...\n", 0, 'stamped');
    my $dump_path = path_append($self->conf->param('basedir'), 'mapping');

    # Loaded on demand so this sub does not rely on a file-level import;
    # a no-op when the class has already been loaded.
    require Bio::EnsEMBL::IdMapping::SyntenyFramework;

    # The constructor call that opens this argument list was missing, which
    # left $sf undeclared and the list below dangling.
    my $sf = Bio::EnsEMBL::IdMapping::SyntenyFramework->new(
      -DUMP_PATH  => $dump_path,
      -CACHE_FILE => 'synteny_framework.ser',
      -LOGGER     => $self->logger,
      -CONF       => $self->conf,
      -CACHE      => $self->cache,
    );
    $sf->build_synteny($mappings);

    # use it to rescore the genes
    $self->logger->info("\nSynteny assisted mapping...\n", 0, 'stamped');
    $gene_scores = $sf->rescore_gene_matrix_lsf($gene_scores);

    # checkpoint
    $gene_scores->write_to_file;
  }

  my $new_mappings = $self->basic_mapping($gene_scores, "gene_mappings$num");
  $num++;
  my $new_scores = $gsb->create_shrinked_matrix($gene_scores, $new_mappings,
    "gene_matrix$num");

  return ($new_scores, $new_mappings);
}
115 
116 
117 #
118 # rescore with simple scoring function and try again
119 #
# best_transcript($num, $gsb, $mappings, $gene_scores, $transcript_scores)
#
# Retry step using the simple best-transcript score. Logs a stamped message;
# when $gene_scores->loaded is false, calls
# $gsb->simple_gene_rescore($gene_scores, $transcript_scores) and writes the
# matrix to file. Then runs basic_mapping() under "gene_mappings$num",
# increments $num and calls $gsb->create_shrinked_matrix() under
# "gene_matrix$num".
#
# $mappings is accepted positionally but not read here.
#
# Returns the list ($new_scores, $new_mappings).
sub best_transcript {
  my ($self, $num, $gsb, $mappings, $gene_scores, $transcript_scores) = @_;

  $self->logger->info("Retry with simple best transcript score...\n", 0, 'stamped');

  if (not $gene_scores->loaded) {
    $gsb->simple_gene_rescore($gene_scores, $transcript_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $fresh_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  $num++;
  my $matrix_name = "gene_matrix$num";
  my $shrunk_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_name);

  return ($shrunk_scores, $fresh_mappings);
}
142 
143 
144 #
145 # rescore by penalising scores between genes with different biotypes
146 #
# biotype($num, $gsb, $mappings, $gene_scores)
#
# Retry step with biotype disambiguation. Logs a stamped message; when
# $gene_scores->loaded is false, calls $gsb->biotype_gene_rescore($gene_scores)
# and writes the matrix to file. Then runs basic_mapping() under
# "gene_mappings$num", increments $num and calls
# $gsb->create_shrinked_matrix() under "gene_matrix$num".
#
# $mappings is accepted positionally but not read here.
#
# Returns the list ($new_scores, $new_mappings).
sub biotype {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with biotype disambiguation...\n", 0, 'stamped');

  if (not $gene_scores->loaded) {
    $gsb->biotype_gene_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $fresh_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  $num++;
  my $matrix_name = "gene_matrix$num";
  my $shrunk_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_name);

  return ($shrunk_scores, $fresh_mappings);
}
168 
# location($num, $gsb, $mappings, $gene_scores)
#
# Retry step with location disambiguation. Logs a stamped message; when
# $gene_scores->loaded is false, calls $gsb->location_gene_rescore($gene_scores)
# and writes the matrix to file. Then runs basic_mapping() under
# "gene_mappings$num", increments $num and calls
# $gsb->create_shrinked_matrix() under "gene_matrix$num".
#
# $mappings is accepted positionally but not read here.
#
# Returns the list ($new_scores, $new_mappings).
sub location {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with location disambiguation...\n", 0, 'stamped');

  if (not $gene_scores->loaded) {
    $gsb->location_gene_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $fresh_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  $num++;
  my $matrix_name = "gene_matrix$num";
  my $shrunk_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_name);

  return ($shrunk_scores, $fresh_mappings);
}
191 
192 
193 #
194 # selectively rescore by penalising scores between genes with different
195 # internalIDs
196 #
# internal_id($num, $gsb, $mappings, $gene_scores)
#
# Retry step with internalID disambiguation. Logs a stamped message; when
# $gene_scores->loaded is false, calls $gsb->internal_id_rescore($gene_scores)
# and writes the matrix to file. Then runs basic_mapping() under
# "gene_mappings$num", increments $num and calls
# $gsb->create_shrinked_matrix() under "gene_matrix$num".
#
# $mappings is accepted positionally but not read here.
#
# Returns the list ($new_scores, $new_mappings).
sub internal_id {
  my ($self, $num, $gsb, $mappings, $gene_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  if (not $gene_scores->loaded) {
    $gsb->internal_id_rescore($gene_scores);
    $gene_scores->write_to_file;
  }

  my $mappings_name = "gene_mappings$num";
  my $fresh_mappings = $self->basic_mapping($gene_scores, $mappings_name);

  $num++;
  my $matrix_name = "gene_matrix$num";
  my $shrunk_scores =
    $gsb->create_shrinked_matrix($gene_scores, $fresh_mappings, $matrix_name);

  return ($shrunk_scores, $fresh_mappings);
}
218 
219 
220 1;
221 
Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblGeneGeneric
Definition: EnsemblGeneGeneric.pm:15
Bio::EnsEMBL::Utils::ScriptUtils
Definition: ScriptUtils.pm:11
Bio::EnsEMBL::IdMapping::SyntenyFramework
Definition: SyntenyFramework.pm:41
Bio::EnsEMBL::IdMapping::SyntenyFramework::build_synteny
public void build_synteny()
Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper
Definition: BaseMapper.pm:17
Bio::EnsEMBL::IdMapping::SyntenyFramework::new
public Bio::EnsEMBL::IdMapping::SyntenyFramework new()
Bio::EnsEMBL::IdMapping::InternalIdMapper
Definition: InternalIdMapper.pm:18
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68