ensembl-hive  2.8.1
EnsemblGeneric.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 StableIdGenerator implementation
35 
36 =head1 SYNOPSIS
37 
38  # inject the configured StableIdGenerator plugin
39  my $stable_id_generator = $conf->param('plugin_stable_id_generator');
40  inject($stable_id_generator);
41 
42  # create a new StableIdGenerator object
43  my $generator_instance = $stable_id_generator->new(
44  -LOGGER => $self->logger,
45  -CONF => $self->conf,
46  -CACHE => $self->cache
47  );
48 
49  # determine starting stable ID for new assignments
50  my $new_stable_id = $generator_instance->initial_stable_id('gene');
51 
52  # loop over genes
53  foreach my $target_gene (@all_target_genes) {
54 
55  # if the stable Id for this gene was mapped, assign it
56  if ( $mapping_exists{ $target_gene->id } ) {
57  my $source_gene = $mappings{ $target_gene->id };
58  $target_gene->stable_id( $source_gene->stable_id );
59 
60  # calculate and set version
61  my $version =
62  $generator_instance->calculate_version( $source_gene,
63  $target_gene );
64  $target_gene->version($version);
65 
66  # no mapping exists, assign a new stable Id
67  } else {
68  $target_gene->stable_id($new_stable_id);
69  $target_gene->version('1');
70 
71  # increment the stable Id (to be assigned to the next unmapped gene)
72  $new_stable_id =
73  $generator_instance->increment_stable_id($new_stable_id);
74  }
75  }
76 
77 =head1 DESCRIPTION
78 
79 This is the default implementation for a StableIdGenerator, which
80 is used by Bio::EnsEMBL::IdMapping::StableIdMapper to generate new
81 stable Ids and increment versions on mapped stable Ids. Refer to the
82 documentation in this module if you would like to implement your own
83 StableIdGenerator.
84 
85 The stable Id mapping application allows you to plug in your own
86 implementation by specifying it with the --plugin_stable_id_generator
87 configuration parameter.
88 
89 Requirements for a StableIdGenerator plugin:
90 
92  - implement all methods listed in METHODS below (see method POD for
93  signatures)
94 
95 =head1 METHODS
96 
97  initial_stable_id
98  increment_stable_id
99  calculate_version
100 
101 =cut
102 
103 package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric;
104 
105 use strict;
106 use warnings;
107 no warnings 'uninitialized';
108 
111 
112 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
113 
114 
115 =head2 initial_stable_id
116 
117  Arg[1] : String $type - an entity type (gene|transcript|translation|exon)
118  Example : my $new_stable_id = $generator->initial_stable_id('gene');
119  Description : Determine the initial stable Id to use for new assignments. This
120  method is called once at the beginning of stable Id mapping.
121  Return type : String - a stable Id of appropriate type
122  Exceptions : none
124  Status : At Risk
125  : under development
126 
127 =cut
128 
sub initial_stable_id {
  my ( $self, $type ) = @_;

  # Work out the first stable ID to assign to new objects of the given
  # $type.  A "starting_${type}_stable_id" configuration parameter, when
  # defined, is returned as-is.  Otherwise the highest stable ID found in
  # the source database is incremented and returned.  When nothing can be
  # found a warning is logged and undef is returned.

  # A pre-configured starting point takes precedence over the database.
  my $configured = $self->conf->param("starting_${type}_stable_id");
  if ( defined $configured ) {
    $self->logger->debug( "Using pre-configured $configured " .
                          "as base for new $type stable IDs.\n" );
    return $configured;
  }

  my $source_dbh = $self->cache->get_DBAdaptor('source')->dbc->db_handle;

  # Highest matching stable ID currently stored in the ${type} table.
  my $table_sql = qq(
    SELECT MAX(stable_id)
    FROM ${type}
    WHERE stable_id LIKE "ENS%"
    OR stable_id LIKE "ASMPATCH%"
    OR stable_id LIKE "BRAKER%"
  );
  my $highest = $self->fetch_value_from_db( $source_dbh, $table_sql );

  # gene_archive is consulted for every type except exons; a larger,
  # valid archived ID wins over the one from the ${type} table.
  unless ( $type eq 'exon' ) {
    my $archive_sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive);
    my $archived = $self->fetch_value_from_db( $source_dbh, $archive_sql );

    $highest = $archived
      if $archived
      && $self->is_valid($archived)
      && ( $archived gt $highest );
  }

  # Nothing found anywhere: warn and hand back the undefined value.
  if ( !defined $highest ) {
    $self->logger->warning(
                     "Can't find highest ${type}_stable_id in source db.\n" );
    return $highest;
  }

  # $highest is the largest ID already in use, so the first ID available
  # for new assignments is the one following it.
  my $first_new = $self->increment_stable_id($highest);

  $self->logger->debug(
                 "Using $first_new as base for new $type stable IDs.\n");

  return $first_new;
} ## end sub initial_stable_id
186 
187 
188 =head2 increment_stable_id
189 
190  Arg[1] : String $stable_id - the stable Id to increment
191  Example : $next_stable_id = $generator->increment_stable_id(
192  $current_stable_id);
193  Description : Increments the stable Id used for new assignments. This method is
194  called after each new stable Id assignment to generate the next
195  stable Id to be used.
196  Return type : String - the next new stable Id
197  Exceptions : thrown on missing or malformed argument
199  Status : At Risk
200  : under development
201 
202 =cut
203 
sub increment_stable_id {
  my ( $self, $stable_id ) = @_;

  # Return the stable ID that follows $stable_id.  Throws when the ID is
  # missing or malformed, or when it is an LRG identifier (those are
  # never incremented).

  if ( !$self->is_valid($stable_id) ) {
    throw( sprintf( "Unknown or missing stable ID '%s'", $stable_id ) );
  }

  if ( $stable_id =~ /^LRG/ ) {
    throw( sprintf( "We do not increment LRG genes... (got '%s'). "
                      . "Something's wrong.",
                    $stable_id ) );
  }

  # Capture straight into lexicals so a failed match can never leave us
  # working with stale $1/$2/$3 from an earlier, unrelated match.
  my ( $prefix, $type_code, $number ) =
    $stable_id =~ /^(ENS|ASMPATCH|BRAKER)([A-Z]+)(\d+)$/;

  # Reachable only if is_valid() accepted something this pattern rejects
  # (for instance an overriding is_valid() in a subclass).
  if ( !defined $number ) {
    throw( sprintf( "Unknown or missing stable ID '%s'", $stable_id ) );
  }

  # $number has only ever been used as a string, so ++ performs Perl's
  # string increment: leading zeros are kept ("0009" becomes "0010").
  ++$number;

  return $prefix . $type_code . $number;
}
225 
226 
227 =head2 is_valid
228 
229  Arg[1] : String $stable_id - the stable Id to check
230  Example : unless ($generator->is_valid($stable_id)) {
231  die "Invalid stable Id: $stable_id.\n";
232  }
233  Description : Tests a stable Id to be valid (according to the Ensembl stable
234  Id format definition).
235  Return type : Boolean - TRUE if valid, FALSE otherwise
236  Exceptions : none
237  Caller : general
238  Status : At Risk
239  : under development
240 
241 =cut
242 
sub is_valid {
  my ( $self, $stable_id ) = @_;

  # Returns 1 when $stable_id looks like a stable ID this generator
  # recognises, 0 otherwise.

  # An undefined value is never a valid stable ID.
  return 0 unless defined $stable_id;

  # LRG identifiers are accepted on their prefix alone.
  return 1 if $stable_id =~ /^LRG/;

  # Everything else needs a known prefix, one or more capital letters
  # and a run of digits.
  return $stable_id =~ /^(ENS|ASMPATCH|BRAKER)([A-Z]+)(\d+)$/ ? 1 : 0;
}
256 
257 
258 =head2 calculate_version
259 
260  Arg[1] : Bio::EnsEMBL::IdMapping::TinyFeature $s_obj - source object
261  Arg[2] : Bio::EnsEMBL::IdMapping::TinyFeature $t_obj - target object
262  Example : my $version = $generator->calculate_version($source_gene,
263  $target_gene);
264  $target_gene->version($version);
265  Description : Determines the version for a mapped stable Id. For Ensembl
266  genes, the rules for incrementing the version number are:
267  - exons: if exon sequence changed
268  - transcript: if spliced exon sequence changed or if number of exons changed
269  - translation: if translated sequence changed
270  - gene: if any of its transcripts changed
271  Return type : String - the version to be used
272  Exceptions : thrown on wrong argument
274  Status : At Risk
275  : under development
276 
277 =cut
278 
sub calculate_version {
  my ( $self, $s_obj, $t_obj ) = @_;

  # Work out the version for a mapped stable ID: start from the source
  # object's version and add one when the target object differs from it.
  # What counts as a difference depends on the class of $s_obj.  Throws
  # when $s_obj is none of the four supported Tiny* classes.

  my $version = $s_obj->version();

  if ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyExon') ) {
    # increment version if sequence changed
    if ( $s_obj->seq() ne $t_obj->seq() ) { ++$version }
  }
  elsif ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyTranscript') ) {
    my $change = 0;

    # increment version if spliced exon sequence changed
    if ( $s_obj->seq_md5_sum() ne $t_obj->seq_md5_sum() ) { $change = 1 }

    # Look for changes in exon coordinates.  Start and end values are
    # joined with explicit delimiters: plain concatenation would make
    # different coordinate sets collide (e.g. 1..23 and 12..3 both
    # giving "123") and hide a real change.
    my $exon_coordinates = sub {
      my ($transcript) = @_;
      return join( ',',
                   map { $_->start() . ':' . $_->end() }
                     @{ $transcript->get_all_Exons() } );
    };

    if ( $exon_coordinates->($s_obj) ne $exon_coordinates->($t_obj) ) {
      $change = 1;
    }

    # increment version if translation sequence changed
    # Can happen if Havana move initiation start site or stop codon
    if ( $s_obj->translation and $t_obj->translation ) {
      if ( $s_obj->translation->seq ne $t_obj->translation->seq ) {
        $change = 1;
      }
    }

    # Look for changes on the region
    if ( $s_obj->seq_region_name() ne $t_obj->seq_region_name() ) {
      $change = 1;
    }

    if ($change) { ++$version }
  }
  elsif ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyTranslation') ) {
    # increment version if the translation sequence changed
    if ( $s_obj->seq() ne $t_obj->seq() ) { ++$version }
  }
  elsif ( $s_obj->isa('Bio::EnsEMBL::IdMapping::TinyGene') ) {
    # increment version if any transcript changed: compare the sorted
    # "stable_id.version" lists of both genes
    my $s_tr_ident = join(
      ":",
      map { $_->stable_id() . '.' . $_->version() }
        sort { $a->stable_id() cmp $b->stable_id() }
        @{ $s_obj->get_all_Transcripts() } );
    my $t_tr_ident = join(
      ":",
      map { $_->stable_id() . '.' . $_->version() }
        sort { $a->stable_id() cmp $b->stable_id() }
        @{ $t_obj->get_all_Transcripts() } );

    if ( $s_tr_ident ne $t_tr_ident ) { ++$version }
  }
  else {
    throw( "Unknown object type: " . ref($s_obj) );
  }

  return $version;
} ## end sub calculate_version
345 
346 
347 1;
348 
transcript
public transcript()
Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric
Definition: EnsemblGeneric.pm:69
map
public map()
Bio::EnsEMBL::IdMapping::BaseObject
Definition: BaseObject.pm:25
Bio::EnsEMBL::IdMapping::StableIdMapper
Definition: StableIdMapper.pm:16
increment_stable_id
public increment_stable_id()
exon
public exon()
Bio::EnsEMBL::IdMapping::TinyFeature
Definition: TinyFeature.pm:30
Bio::EnsEMBL::IdMapping::StableIdMapper::map_stable_ids
public map_stable_ids()
Bio::EnsEMBL::IdMapping::TinyFeature::version
public Int version()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68