3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 Questions may also be sent to the Ensembl help desk at
34 StableIdGenerator implementation
38 # inject the configured StableIdGenerator plugin
39 my $stable_id_generator = $conf->param(
'plugin_stable_id_generator');
40 inject($stable_id_generator);
42 # create a new StableIdGenerator object
43 my $generator_instance = $stable_id_generator->new(
44 -LOGGER => $self->logger,
46 -CACHE => $self->cache
49 # determine starting stable ID for new assignments
50 my $new_stable_id = $generator_instance->initial_stable_id(
'gene');
53 foreach my $target_gene (@all_target_genes) {
55 # if the stable Id for this gene was mapped, assign it
56 if ( $mapping_exists{ $target_gene->id } ) {
57 my $source_gene = $mappings{ $target_gene->id };
58 $target_gene->stable_id( $source_gene->stable_id );
60 # calculate and set version
62 $generator_instance->calculate_version( $source_gene,
64 $target_gene->version($version);
66 # no mapping exists, assign a new stable Id
68 $target_gene->stable_id($new_stable_id);
69 $target_gene->version(
'1');
71 # increment the stable Id (to be assigned to the next unmapped gene)
73 $generator_instance->increment_stable_id($new_stable_id);
79 This is the
default implementation
for a StableIdGenerator, which
81 stable Ids and increment versions on mapped stable Ids. Refer to the
82 documentation in
this module
if you would like to implement your own
85 The stable Id mapping application allows you to plugin your own
86 implementation by specifying it with the --plugin_stable_id_generator
87 configuration parameter.
89 Requirements
for a StableIdGenerator plugin:
92 - implement all methods listed in METHODS below (see method POD
for
103 package Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric;
107 no warnings
'uninitialized';
115 =head2 initial_stable_id
117 Arg[1] : String $type - an entity type (gene|
transcript|translation|
exon)
118 Example : my $new_stable_id = $generator->initial_stable_id(
'gene');
119 Description : Determine the initial stable Id to use
for new assignments. This
120 method is called once at the beginning of stable Id mapping.
121 Return type : String - a stable Id of appropriate type
# initial_stable_id( $type )
#
# Determines the stable ID that new assignments of the given $type start
# from.
#
#   $type - entity type string; it is interpolated into the configuration
#           key "starting_${type}_stable_id" and into the gene_archive
#           column name "${type}_stable_id".
#
# Lookup order, as far as the visible lines show:
#   1. If the configuration parameter "starting_${type}_stable_id" is
#      defined, that value is logged and returned unchanged (it is not
#      incremented).
#   2. Otherwise the SELECT MAX(stable_id) statement below (presumably
#      held in $sql) is run against the source database handle; only IDs
#      LIKE ENS%, ASMPATCH% or BRAKER% are considered.
#   3. Unless $type is 'exon', MAX(${type}_stable_id) from gene_archive
#      replaces that value when it is true, passes is_valid() and is
#      greater in a string comparison (gt).
#   A defined result of steps 2/3 is passed through increment_stable_id()
#   before it is returned.
#
# Returns the stable ID string. Presumably returns undef after logging a
# warning when no ID was found - the else branch is not visible in this
# listing, TODO confirm.
#
# NOTE(review): several original lines are absent from this listing (the
# declaration of $init_stable_id, the opening of the $sql statement and
# presumably its FROM clause, closing braces). The comments here describe
# only the lines that are visible.
129 sub initial_stable_id {
130 my ( $self, $type ) = @_;
134 # Use stable ID from configuration if set.
135 $init_stable_id = $self->conf->param(
"starting_${type}_stable_id");
136 if ( defined($init_stable_id) ) {
137 $self->logger->debug(
"Using pre-configured $init_stable_id " .
138 "as base for new $type stable IDs.\n" );
139 return $init_stable_id;
# No pre-configured value: fall back to the IDs stored in the source
# database.
142 my $s_dba = $self->cache->get_DBAdaptor(
'source');
143 my $s_dbh = $s_dba->dbc->db_handle;
145 # look in the ${type} table first
147 SELECT MAX(stable_id)
149 WHERE stable_id LIKE
"ENS%"
150 OR stable_id LIKE
"ASMPATCH%"
151 OR stable_id LIKE
"BRAKER%"
154 $init_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );
156 # Also look in gene_archive to make sure there are no larger IDs there.
# This check is skipped for exons.
158 if ( $type ne
'exon' ) {
159 $sql = qq(SELECT MAX(${type}_stable_id) FROM gene_archive);
160 my $archived_stable_id = $self->fetch_value_from_db( $s_dbh, $sql );
# Adopt the archived ID only if it is true, well-formed and sorts after
# the current maximum. 'gt' is a string comparison; this file disables
# 'uninitialized' warnings, so an undefined $init_stable_id silently
# compares as the empty string.
161 if ( $archived_stable_id &&
162 $self->is_valid($archived_stable_id) &&
163 ( $archived_stable_id gt $init_stable_id ) )
165 $init_stable_id = $archived_stable_id;
169 if ( defined($init_stable_id) ) {
170 # Since $init_stable_id now is the highest existing stable ID for
171 # this object type, we need to increment it to find the first one we
172 # want to use for new assignments.
173 $init_stable_id = $self->increment_stable_id($init_stable_id);
175 $self->logger->debug(
176 "Using $init_stable_id as base for new $type stable IDs.\n");
# NOTE(review): the warning below presumably sits in the else branch of
# the defined() test above (the 'else' line is not visible here).
180 $self->logger->warning(
181 "Can't find highest ${type}_stable_id in source db.\n" );
184 return $init_stable_id;
185 } ## end sub initial_stable_id
190 Arg[1] : String $stable_id - the stable Id to increment
191 Example : $next_stable_id = $generator->increment_stable_id(
193 Description : Increments the stable Id used
for new assignments. This method is
194 called after each
new stable Id assignment to generate the next
195 stable Id to be used.
196 Return type : String - the next
new stable Id
197 Exceptions : thrown on missing or malformed argument
# Body of increment_stable_id: validates the given stable ID, refuses to
# increment LRG IDs, and builds the next ID from the pieces captured by
# the pattern match below.
# NOTE(review): the 'sub' line and the declarations of $self and $number
# are not visible in this listing.
206 my $stable_id = shift;
# Reject undefined or malformed IDs before trying to take them apart.
208 if ( !$self->is_valid($stable_id) ) {
209 throw( sprintf(
"Unknown or missing stable ID '%s'", $stable_id ) );
# IDs starting with LRG are never incremented; receiving one here is
# treated as an error.
212 if ( $stable_id =~ /^LRG/ ) {
213 throw( sprintf(
"We do not increment LRG genes... (got '%s'). "
214 .
"Something's wrong.",
# Capture groups: $1 = prefix (ENS, ASMPATCH or BRAKER), $2 = the
# uppercase letters that follow, $3 = the trailing digits.
218 $stable_id =~ /^(ENS|ASMPATCH|BRAKER)([A-Z]+)(\d+)$/;
# NOTE(review): $number is presumably initialised from $3 on a line not
# shown here. If it still holds the digit string, Perl's magical string
# auto-increment would keep leading zeros (e.g. '0009' -> '0010') -
# TODO confirm.
221 my $new_stable_id = $1 . $2 . ( ++$number );
223 return $new_stable_id;
229 Arg[1] : String $stable_id - the stable Id to check
230 Example : unless ($generator->is_valid($stable_id)) {
231 die
"Invalid stable Id: $stable_id.\n";
233 Description : Tests whether a stable Id is valid (according to the Ensembl stable
234 Id format definition).
235 Return type : Boolean - TRUE
if valid, FALSE otherwise
# Body of is_valid: checks $stable_id against the recognised ID formats.
# NOTE(review): the 'sub' line and the return statements are not visible
# in this listing.
244 my ( $self, $stable_id ) = @_;
246 if ( defined($stable_id) ) {
# Two shapes are recognised: a prefix of ENS, ASMPATCH or BRAKER followed
# by one or more uppercase letters and then one or more digits, or any
# string that starts with LRG.
247 if ( $stable_id =~ /^(ENS|ASMPATCH|BRAKER)([A-Z]+)(\d+)$/
248 || $stable_id =~ /^LRG/ )
258 =head2 calculate_version
262 Example : my $version = $generator->calculate_version($source_gene,
264 $target_gene->
version($version);
265 Description : Determines the version
for a mapped stable Id. For Ensembl
266 genes, the rules
for incrementing the version number are:
267 - exons:
if exon sequence changed
268 -
transcript:
if spliced
exon sequence changed,
if number of exons or exon coordinates changed,
if translation sequence changed or
if seq region name changed
269 - translation:
if translated sequence changed
271 Return type : String - the version to be used
272 Exceptions : thrown on wrong argument
# calculate_version( $s_obj, $t_obj )
#
# Works out the version for a mapped stable ID by comparing the source
# object ($s_obj) with the target object ($t_obj). It starts from the
# source object's version and increments it by one when a change is
# detected.
#
# The comparison is chosen by the class of $s_obj only ($t_obj is not
# type-checked in the visible code):
#   TinyExon        - seq() differs
#   TinyTranscript  - seq_md5_sum(), concatenated exon start/end
#                     coordinates, translation sequence (only when both
#                     objects have a translation) or seq_region_name()
#                     differs
#   TinyTranslation - seq() differs
#   TinyGene        - the sorted list of transcript "stable_id.version"
#                     strings differs
# Any other class reaches throw("Unknown object type: ...").
#
# NOTE(review): several original lines are absent from this listing (the
# declaration of $change, the join separator, closing braces, the final
# else and the return statement). The POD states that the version is
# returned - TODO confirm against the full source.
279 sub calculate_version {
280 my ( $self, $s_obj, $t_obj ) = @_;
282 my $version = $s_obj->version();
284 if ( $s_obj->isa(
'Bio::EnsEMBL::IdMapping::TinyExon') ) {
285 # increment version if sequence changed
286 if ( $s_obj->seq() ne $t_obj->seq() ) { ++$version }
288 elsif ( $s_obj->isa(
'Bio::EnsEMBL::IdMapping::TinyTranscript') ) {
# Each check below only sets the $change flag; the version is bumped at
# most once, after all of them have run.
290 # increment version if spliced exon sequence changed
291 if ( $s_obj->seq_md5_sum() ne $t_obj->seq_md5_sum() ) { $change = 1 }
293 # Look for changes in exon coordinates (start/end of every exon)
# NOTE(review): start and end values are concatenated with no delimiter,
# so two different coordinate sets could produce the same string (e.g.
# start 1 / end 23 vs. start 12 / end 3).
294 my $source_exon_string;
295 my $target_exon_string;
296 foreach my $exon (@{ $s_obj->get_all_Exons() } ) {
297 $source_exon_string .= $exon->start();
298 $source_exon_string .= $exon->end();
300 foreach my $exon (@{ $t_obj->get_all_Exons() } ) {
301 $target_exon_string .= $exon->start();
302 $target_exon_string .= $exon->end();
305 if ($source_exon_string ne $target_exon_string) { $change = 1; }
307 # increment version if translation sequence changed
308 # Can happen if Havana move initiation start site or stop codon
# The sequences are compared only when both source and target have a
# translation.
309 if ($s_obj->translation and $t_obj->translation) {
310 if ($s_obj->translation->seq ne $t_obj->translation->seq) { $change = 1; }
313 # Look for changes on the region
314 if ( $s_obj->seq_region_name() ne $t_obj->seq_region_name() ) { $change = 1 }
316 if ($change) { ++$version }
319 elsif ( $s_obj->isa(
'Bio::EnsEMBL::IdMapping::TinyTranslation') ) {
320 # increment version if transcript or translation sequences changed
321 if ( $s_obj->seq() ne $t_obj->seq() ) { ++$version }
323 elsif ( $s_obj->isa(
'Bio::EnsEMBL::IdMapping::TinyGene') ) {
324 # increment version if any transcript changed
# Build one identity string per gene from its transcripts'
# "stable_id.version" pairs, sorted by stable ID so that transcript order
# does not matter, then compare the two strings.
326 my $s_tr_ident = join(
328 map { $_->stable_id() .
'.' . $_->version() } sort {
329 $a->stable_id() cmp $b->stable_id()
330 } @{ $s_obj->get_all_Transcripts() } );
331 my $t_tr_ident = join(
333 map { $_->stable_id() .
'.' . $_->version() } sort {
334 $a->stable_id() cmp $b->stable_id()
335 } @{ $t_obj->get_all_Transcripts() } );
337 if ( $s_tr_ident ne $t_tr_ident ) { ++$version }
# NOTE(review): presumably the final else branch - reached when $s_obj is
# none of the four classes tested above.
340 throw(
"Unknown object type: " . ref($s_obj) );
344 } ## end sub calculate_version