3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
34 Handles mapping between two coordinate systems
using the information
35 stored in the assembly table.
40 $asma = $db->get_AssemblyMapperAdaptor();
41 $csa = $db->get_CoordSystemAdaptor();
43 my $chr_cs = $cs_adaptor->fetch_by_name(
'chromosome',
'NCBI33' );
44 my $ctg_cs = $cs_adaptor->fetch_by_name(
'contig');
46 $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $cs1, $cs2 );
48 # Map to contig coordinate system from chromosomal.
50 $asm_mapper->map(
'X', 1_000_000, 2_000_000, 1, $chr_cs );
52 # Map to chromosome coordinate system from contig.
54 $asm_mapper->map(
'AL30421.1.200.92341', 100, 10000, -1,
57 # List contig names for a region of chromsome.
58 @ctg_ids = $asm_mapper->list_ids(
'13', 1_000_000, 1, $chr_cs );
60 # List chromosome names for a contig region.
62 $asm_mapper->list_ids(
'AL30421.1.200.92341', 1, 1000, -1,
68 conversion of coordinates between any two coordinate systems with an
69 relationship explicitly defined in the assembly table. In the future
70 it may be possible to perform multiple step (implicit) mapping between
74 generic mapper
object between disjoint coordinate systems.
81 package Bio::EnsEMBL::AssemblyMapper;
88 use Scalar::Util qw(weaken);
91 my $ASSEMBLED =
'assembled';
92 my $COMPONENT =
'component';
94 my $DEFAULT_MAX_PAIR_COUNT = 1000;
102 Example : Should use AssemblyMapperAdaptor->fetch_by_CoordSystems()
105 Exceptions : Throws
if multiple coord_systems are provided
106 Caller : AssemblyMapperAdaptor
112 my ( $proto, $adaptor, @coord_systems ) = @_;
114 my $class = ref($proto) || $proto;
116 my $self = bless( {}, $class );
118 $self->adaptor($adaptor);
122 if ( @coord_systems != 2 ) {
123 throw(
'Can only map between two coordinate systems. '
124 . scalar(@coord_systems)
125 .
' were provided' );
128 # Set the component and assembled coordinate systems
129 $self->{
'asm_cs'} = $coord_systems[0];
130 $self->{
'cmp_cs'} = $coord_systems[1];
132 # We load the mapper calling the 'ASSEMBLED' the 'from' coord system
133 # and the 'COMPONENT' the 'to' coord system.
136 $coord_systems[0], $coord_systems[1] );
138 $self->{
'max_pair_count'} = $DEFAULT_MAX_PAIR_COUNT;
143 =head2 max_pair_count
145 Arg [1] : (optional)
int $max_pair_count
146 Example : $mapper->max_pair_count(100000)
147 Description: Getter/Setter
for the number of mapping pairs allowed
148 in the
internal cache. This can be used to
override
149 the
default value (1000) to tune the performance and
150 memory
usage for certain scenarios. Higher value
151 means bigger cache, more memory used.
160 my ( $self, $value ) = @_;
162 if ( defined($value) ) {
163 $self->{
'max_pair_count'} = $value;
166 return $self->{
'max_pair_count'};
172 Example : $mapper->max_pair_count(10e6);
173 $mapper->register_all();
174 Description: Pre-registers all assembly information in
this
175 mapper. The cache size should be set to a
176 sufficiently large value so that all of the
177 information can be stored. This method is useful
178 when *a lot* of mapping will be done in regions
179 which are distributed around the genome. After
180 registration the mapper will consume a lot of memory
181 but will not have to perform any SQL and will be
185 Caller : Specialised programs doing a lot of mapping.
193 $self->adaptor()->register_all($self);
198 Arg [1] :
string $frm_seq_region
199 The name of the sequence region to transform FROM.
200 Arg [2] :
int $frm_start
201 The start of the region to transform FROM.
202 Arg [3] :
int $frm_end
203 The end of the region to transform FROM.
204 Arg [4] :
int $strand
205 The strand of the region to transform FROM.
207 The coordinate system to transform FROM
208 Arg [6] : Dummy placeholder to keep the
interface consistent
209 across different mappers
212 Arg [8] : (optional)
boolean
213 Whether to include the original coordinates or not
215 $asm_mapper->map(
'X', 1_000_000, 2_000_000, 1,
217 Description: Transforms coordinates from one coordinate system to
221 Exceptions : Throws
if if the specified TO coordinat system is not
222 one of the coordinate systems associated with
this
230 throw(
'Incorrect number of arguments.')
if (!( @_ >= 6));
232 my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand,
233 $frm_cs, $dummy, $to_slice, $include_org_coord )
236 my $mapper = $self->{
'mapper'};
237 my $asm_cs = $self->{
'asm_cs'};
238 my $cmp_cs = $self->{
'cmp_cs'};
239 my $adaptor = $self->{
'adaptor'};
245 ->seq_regions_to_ids( $frm_cs, [$frm_seq_region_name] )->[0];
247 # Speed critical section:
248 # Try to do simple pointer equality comparisons of the coord system
249 # objects first since this is likely to work most of the time and is
250 # much faster than a function call.
252 if ( $frm_cs == $cmp_cs
253 || ( $frm_cs != $asm_cs && $frm_cs->equals($cmp_cs) ) )
255 if ( !$self->{
'cmp_register'}->{$seq_region_id} ) {
256 $adaptor->register_component( $self, $seq_region_id );
260 } elsif ( $frm_cs == $asm_cs || $frm_cs->equals($asm_cs) ) {
262 # This can be probably be sped up some by only calling registered
263 # assembled if needed.
264 $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
271 sprintf(
"Coordinate system %s %s is neither the assembled "
272 .
"nor the component coordinate system "
273 .
"of this AssemblyMapper",
274 $frm_cs->name(), $frm_cs->version() ) );
279 $mapper->map_coordinates( $seq_region_id, $frm_start, $frm_end,
280 $frm_strand, $frm, $include_org_coord );
282 # decorate (org,)mapped coordinates with their corresponding region names
283 if ($include_org_coord) {
285 check_ref($_,
'Bio::EnsEMBL::Mapper::Coordinate') && # exclude gap
286 $_->{original}->name($adaptor->seq_ids_to_regions([$_->{original}->id])->[0]) &&
287 $_->{mapped}->name($adaptor->seq_ids_to_regions([$_->{mapped}->id])->[0])
291 check_ref($_,
'Bio::EnsEMBL::Mapper::Coordinate') && # exclude gap
292 $_->name($adaptor->seq_ids_to_regions([$_->id])->[0])
307 Caller : AssemblyMapperAdaptor
315 $self->{
'mapper'}->flush();
316 $self->{
'cmp_register'} = {};
317 $self->{
'asm_register'} = {};
323 Example : $num_of_pairs = $mapper->size();
324 Description: Returns the number of pairs currently stored.
335 return $self->{
'mapper'}->{
'pair_count'};
340 Arg [1] :
string $frm_seq_region
341 The name of the sequence region to transform FROM.
342 Arg [2] :
int $frm_start
343 The start of the region to transform FROM.
344 Arg [3] :
int $frm_end
345 The end of the region to transform FROM.
346 Arg [4] :
int $strand
347 The strand of the region to transform FROM.
349 The coordinate system to transform FROM.
351 $asm_mapper->map(
'X', 1_000_000, 2_000_000, 1,
353 Description: Transforms coordinates from one coordinate system to
357 Exceptions : Throws
if the specified TO coordinat system is not
358 one of the coordinate systems associated with
this
367 throw(
'Incorrect number of arguments.');
370 my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand,
374 my $mapper = $self->{
'mapper'};
375 my $asm_cs = $self->{
'asm_cs'};
376 my $cmp_cs = $self->{
'cmp_cs'};
377 my $adaptor = $self->adaptor();
381 push @tmp, $frm_seq_region_name;
384 $self->adaptor()->seq_regions_to_ids( $frm_cs, \@tmp )->[0];
386 # Speed critical section:
387 # Try to do simple pointer equality comparisons of the coord system
388 # objects first since this is likely to work most of the time and is
389 # much faster than a function call.
391 if ( $frm_cs == $cmp_cs
392 || ( $frm_cs != $asm_cs && $frm_cs->equals($cmp_cs) ) )
395 if ( !$self->{
'cmp_register'}->{$seq_region_id} ) {
396 $adaptor->register_component( $self, $seq_region_id );
400 } elsif ( $frm_cs == $asm_cs || $frm_cs->equals($asm_cs) ) {
402 # This can be probably be sped up some by only calling registered
403 # assembled if needed
404 $adaptor->register_assembled( $self, $seq_region_id, $frm_start,
411 sprintf(
"Coordinate system %s %s is neither the assembled "
412 .
"nor the component coordinate system "
413 .
"of this AssemblyMapper",
414 $frm_cs->name(), $frm_cs->version() ) );
419 $mapper->fastmap( $seq_region_id, $frm_start, $frm_end, $frm_strand,
425 Arg [1] :
string $frm_seq_region
426 The name of the sequence region of interest.
427 Arg [2] :
int $frm_start
428 The start of the region of interest.
429 Arg [3] :
int $frm_end
430 The end of the region to transform of interest.
432 The coordinate system to obtain overlapping IDs of.
433 Example :
foreach my $id (
434 $asm_mapper->list_ids(
'X', 1, 1000, $ctg_cs ) )
436 Description: Retrieves a list of overlapping seq_region names of
437 another coordinate system. This is the same as the
438 list_ids method but uses seq_region names rather
440 Return type: List of strings.
449 throw(
'Incorrect number of arguments.');
452 my ( $self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs ) =
455 my @tmp = ($frm_seq_region_name);
458 $self->
adaptor()->seq_regions_to_ids( $frm_cs, \@tmp )->[0];
460 if ( $frm_cs->equals( $self->component_CoordSystem() ) ) {
462 if ( !$self->have_registered_component($seq_region_id) ) {
463 $self->adaptor->register_component( $self, $seq_region_id );
466 # Pull out the 'from' identifiers of the mapper pairs. The we
467 # loaded the assembled side as the 'from' side in the constructor.
470 map ( { $_->from()->
id() }
471 $self->mapper()->list_pairs(
472 $seq_region_id, $frm_start, $frm_end, $COMPONENT
475 } elsif ( $frm_cs->equals( $self->assembled_CoordSystem() ) ) {
477 $self->adaptor->register_assembled( $self, $seq_region_id,
478 $frm_start, $frm_end );
480 # Pull out the 'to' identifiers of the mapper pairs we loaded the
481 # component side as the 'to' coord system in the constructor.
484 map ( { $_->to->id() }
485 $self->mapper()->list_pairs(
486 $seq_region_id, $frm_start, $frm_end, $ASSEMBLED
492 sprintf(
"Coordinate system %s %s is neither the assembled "
493 .
"nor the component coordinate system "
494 .
"of this AssemblyMapper",
495 $frm_cs->name(), $frm_cs->version() ) );
498 } ## end sub list_ids
500 =head2 list_seq_regions
502 Arg [1] :
string $frm_seq_region
503 The name of the sequence region of interest.
504 Arg [2] :
int $frm_start
505 The start of the region of interest.
506 Arg [3] :
int $frm_end
507 The end of the region to transform of interest.
509 The coordinate system to obtain overlapping IDs of.
510 Example :
foreach my $id (
511 $asm_mapper->list_seq_regions(
512 'X', 1, 1000, $chr_cs
514 Description: Retrieves a list of overlapping seq_region
internal
515 identifiers of another coordinate system. This is
516 the same as the list_seq_regions method but uses
517 internal identfiers rather than seq_region strings.
518 Return type: List of ints.
525 sub list_seq_regions {
527 throw(
'Incorrect number of arguments.');
530 my ( $self, $frm_seq_region, $frm_start, $frm_end, $frm_cs ) = @_;
532 # Retrieve the seq_region names.
535 $self->list_ids( $frm_seq_region, $frm_start, $frm_end, $frm_cs );
537 # The seq_regions are from the 'to' coordinate system not the from
538 # coordinate system we used to obtain them.
541 if ( $frm_cs->equals( $self->assembled_CoordSystem() ) ) {
542 $to_cs = $self->component_CoordSystem();
544 $to_cs = $self->assembled_CoordSystem();
547 # Convert them to IDs.
548 return @{ $self->
adaptor()->seq_ids_to_regions( \@seq_ids ) };
552 =head2 have_registered_component
554 Arg [1] :
string $cmp_seq_region
555 The name of the sequence region to check
for
557 Example :
if ( $asm_mapper->have_registered_component(
'AL240214.1') ) {}
558 Description: Returns
true if a given component region has
559 been registered with
this assembly mapper. This
560 should only be called by
this class or the
561 AssemblyMapperAdaptor. In other words,
do not use
562 this method unless you really know what you are
564 Return type: Boolean (0 or 1)
565 Exceptions : Throws on incorrect arguments.
566 Caller : Internal, AssemblyMapperAdaptor
571 sub have_registered_component {
572 my ( $self, $cmp_seq_region ) = @_;
574 if ( !defined($cmp_seq_region) ) {
575 throw(
'cmp_seq_region argument is required');
578 if ( exists( $self->{
'cmp_register'}->{$cmp_seq_region} ) ) {
585 =head2 have_registered_assembled
587 Arg [1] :
string $asm_seq_region
588 The name of the sequence region to check
for
590 Arg [2] :
int $chunk_id
591 The chunk number of the provided seq_region to check
593 Example :
if ( $asm_mapper->have_registered_component(
'X', 9 ) ) { }
594 Description: Returns
true if a given assembled region chunk
595 has been registered with
this assembly mapper.
596 This should only be called by
this class or the
597 AssemblyMapperAdaptor. In other words,
do not use
598 this method unless you really know what you are
600 Return type: Boolean (0 or 1)
601 Exceptions : Throws on incorrect arguments
602 Caller : Internal, AssemblyMapperAdaptor
607 sub have_registered_assembled {
608 my ( $self, $asm_seq_region, $chunk_id ) = @_;
610 if ( !defined($asm_seq_region) ) {
611 throw(
'asm_seq_region argument is required');
613 if ( !defined($chunk_id) ) {
614 throw(
'chunk_id is required');
618 exists( $self->{
'asm_register'}->{$asm_seq_region}->{$chunk_id} ) )
627 =head2 register_component
629 Arg [1] : integer $cmp_seq_region
630 The dbID of the component sequence region to
632 Example : $asm_mapper->register_component(
'AL312341.1');
633 Description: Flags a given component sequence region as registered
634 in
this assembly mapper. This should only be called
635 by
this class or the AssemblyMapperAdaptor.
637 Exceptions : Throws on incorrect arguments
638 Caller : Internal, AssemblyMapperAdaptor
643 sub register_component {
644 my ( $self, $cmp_seq_region ) = @_;
646 if ( !defined($cmp_seq_region) ) {
647 throw(
'cmp_seq_region argument is required');
650 $self->{
'cmp_register'}->{$cmp_seq_region} = 1;
653 =head2 register_assembled
655 Arg [1] : integer $asm_seq_region
656 The dbID of the sequence region to
register.
657 Arg [2] :
int $chunk_id
658 The chunk number of the provided seq_region to
register.
659 Example : $asm_mapper->register_assembled(
'X', 4 );
660 Description: Flags a given assembled region as registered in
this
661 assembly mapper. This should only be called by
this
662 class or the AssemblyMapperAdaptor. Do not call
this
663 method unless you really know what you are doing.
665 Exceptions : Throws on incorrect arguments
666 Caller : Internal, AssemblyMapperAdaptor
671 sub register_assembled {
672 my ( $self, $asm_seq_region, $chunk_id ) = @_;
674 if ( !defined($asm_seq_region) ) {
675 throw(
'asm_seq_region argument is required');
677 if ( !defined($chunk_id) ) {
678 throw(
'chunk_id srgument is required');
681 $self->{
'asm_register'}->{$asm_seq_region}->{$chunk_id} = 1;
687 Example : $mapper = $asm_mapper->mapper();
688 Description: Retrieves the
internal mapper used by
this Assembly
689 Mapper. This is unlikely to be useful unless you
690 _really_ know what you are doing.
693 Caller : Internal, AssemblyMapperAdaptor
701 return $self->{
'mapper'};
704 =head2 assembled_CoordSystem
707 Example : $cs = $asm_mapper->assembled_CoordSystem();
708 Description: Retrieves the assembled CoordSystem from
this
712 Caller : Internal, AssemblyMapperAdaptor
717 sub assembled_CoordSystem {
720 return $self->{
'asm_cs'};
723 =head2 component_CoordSystem
726 Example : $cs = $asm_mapper->component_CoordSystem();
727 Description: Retrieves the component CoordSystem from
this
731 Caller : Internal, AssemblyMapperAdaptor
736 sub component_CoordSystem {
739 return $self->{
'cmp_cs'};
745 Description: Getter/set terfor
this object's database adaptor.
746 Returntype : Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
754 my ( $self, $value ) = @_;
756 if ( defined($value) ) {
757 weaken($self->{'adaptor
'} = $value);
760 return $self->{'adaptor
'};