3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
40 -host =>
'ensembldb.ensembl.org',
50 my $chr33_cs = $csa->fetch_by_name(
'chromosome',
'NCBI33' );
51 my $chr34_cs = $csa->fetch_by_name(
'chromosome',
'NCBI34' );
52 my $ctg_cs = $csa->fetch_by_name(
'contig');
53 my $clone_cs = $csa->fetch_by_name(
'clone');
56 $asma->fetch_by_CoordSystems( $chr33_cs, $ctg_cs );
58 my $ncbi33_ncbi34_mapper =
59 $asm_adptr->fetch_by_CoordSystems( $chr33, $chr34 );
61 my $ctg_clone_mapper =
62 $asm_adptr->fetch_by_CoordSystems( $ctg_cs, $clone_cs );
67 Adaptor
for handling Assembly mappers. This is a I<Singleton>
class.
68 ie: There is only one per database (C<DBAdaptor>).
70 This is used to retrieve mappers between any two coordinate systems
71 whose makeup is described by the assembly table. Currently one step
72 (
explicit) and two step (implicit) pairwise mapping is supported. In
73 one-step mapping an explicit relationship between the coordinate systems
74 is defined in the assembly table. In two-step 'chained' mapping no
75 explicit mapping is present but the coordinate systems must share a
76 common mapping to an intermediate coordinate system.
91 use
Bio::
EnsEMBL::Utils::Cache;
#CPAN LRU cache
95 use integer; #
do proper arithmetic bitshifts
100 my $CHUNKFACTOR = 20; # 2^20 = approx. 10^6
104 Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor
for
105 the database
this assembly mapper is
using.
106 Example : my $asma =
new Bio::EnsEMBL::AssemblyMapperAdaptor($dbadaptor);
116 my($class, $dbadaptor) = @_;
118 my $self = $class->SUPER::new($dbadaptor);
120 $self->{
'_asm_mapper_cache'} = {};
122 # use a shared cache (for this database) that contains info about
125 $self->{
'sr_name_cache'} = $seq_region_cache->{
'name_cache'};
126 $self->{
'sr_id_cache'} = $seq_region_cache->{
'id_cache'};
133 =head2 cache_seq_ids_with_mult_assemblys
135 Example : $self->adaptor->cache_seq_ids_with_mult_assemblys();
136 Description: Creates a hash of the component seq region ids that
137 map to more than one assembly from the assembly table.
145 sub cache_seq_ids_with_mult_assemblys{
149 return if (defined($self->{
'multi_seq_ids'}));
151 $self->{
'multi_seq_ids'} = {};
154 SELECT sra.seq_region_id
155 FROM seq_region_attrib sra,
159 WHERE sra.attrib_type_id = at.attrib_type_id
160 AND code =
"MultAssem"
161 AND sra.seq_region_id = sr.seq_region_id
162 AND sr.coord_system_id = cs.coord_system_id
163 AND cs.species_id = ?);
165 my $sth = $self->prepare($sql);
167 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
173 $sth->bind_columns(\$seq_region_id);
175 while($sth->fetch()) {
176 $self->{
'multi_seq_ids'}->{$seq_region_id} = 1;
182 =head2 fetch_by_CoordSystems
185 One of the coordinate systems to retrieve the mapper
188 The other coordinate system to
map between
189 Description: Retrieves an Assembly mapper
for two coordinate
190 systems whose relationship is described in the
193 The ordering of the coodinate systems is arbitrary.
194 The following two statements are equivalent:
195 $mapper = $asma->fetch_by_CoordSystems($cs1,$cs2);
196 $mapper = $asma->fetch_by_CoordSystems($cs2,$cs1);
198 Exceptions : wrong argument types
204 sub fetch_by_CoordSystems {
209 if(!ref($cs1) || !$cs1->isa(
'Bio::EnsEMBL::CoordSystem')) {
210 throw(
"cs1 argument must be a Bio::EnsEMBL::CoordSystem.");
212 if(!ref($cs2) || !$cs2->isa(
'Bio::EnsEMBL::CoordSystem')) {
213 throw(
"cs2 argument must be a Bio::EnsEMBL::CoordSystem.");
216 # if($cs1->equals($cs2)) {
217 # throw("Cannot create mapper between same coord systems: " .
218 # $cs1->name . " " . $cs1->version);
221 if($cs1->is_top_level()) {
224 if($cs2->is_top_level()) {
228 my $csa = $self->db->get_CoordSystemAdaptor();
230 #retrieve the shortest possible mapping path between these systems
231 my @mapping_path = @{$csa->get_mapping_path($cs1,$cs2)};
238 my $key = join(
':',
map({defined($_)?$_->dbID():
"-"} @mapping_path));
240 my $asm_mapper = $self->{
'_asm_mapper_cache'}->{$key};
242 return $asm_mapper
if($asm_mapper);
244 if(@mapping_path == 1) {
245 throw(
"Incorrect mapping path defined in meta table. " .
246 "0 step mapping encountered between:\n" .
247 $cs1->name() .
" " . $cs1->version() .
" and " . $cs2->name .
" " .
251 if(@mapping_path == 2) {
252 #1 step regular mapping
255 # If you want multiple pieces on two seqRegions to map to each other
256 # you need to make an assembly.mapping entry that is seperated with a #
259 $self->{
'_asm_mapper_cache'}->{$key} = $asm_mapper;
263 if(@mapping_path == 3) {
264 #two step chained mapping
266 #in multi-step mapping it is possible get requests with the
267 #coordinate system ordering reversed since both mappings directions
268 #cache on both orderings just in case
269 #e.g. chr <-> contig <-> clone and clone <-> contig <-> chr
271 $self->{
'_asm_mapper_cache'}->{$key} = $asm_mapper;
272 $key = join(
':',
map({defined($_)?$_->dbID():
"-"} reverse(@mapping_path)));
273 $self->{
'_asm_mapper_cache'}->{$key} = $asm_mapper;
277 throw(
"Only 1 and 2 step coordinate system mapping is currently\n" .
278 "supported. Mapping between " .
279 $cs1->name() .
" " . $cs1->version() .
" and " .
280 $cs2->name() .
" " . $cs2->version() .
281 " requires ". (scalar(@mapping_path)-1) .
" steps.");
286 =head2 register_assembled
290 Arg [2] : integer $asm_seq_region
291 The dbID of the seq_region to be registered
292 Arg [3] :
int $asm_start
293 The start of the region to be registered
294 Arg [4] :
int $asm_end
295 The end of the region to be registered
297 This extracts the relevant data from the assembly
298 table and stores it in Mapper
internal to the $asm_mapper.
299 It therefore must be called before any mapping is
300 attempted on that region. Otherwise only gaps will
302 calls
this method when the need arises.
304 Exceptions :
throw if the seq_region to be registered does not exist
305 or
if it associated with multiple assembled pieces (bad data
313 sub register_assembled {
315 my $asm_mapper = shift;
316 my $asm_seq_region = shift;
317 my $asm_start = shift;
320 if(!ref($asm_mapper) || !$asm_mapper->isa(
'Bio::EnsEMBL::AssemblyMapper')) {
321 throw(
"Bio::EnsEMBL::AssemblyMapper argument expected");
324 throw(
"asm_seq_region argument expected")
if(!defined($asm_seq_region));
325 throw(
"asm_start argument expected")
if(!defined($asm_start));
326 throw(
"asm_end argument expected")
if(!defined($asm_end));
329 my $cmp_cs_id = $asm_mapper->component_CoordSystem->dbID();
331 #split up the region to be registered into fixed chunks
332 #this allows us to keep track of regions that have already been
333 #registered and also works under the assumption that if a small region
334 #is requested it is likely that other requests will be made in the
335 #vicinity (the minimum size registered the chunksize (2^chunkfactor)
338 #determine span of chunks
339 #bitwise shift right is fast and easy integer division
341 my($start_chunk, $end_chunk);
343 $start_chunk = $asm_start >> $CHUNKFACTOR;
344 $end_chunk = $asm_end >> $CHUNKFACTOR;
346 # inserts have start = end + 1, on boundary condition start_chunk
347 # could be less than end chunk
348 if($asm_start == $asm_end + 1) {
349 ($start_chunk, $end_chunk) = ($end_chunk, $start_chunk);
352 #find regions of continuous unregistered chunks
354 my ($begin_chunk_region,$end_chunk_region);
355 for ($i = $start_chunk; $i <= $end_chunk; $i++) {
356 if($asm_mapper->have_registered_assembled($asm_seq_region, $i)) {
357 if(defined($begin_chunk_region)) {
358 #this is the end of an unregistered region.
359 my $region = [($begin_chunk_region << $CHUNKFACTOR),
360 (($end_chunk_region+1) << $CHUNKFACTOR)-1];
361 push @chunk_regions, $region;
362 $begin_chunk_region = $end_chunk_region = undef;
365 $begin_chunk_region = $i
if(!defined($begin_chunk_region));
366 $end_chunk_region = $i+1;
367 $asm_mapper->register_assembled($asm_seq_region,$i);
371 #the last part may have been an unregistered region too
372 if(defined($begin_chunk_region)) {
373 my $region = [($begin_chunk_region << $CHUNKFACTOR),
374 (($end_chunk_region+1) << $CHUNKFACTOR) -1];
375 push @chunk_regions, $region;
378 return if(!@chunk_regions);
380 # keep the Mapper to a reasonable size
381 if( $asm_mapper->size() > $asm_mapper->max_pair_count() ) {
382 $asm_mapper->flush();
383 #we now have to go and register the entire requested region since we
384 #just flushed everything
386 @chunk_regions = ( [ ( $start_chunk << $CHUNKFACTOR)
387 , (($end_chunk+1) << $CHUNKFACTOR)-1 ] );
389 for( my $i = $start_chunk; $i <= $end_chunk; $i++ ) {
390 $asm_mapper->register_assembled( $asm_seq_region, $i );
394 # my $asm_seq_region_id =
395 # $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
397 # Retrieve the description of how the assembled region is made from
398 # component regions for each of the continuous blocks of unregistered,
405 asm.cmp_seq_region_id,
412 assembly
asm, seq_region sr
414 asm.asm_seq_region_id = ? AND
416 ? >=
asm.asm_start AND
417 asm.cmp_seq_region_id = sr.seq_region_id AND
418 sr.coord_system_id = ?
421 my $sth = $self->prepare($q);
423 foreach my $region (@chunk_regions) {
424 my($region_start, $region_end) = @$region;
425 $sth->bind_param(1,$asm_seq_region,SQL_INTEGER);
426 $sth->bind_param(2,$region_start,SQL_INTEGER);
427 $sth->bind_param(3,$region_end,SQL_INTEGER);
428 $sth->bind_param(4,$cmp_cs_id,SQL_INTEGER);
432 my($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $ori,
433 $asm_start, $asm_end, $cmp_seq_region_length);
435 $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
436 \$cmp_seq_region, \$cmp_seq_region_length, \$ori,
437 \$asm_start, \$asm_end);
440 # Load the unregistered regions of the mapper
442 while($sth->fetch()) {
443 next
if($asm_mapper->have_registered_component($cmp_seq_region_id)
444 and !defined($self->{
'multi_seq_ids'}->{$cmp_seq_region_id}));
445 $asm_mapper->register_component($cmp_seq_region_id);
446 $asm_mapper->mapper->add_map_coordinates(
447 $asm_seq_region, $asm_start, $asm_end,
449 $cmp_seq_region_id, $cmp_start, $cmp_end);
451 my $arr = [ $cmp_seq_region_id, $cmp_seq_region,
452 $cmp_cs_id, $cmp_seq_region_length ];
454 $self->{
'sr_name_cache'}->{
"$cmp_seq_region:$cmp_cs_id"} = $arr;
455 $self->{
'sr_id_cache'}->{
"$cmp_seq_region_id"} = $arr;
464 sub _seq_region_name_to_id {
469 if(!defined($sr_name) or
471 throw(
'seq_region_name and coord_system_id args are required');
474 my $arr = $self->{
'sr_name_cache'}->{
"$sr_name:$cs_id"};
479 # Get the seq_region_id via the name. This would be quicker if we just
480 # used internal ids instead but stored but then we lose the ability
481 # the transform accross databases with different internal ids
483 my $sth = $self->prepare(
"SELECT seq_region_id, length " .
485 "WHERE name = ? AND coord_system_id = ?");
487 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
488 $sth->bind_param(2,$cs_id,SQL_INTEGER);
491 my @row = $sth->fetchrow_array();
493 throw(
"No-existent seq_region [$sr_name] in coord system $cs_id");
495 my @more = $sth->fetchrow_array();
497 throw(
"Ambiguous seq_region [$sr_name] in coord system $cs_id");
500 my ($sr_id, $sr_length) = @row;
503 $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
505 $self->{
'sr_name_cache'}->{
"$sr_name:$cs_id"} = $arr;
506 $self->{
'sr_id_cache'}->{
"$sr_id"} = $arr;
511 sub _seq_region_id_to_name {
515 ($sr_id) ||
throw(
'seq_region_is required');
517 my $arr = $self->{
'sr_id_cache'}->{
"$sr_id"};
522 # Get the seq_region name via the id. This would be quicker if we just
523 # used internal ids instead but stored but then we lose the ability
524 # the transform accross databases with different internal ids
526 my $sth = $self->prepare(
"SELECT name, length ,coord_system_id " .
528 "WHERE seq_region_id = ? ");
530 $sth->bind_param(1,$sr_id,SQL_INTEGER);
533 my @row = $sth->fetchrow_array();
534 if(!$sth->rows() == 1) {
535 throw(
"non-existant seq_region [$sr_id]");
538 my ($sr_name, $sr_length, $cs_id) = @row;
541 $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
543 $self->{
'sr_name_cache'}->{
"$sr_name:$cs_id"} = $arr;
544 $self->{
'sr_id_cache'}->{
"$sr_id"} = $arr;
550 =head2 register_component
554 Arg [2] : integer $cmp_seq_region
555 The dbID of the seq_region to be registered
557 This extracts the relevant data from the assembly
558 table and stores it in Mapper
internal to the $asm_mapper.
559 It therefore must be called before any mapping is
560 attempted on that region. Otherwise only gaps will
562 calls
this method when the need arises.
564 Exceptions :
throw if the seq_region to be registered does not exist
565 or
if it associated with multiple assembled pieces (bad data
572 sub register_component {
574 my $asm_mapper = shift;
575 my $cmp_seq_region = shift;
577 if(!ref($asm_mapper) || !$asm_mapper->isa(
'Bio::EnsEMBL::AssemblyMapper')) {
578 throw(
"Bio::EnsEMBL::AssemblyMapper argument expected");
581 if(!defined($cmp_seq_region)) {
582 throw(
"cmp_seq_region argument expected");
586 my $asm_cs_id = $asm_mapper->assembled_CoordSystem()->dbID();
588 #do nothing if this region is already registered or special case
589 return if($asm_mapper->have_registered_component($cmp_seq_region)
590 and !defined($self->{
'multi_seq_ids'}->{$cmp_seq_region}));
592 # my $cmp_seq_region_id =
593 # $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
595 # Determine what part of the assembled region this component region makes up
601 asm.asm_seq_region_id,
605 assembly
asm, seq_region sr
607 asm.cmp_seq_region_id = ? AND
608 asm.asm_seq_region_id = sr.seq_region_id AND
609 sr.coord_system_id = ?
612 my $sth = $self->prepare($q);
613 $sth->bind_param(1,$cmp_seq_region,SQL_INTEGER);
614 $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
617 my @rows = $sth->fetchrow_array();
619 if($sth->rows() == 0) {
620 #this component is not used in the assembled part i.e. gap
621 $asm_mapper->register_component($cmp_seq_region);
626 #we do not currently support components mapping to multiple assembled
627 # make sure that you've got the correct mapping in the meta-table :
628 # chromosome:EquCab2#contig ( use'#' for multiple mappings )
629 # chromosome:EquCab2|contig ( use '|' delimiter for 1-1 mappings )
631 my @more = $sth->fetchrow_array();
632 if($sth->rows() != 1) {
634 throw(
"Multiple assembled regions for single " .
635 "component region cmp_seq_region_id=[$cmp_seq_region]\n".
636 "Remember that multiple mappings use the \#-operaator".
637 " in the meta-table (i.e. chromosome:EquCab2\#contig\n");
640 my ($asm_start, $asm_end, $asm_seq_region_id,
641 $asm_seq_region, $asm_seq_region_length) = @rows;
643 my $arr = [ $asm_seq_region_id, $asm_seq_region,
644 $asm_cs_id, $asm_seq_region_length ];
646 $self->{
'sr_name_cache'}->{
"$asm_seq_region:$asm_cs_id"} = $arr;
647 $self->{
'sr_id_cache'}->{
"$asm_seq_region_id"} = $arr;
651 # Register the corresponding assembled region. This allows a us to
652 # register things in assembled chunks which allows us to:
653 # (1) Keep track of what assembled regions are registered
654 # (2) Use locality of reference (if they want something in same general
655 # region it will already be registered).
657 $self->register_assembled($asm_mapper,$asm_seq_region_id,$asm_start,$asm_end);
662 =head2 register_chained
665 The chained assembly mapper to
register regions on
666 Arg [2] :
string $from (
'first' or
'last')
667 The direction we are registering from, and the name of the
669 Arg [3] :
string $seq_region_name
670 The name of the seqregion we are registering on
671 Arg [4] : listref $ranges
672 A list of ranges to
register (in [$start,$end] tuples).
673 Arg [5] : (optional) $to_slice
674 Only
register those on
this Slice.
675 Description: Registers a set of ranges on a chained assembly mapper.
676 This
function is at the heart of the chained mapping process.
677 It retrieves information from the assembly table and
678 dynamically constructs the mappings between two coordinate
679 systems which are 2 mapping steps apart. It does
this by
using
680 two
internal mappers to load up a third mapper which is
681 actually used by the ChainedAssemblyMapper to perform the
684 This method must be called before any mapping is
685 attempted on regions of interest, otherwise only gaps will
686 be returned. Note that the ChainedAssemblyMapper automatically
687 calls
this method when the need arises.
689 Exceptions :
throw if the seq_region to be registered does not exist
690 or
if it associated with multiple assembled pieces (bad data
693 throw if the mapping between the coordinate systems cannot
694 be performed in two steps, which means there is an
internal
695 error in the data in the meta table or in the code that creates
702 sub register_chained {
704 my $casm_mapper = shift;
706 my $seq_region_id = shift;
708 my $to_slice = shift;
710 my $to_seq_region_id;
711 if(defined($to_slice)){
712 if($casm_mapper->first_CoordSystem()->equals($casm_mapper->last_CoordSystem())){
713 return $self->_register_chained_special($casm_mapper, $from, $seq_region_id, $ranges, $to_slice);
715 $to_seq_region_id = $to_slice->get_seq_region_id();
716 if(!defined($to_seq_region_id)){
717 die
"Could not get seq_region_id for to_slice".$to_slice->seq_region_name.
"\n";
721 my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
722 $end_name, $end_mid_mapper, $end_cs, $end_registry);
724 if($from eq
'first') {
725 $start_name =
'first';
726 $start_mid_mapper = $casm_mapper->first_middle_mapper();
727 $start_cs = $casm_mapper->first_CoordSystem();
728 $start_registry = $casm_mapper->first_registry();
729 $end_mid_mapper = $casm_mapper->last_middle_mapper();
730 $end_cs = $casm_mapper->last_CoordSystem();
731 $end_registry = $casm_mapper->last_registry();
733 } elsif($from eq
'last') {
734 $start_name =
'last';
735 $start_mid_mapper = $casm_mapper->last_middle_mapper();
736 $start_cs = $casm_mapper->last_CoordSystem();
737 $start_registry = $casm_mapper->last_registry();
738 $end_mid_mapper = $casm_mapper->first_middle_mapper();
739 $end_cs = $casm_mapper->first_CoordSystem();
740 $end_registry = $casm_mapper->first_registry();
743 throw(
"Invalid from argument: [$from], must be 'first' or 'last'");
746 my $combined_mapper = $casm_mapper->first_last_mapper();
747 my $mid_cs = $casm_mapper->middle_CoordSystem();
748 my $mid_name =
'middle';
749 my $csa = $self->db->get_CoordSystemAdaptor();
751 # Check for the simple case where the ChainedMapper is short
752 if( ! defined $mid_cs ) {
753 $start_mid_mapper = $combined_mapper;
758 # obtain the first half of the mappings and load them into the start mapper
761 #ascertain which is component and which is actually assembled coord system
764 # check for the simple case, where the ChainedMapper is short
765 if( defined $mid_cs ) {
766 @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
768 @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
771 if(@path != 2 && defined( $path[1] )) {
772 my $path = join(
',',
map({$_->name .
' '. $_->version} @path));
773 my $len = scalar(@path) - 1;
774 throw(
"Unexpected mapping path between start and intermediate " .
775 "coord systems (". $start_cs->name .
" " . $start_cs->version .
776 " and " . $mid_cs->name .
" " . $mid_cs->version .
")." .
777 "\nExpected path length 1, got $len. " .
782 my ($asm_cs,$cmp_cs);
786 #the SQL varies depending on whether we are coming from assembled or
787 #component coordinate system
789 my $asm2cmp = (<<ASMCMP);
793 asm.cmp_seq_region_id,
800 assembly
asm, seq_region sr
802 asm.asm_seq_region_id = ? AND
804 ? >=
asm.asm_start AND
805 asm.cmp_seq_region_id = sr.seq_region_id AND
806 sr.coord_system_id = ?
810 my $cmp2asm = (<<CMPASM);
814 asm.asm_seq_region_id,
821 assembly
asm, seq_region sr
823 asm.cmp_seq_region_id = ? AND
825 ? >=
asm.cmp_start AND
826 asm.asm_seq_region_id = sr.seq_region_id AND
827 sr.coord_system_id = ?
832 if(defined($to_slice)){
833 my $to_cs = $to_slice->coord_system;
834 if($asm_cs->equals($to_cs)){
835 $asm2cmp_sth = $self->prepare($asm2cmp);
836 $cmp2asm_sth = $self->prepare($cmp2asm.
" AND asm.asm_seq_region_id = $to_seq_region_id");
838 elsif($cmp_cs->equals($to_cs)){
839 $asm2cmp_sth = $self->prepare($asm2cmp.
" AND asm.cmp_seq_region_id = $to_seq_region_id");
840 $cmp2asm_sth = $self->prepare($cmp2asm);
843 $asm2cmp_sth = $self->prepare($asm2cmp);
844 $cmp2asm_sth = $self->prepare($cmp2asm);
848 $asm2cmp_sth = $self->prepare($asm2cmp);
849 $cmp2asm_sth = $self->prepare($cmp2asm);
854 $sth = ($asm_cs->equals($start_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
858 # check for the simple case where the ChainedMapper is short
859 if( defined $mid_cs ) {
860 $mid_cs_id = $mid_cs->dbID();
862 $mid_cs_id = $end_cs->dbID();
868 #need to perform the query for each unregistered range
869 foreach my $range (@$ranges) {
870 my ($start, $end) = @$range;
871 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
872 $sth->bind_param(2,$start,SQL_INTEGER);
873 $sth->bind_param(3,$end,SQL_INTEGER);
874 $sth->bind_param(4,$mid_cs_id,SQL_INTEGER);
877 #load the start <-> mid mapper with the results and record the mid cs
878 #ranges we just added to the mapper
880 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
881 $ori, $start_start, $start_end);
883 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
884 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
887 while($sth->fetch()) {
889 if( defined $mid_cs ) {
890 $start_mid_mapper->add_map_coordinates
892 $seq_region_id,$start_start, $start_end, $ori,
893 $mid_seq_region_id, $mid_start, $mid_end
896 if( $from eq
"first" ) {
897 $combined_mapper->add_map_coordinates
899 $seq_region_id,$start_start, $start_end, $ori,
900 $mid_seq_region_id, $mid_start, $mid_end
903 $combined_mapper->add_map_coordinates
905 $mid_seq_region_id, $mid_start, $mid_end, $ori,
906 $seq_region_id,$start_start, $start_end
911 #update sr_name cache
912 my $arr = [ $mid_seq_region_id, $mid_seq_region,
913 $mid_cs_id, $mid_length ];
915 $self->{
'sr_name_cache'}->{
"$mid_seq_region:$mid_cs_id"} = $arr;
916 $self->{
'sr_id_cache'}->{
"$mid_seq_region_id"} = $arr;
918 push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
919 $mid_start,$mid_end];
920 push @start_ranges, [ $seq_region_id, $start_start, $start_end ];
922 #the region that we actually register may actually be larger or smaller
923 #than the region that we wanted to register.
924 #register the intersection of the region so we do not end up doing
927 if($start_start < $start || $start_end > $end) {
928 $start_registry->check_and_register($seq_region_id,$start_start,
935 # in the one step case, we load the mid ranges in the
936 # last_registry and we are done
937 if( ! defined $mid_cs ) {
938 for my $range ( @mid_ranges ) {
939 $end_registry->check_and_register( $range->[0], $range->[2],
943 # and thats it for the simple case ...
949 # now the second half of the mapping
950 # perform another query and load the mid <-> end mapper using the mid cs
954 #ascertain which is component and which is actually assembled coord system
955 @path = @{$csa->get_mapping_path($mid_cs, $end_cs)};
956 if(@path == 2 || ( @path == 3 && !defined $path[1])) {
961 my $path = join(
',',
map({$_->name .
' '. $_->version} @path));
962 my $len = scalar(@path)-1;
963 throw(
"Unexpected mapping path between intermediate and last" .
964 "coord systems (". $mid_cs->name .
" " . $mid_cs->version .
965 " and " . $end_cs->name .
" " . $end_cs->version .
")." .
966 "\nExpected path length 1, got $len. " .
970 if(defined($to_slice)){
971 my $to_cs = $to_slice->coord_system;
972 if($asm_cs->equals($to_cs)){
973 $asm2cmp_sth = $self->prepare($asm2cmp);
974 $cmp2asm_sth = $self->prepare($cmp2asm.
" AND asm.asm_seq_region_id = $to_seq_region_id");
976 elsif($cmp_cs->equals($to_cs)){
977 $asm2cmp_sth = $self->prepare($asm2cmp.
" AND asm.cmp_seq_region_id = $to_seq_region_id");
978 $cmp2asm_sth = $self->prepare($cmp2asm);
981 $asm2cmp_sth = $self->prepare($asm2cmp);
982 $cmp2asm_sth = $self->prepare($cmp2asm);
986 $sth = ($asm_cs->equals($mid_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
988 my $end_cs_id = $end_cs->dbID();
989 foreach my $mid_range (@mid_ranges) {
990 my ($mid_seq_region_id, $mid_seq_region,$start, $end) = @$mid_range;
991 $sth->bind_param(1,$mid_seq_region_id,SQL_INTEGER);
992 $sth->bind_param(2,$start,SQL_INTEGER);
993 $sth->bind_param(3,$end,SQL_INTEGER);
994 $sth->bind_param(4,$end_cs_id,SQL_INTEGER);
997 #load the end <-> mid mapper with the results and record the mid cs
998 #ranges we just added to the mapper
1000 my ($end_start, $end_end, $end_seq_region_id, $end_seq_region, $end_length,
1001 $ori, $mid_start, $mid_end);
1003 $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
1004 \$end_seq_region, \$end_length, \$ori, \$mid_start,
1007 while($sth->fetch()) {
1008 $end_mid_mapper->add_map_coordinates
1010 $end_seq_region_id, $end_start, $end_end, $ori,
1011 $mid_seq_region_id, $mid_start, $mid_end
1014 #update sr_name cache
1015 my $arr = [ $end_seq_region_id,$end_seq_region,$end_cs_id,$end_length ];
1017 $self->{
'sr_name_cache'}->{
"$end_seq_region:$end_cs_id"} = $arr;
1018 $self->{
'sr_id_cache'}->{
"$end_seq_region_id"} = $arr;
1020 #register this region on the end coord system
1021 $end_registry->check_and_register($end_seq_region_id, $end_start, $end_end);
1027 # Now that both halves are loaded
1028 # Do stepwise mapping using both of the loaded mappers to load
1029 # the final start <-> end mapper
1032 _build_combined_mapper(\@start_ranges, $start_mid_mapper, $end_mid_mapper,
1033 $combined_mapper, $start_name);
1039 =head2 _register_chained_special
1042 The chained assembly mapper to
register regions on
1043 Arg [2] :
string $from (
'first' or
'last')
1044 The direction we are registering from, and the name of the
1046 Arg [3] :
string $seq_region_name
1047 The name of the seqregion we are registering on
1048 Arg [4] : listref $ranges
1049 A list of ranges to
register (in [$start,$end] tuples).
1050 Arg [5] : (optional) $to_slice
1051 Only
register those on
this Slice.
1052 Description: Registers a set of ranges on a chained assembly mapper.
1053 This
function is at the heart of the chained mapping process.
1054 It retrieves information from the assembly table and
1055 dynamically constructs the mappings between two coordinate
1056 systems which are 2 mapping steps apart. It does
this by
using
1057 two
internal mappers to load up a third mapper which is
1058 actually used by the ChainedAssemblyMapper to perform the
1061 This method must be called before any mapping is
1062 attempted on regions of interest, otherwise only gaps will
1063 be returned. Note that the ChainedAssemblyMapper automatically
1064 calls
this method when the need arises.
1066 Exceptions :
throw if the seq_region to be registered does not exist
1067 or
if it associated with multiple assembled pieces (bad data
1070 throw if the mapping between the coordinate systems cannot
1071 be performed in two steps, which means there is an
internal
1072 error in the data in the meta table or in the code that creates
1079 sub _register_chained_special {
1081 my $casm_mapper = shift;
1083 my $seq_region_id = shift;
1085 my $to_slice = shift;
1088 my $sth = $self->prepare(
"SELECT
1091 asm.cmp_seq_region_id,
1098 assembly asm, seq_region sr
1100 asm.asm_seq_region_id = ? AND
1101 ? <= asm.asm_end AND
1102 ? >= asm.asm_start AND
1103 asm.cmp_seq_region_id = sr.seq_region_id AND
1104 sr.coord_system_id = ? AND
1105 asm.cmp_seq_region_id = ?");
1108 my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
1109 $end_name, $end_mid_mapper, $end_cs, $end_registry);
1111 if($from eq
'first') {
1112 $start_name =
'first';
1113 $start_mid_mapper = $casm_mapper->first_middle_mapper();
1114 $start_cs = $casm_mapper->first_CoordSystem();
1115 $start_registry = $casm_mapper->first_registry();
1116 $end_mid_mapper = $casm_mapper->last_middle_mapper();
1117 $end_cs = $casm_mapper->last_CoordSystem();
1118 $end_registry = $casm_mapper->last_registry();
1120 } elsif($from eq
'last') {
1121 $start_name =
'last';
1122 $start_mid_mapper = $casm_mapper->last_middle_mapper();
1123 $start_cs = $casm_mapper->last_CoordSystem();
1124 $start_registry = $casm_mapper->last_registry();
1125 $end_mid_mapper = $casm_mapper->first_middle_mapper();
1126 $end_cs = $casm_mapper->first_CoordSystem();
1127 $end_registry = $casm_mapper->first_registry();
1128 $end_name =
'first';
1130 throw(
"Invalid from argument: [$from], must be 'first' or 'last'");
1133 my $combined_mapper = $casm_mapper->first_last_mapper();
1134 my $mid_cs = $casm_mapper->middle_CoordSystem();
1135 my $mid_name =
'middle';
1136 my $csa = $self->db->get_CoordSystemAdaptor();
1138 # Check for the simple case where the ChainedMapper is short
1139 if( ! defined $mid_cs ) {
1140 $start_mid_mapper = $combined_mapper;
1145 if( defined $mid_cs ) {
1146 @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
1148 @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
1150 if( ! defined $mid_cs ) {
1151 $start_mid_mapper = $combined_mapper;
1154 if(@path != 2 && defined( $path[1] )) {
1155 my $path = join(
',',
map({$_->name .
' '. $_->version} @path));
1156 my $len = scalar(@path) - 1;
1157 throw(
"Unexpected mapping path between start and intermediate " .
1158 "coord systems (". $start_cs->name .
" " . $start_cs->version .
1159 " and " . $mid_cs->name .
" " . $mid_cs->version .
")." .
1160 "\nExpected path length 1, got $len. " .
1164 my ($asm_cs,$cmp_cs);
1166 $cmp_cs = $path[-1];
1168 $combined_mapper = $casm_mapper->first_last_mapper();
1169 $mid_cs = $casm_mapper->middle_CoordSystem();
1170 $mid_name =
'middle';
1171 $csa = $self->db->get_CoordSystemAdaptor();
1175 # Check for the simple case where the ChainedMapper is short
1176 if ( !defined $mid_cs ) {
1177 $start_mid_mapper = $combined_mapper;
1179 $mid_cs_id = $mid_cs->dbID();
1185 my $to_cs = $to_slice->coord_system;
1186 foreach my $direction (1, 0){
1190 $id1 = $seq_region_id;
1191 $id2 = $to_slice->get_seq_region_id();
1194 $id2 = $seq_region_id;
1195 $id1 = $to_slice->get_seq_region_id();
1198 foreach my $range (@$ranges) {
1199 my ($start, $end) = @$range;
1200 $sth->bind_param(1,$id1,SQL_INTEGER);
1201 $sth->bind_param(2,$start,SQL_INTEGER);
1202 $sth->bind_param(3,$end,SQL_INTEGER);
1203 $sth->bind_param(4,$to_cs->dbID,SQL_INTEGER);
1204 $sth->bind_param(5,$id2,SQL_INTEGER);
1207 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
1208 $ori, $start_start, $start_end);
1210 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
1211 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
1214 while($sth->fetch()) {
1217 if( defined $mid_cs ) {
1218 $start_mid_mapper->add_map_coordinates
1220 $id1,$start_start, $start_end, $ori,
1221 $mid_seq_region_id, $mid_start, $mid_end
1224 if( $from eq
"first") {
1226 $combined_mapper->add_map_coordinates
1228 $id1,$start_start, $start_end, $ori,
1229 $mid_seq_region_id, $mid_start, $mid_end
1233 $combined_mapper->add_map_coordinates
1235 $mid_seq_region_id, $mid_start, $mid_end, $ori,
1236 $id1,$start_start, $start_end
1241 $combined_mapper->add_map_coordinates
1243 $mid_seq_region_id, $mid_start, $mid_end, $ori,
1244 $id1,$start_start, $start_end
1248 $combined_mapper->add_map_coordinates
1250 $id1,$start_start, $start_end, $ori,
1251 $mid_seq_region_id, $mid_start, $mid_end
1257 #update sr_name cache
1258 my $arr = [ $mid_seq_region_id, $mid_seq_region,
1259 $mid_cs_id, $mid_length ];
1261 $self->{
'sr_name_cache'}->{
"$mid_seq_region:$mid_cs_id"} = $arr;
1262 $self->{
'sr_id_cache'}->{
"$mid_seq_region_id"} = $arr;
1264 push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
1265 $mid_start,$mid_end];
1266 push @start_ranges, [ $id1, $start_start, $start_end ];
1268 #the region that we actually register may actually be larger or smaller
1269 #than the region that we wanted to register.
1270 #register the intersection of the region so we do not end up doing
1273 if($start_start < $start || $start_end > $end) {
1274 $start_registry->check_and_register($id1,$start_start,
1281 if( ! defined $mid_cs ) {
1282 for my $range ( @mid_ranges ) {
1283 $end_registry->check_and_register( $range->[0], $range->[2],
1287 # and thats it for the simple case ...
1298 Example : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
1300 # make cache large enough to hold all of the mappings
1302 $asm_mapper_adaptor->register_all($mapper);
1304 # perform mappings as normal
1305 $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
1308 Description: This
function registers the entire set of mappings between
1309 two coordinate systems in an assembly mapper.
1310 This will use a lot of memory but will be much more efficient
1311 when doing a lot of mapping which is spread over the entire
1315 Caller : specialised prograhsm
1324 my $asm_cs_id = $mapper->assembled_CoordSystem()->dbID();
1325 my $cmp_cs_id = $mapper->component_CoordSystem()->dbID();
1327 # retrieve every relevant assembled/component pair from the assembly table
1333 asm.cmp_seq_region_id,
1339 asm.asm_seq_region_id,
1343 assembly
asm, seq_region asm_sr, seq_region cmp_sr
1345 asm.cmp_seq_region_id = cmp_sr.seq_region_id AND
1346 asm.asm_seq_region_id = asm_sr.seq_region_id AND
1347 cmp_sr.coord_system_id = ? AND
1348 asm_sr.coord_system_id = ?
1351 my $sth = $self->prepare($q);
1353 $sth->bind_param(1,$cmp_cs_id,SQL_INTEGER);
1354 $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
1357 # load the mapper with the assembly information
1359 my ($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $cmp_length,
1361 $asm_start, $asm_end, $asm_seq_region_id, $asm_seq_region, $asm_length);
1363 $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
1364 \$cmp_seq_region, \$cmp_length, \$ori,
1365 \$asm_start, \$asm_end, \$asm_seq_region_id,
1366 \$asm_seq_region, \$asm_length);
1370 while($sth->fetch()) {
1371 $mapper->register_component($cmp_seq_region_id);
1372 $mapper->mapper->add_map_coordinates(
1373 $asm_seq_region_id, $asm_start, $asm_end,
1375 $cmp_seq_region_id, $cmp_start, $cmp_end);
1377 my $arr = [$cmp_seq_region_id, $cmp_seq_region, $cmp_cs_id, $cmp_length];
1379 $self->{
'sr_name_cache'}->{
"$cmp_seq_region:$cmp_cs_id"} = $arr;
1380 $self->{
'sr_id_cache'}->{
"$cmp_seq_region_id"} = $arr;
1382 # only register each asm seq_region once since it requires some work
1383 if(!$asm_registered{$asm_seq_region_id}) {
1384 $asm_registered{$asm_seq_region_id} = 1;
1386 # register all chunks from start of seq region to end
1387 my $end_chunk = $asm_length >> $CHUNKFACTOR;
1388 for(my $i = 0; $i <= $end_chunk; $i++) {
1389 $mapper->register_assembled($asm_seq_region_id, $i);
1392 $arr = [$asm_seq_region_id, $asm_seq_region, $asm_cs_id, $asm_length];
1394 $self->{
'sr_name_cache'}->{
"$asm_seq_region:$asm_cs_id"} = $arr;
1395 $self->{
'sr_id_cache'}->{
"$asm_seq_region_id"} = $arr;
1406 =head2 register_all_chained
1409 Example : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
1411 # make the cache large enough to hold all of the mappings
1413 # load all of the mapping data
1414 $asm_mapper_adaptor->register_all_chained($mapper);
1416 # perform mappings as normal
1417 $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
1420 Description: This
function registers the entire set of mappings between
1421 two coordinate systems in a chained mapper. This will use a lot
1422 of memory but will be much more efficient when doing a lot of
1423 mapping which is spread over the entire genome.
1425 Exceptions :
throw if mapper is between coord systems with unexpected
1427 Caller : specialised programs doing a lot of genome-wide mapping
1432 sub register_all_chained {
1434 my $casm_mapper = shift;
1436 my $first_cs = $casm_mapper->first_CoordSystem();
1437 my $mid_cs = $casm_mapper->middle_CoordSystem();
1438 my $last_cs = $casm_mapper->last_CoordSystem();
1440 my $start_mid_mapper = $casm_mapper->first_middle_mapper();
1441 my $end_mid_mapper = $casm_mapper->last_middle_mapper();
1442 my $combined_mapper = $casm_mapper->first_last_mapper();
1446 my $sth = $self->prepare(
1450 asm.cmp_seq_region_id,
1456 asm.asm_seq_region_id,
1460 assembly asm, seq_region sr_asm, seq_region sr_cmp
1462 sr_asm.seq_region_id = asm.asm_seq_region_id AND
1463 sr_cmp.seq_region_id = asm.cmp_seq_region_id AND
1464 sr_asm.coord_system_id = ? AND
1465 sr_cmp.coord_system_id = ?');
1467 my $csa = $self->db()->get_CoordSystemAdaptor();
1471 if ( !defined $mid_cs ) {
1472 @path = @{ $csa->get_mapping_path( $first_cs, $last_cs ) };
1473 if ( !defined( $path[1] ) ) {
1474 splice( @path, 1, 1 );
1477 @path = @{ $csa->get_mapping_path( $first_cs, $mid_cs ) };
1478 # fix for when we have something like supercontig#contig#chromosome
1479 if ( !defined( $path[1] ) ) {
1480 splice( @path, 1, 1 );
1486 join(
',',
map( { $_->name .
' ' . $_->version } @path ) );
1487 my $len = scalar(@path) - 1;
1488 throw(
"Unexpected mapping path between start and intermediate "
1490 . $first_cs->name .
" "
1491 . $first_cs->version .
" and "
1492 . $mid_cs->name .
" "
1493 . $mid_cs->version .
")."
1494 .
"\nExpected path length 1, got $len. "
1498 my ($asm_cs,$cmp_cs) = @path;
1500 $sth->{mysql_use_result} = 1;
1501 $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
1502 $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
1506 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
1507 $ori, $start_start, $start_end, $start_seq_region_id, $start_seq_region,
1510 if($asm_cs->equals($first_cs)) {
1511 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
1512 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
1513 \$start_end, \$start_seq_region_id, \$start_seq_region,
1516 $sth->bind_columns(\$start_start, \$start_end, \$start_seq_region_id,
1517 \$start_seq_region, \$start_length, \$ori,
1518 \$mid_start, \$mid_end, \$mid_seq_region_id,
1519 \$mid_seq_region, \$mid_length);
1523 my ( $mid_cs_id, $start_cs_id, $registry, $mapper );
1524 if ( !defined $mid_cs ) {
1525 $mid_cs_id = $last_cs->dbID();
1526 $start_cs_id = $first_cs->dbID();
1527 $mapper = $combined_mapper;
1529 $mid_cs_id = $mid_cs->dbID();
1530 $start_cs_id = $first_cs->dbID();
1531 $mapper = $start_mid_mapper;
1534 $registry = $casm_mapper->first_registry();
1536 while($sth->fetch()) {
1537 $mapper->add_map_coordinates
1539 $start_seq_region_id, $start_start, $start_end, $ori,
1540 $mid_seq_region_id, $mid_start, $mid_end
1542 push( @ranges, [$start_seq_region_id, $start_start, $start_end ] );
1544 $registry->check_and_register( $start_seq_region_id, 1, $start_length );
1545 if( ! defined $mid_cs ) {
1546 $casm_mapper->last_registry()->check_and_register
1547 ( $mid_seq_region_id, $mid_start, $mid_end );
1550 my $arr = [ $mid_seq_region_id, $mid_seq_region,
1551 $mid_cs_id, $mid_length ];
1553 $self->{
'sr_name_cache'}->{
"$mid_seq_region:$mid_cs_id"} = $arr;
1554 $self->{
'sr_id_cache'}->{
"$mid_seq_region_id"} = $arr;
1556 $arr = [ $start_seq_region_id, $start_seq_region,
1557 $start_cs_id, $start_length ];
1559 $self->{
'sr_name_cache'}->{
"$start_seq_region:$start_cs_id"} = $arr;
1560 $self->{
'sr_id_cache'}->{
"$start_seq_region_id"} = $arr;
1563 if( ! defined $mid_cs ) {
1564 # thats it for the simple case
1569 @path = @{ $csa->get_mapping_path( $last_cs, $mid_cs ) };
1570 if ( defined($mid_cs) ) {
1571 if ( !defined( $path[1] ) ) {
1572 splice( @path, 1, 1 );
1578 join(
',',
map( { $_->name .
' ' . $_->version } @path ) );
1579 my $len = scalar(@path) - 1;
1580 throw(
"Unexpected mapping path between intermediate and last "
1582 . $last_cs->name .
" "
1583 . $last_cs->version .
" and "
1584 . $mid_cs->name .
" "
1585 . $mid_cs->version .
")."
1586 .
"\nExpected path length 1, got $len. "
1590 ($asm_cs,$cmp_cs) = @path;
1592 $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
1593 $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
1597 my ($end_start, $end_end, $end_seq_region_id, $end_seq_region,
1600 if($asm_cs->equals($mid_cs)) {
1601 $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
1602 \$end_seq_region, \$end_length, \$ori,
1603 \$mid_start, \$mid_end, \$mid_seq_region_id,
1604 \$mid_seq_region, \$mid_length);
1606 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
1607 \$mid_seq_region, \$mid_length, \$ori, \$end_start,
1608 \$end_end, \$end_seq_region_id, \$end_seq_region,
1612 my $end_cs_id = $last_cs->dbID();
1613 $registry = $casm_mapper->last_registry();
1615 while($sth->fetch()) {
1616 $end_mid_mapper->add_map_coordinates
1618 $end_seq_region_id, $end_start, $end_end, $ori,
1619 $mid_seq_region_id, $mid_start, $mid_end
1622 $registry->check_and_register( $end_seq_region_id, 1, $end_length );
1624 my $arr = [ $end_seq_region_id, $end_seq_region,
1625 $end_cs_id, $end_length ];
1626 $self->{
'sr_name_cache'}->{
"$end_seq_region:$end_cs_id"} = $arr;
1627 $self->{
'sr_id_cache'}->{
"$end_seq_region_id"} = $arr;
1630 _build_combined_mapper( \@ranges, $start_mid_mapper, $end_mid_mapper,
1631 $combined_mapper,
"first" );
1638 # after both halves of a chained mapper are loaded
1639 # this function maps all ranges in $ranges and loads the
1640 # results into the combined mapper
1641 sub _build_combined_mapper {
1643 my $start_mid_mapper = shift;
1644 my $end_mid_mapper = shift;
1645 my $combined_mapper = shift;
1646 my $start_name = shift;
1648 my $mid_name =
"middle";
1650 foreach my $range (@$ranges) {
1651 my ( $seq_region_id, $start, $end) = @$range;
1655 my @initial_coords = $start_mid_mapper->map_coordinates($seq_region_id,
1659 foreach my $icoord (@initial_coords) {
1661 if($icoord->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
1662 $sum += $icoord->length();
1667 #feed the results of the first mapping into the second mapper
1669 $end_mid_mapper->map_coordinates($icoord->id, $icoord->start,
1671 $icoord->strand, $mid_name);
1674 foreach my $fcoord (@final_coords) {
1675 #load up the final mapper
1677 if($fcoord->isa(
'Bio::EnsEMBL::Mapper::Coordinate')) {
1678 my $total_start = $start + $sum;
1679 my $total_end = $total_start + $fcoord->length - 1;
1680 my $ori = $fcoord->strand();
1682 if($start_name eq
'first') { # add coords in consistant order
1683 $combined_mapper->add_map_coordinates(
1684 $seq_region_id, $total_start, $total_end, $ori,
1685 $fcoord->id(), $fcoord->start(), $fcoord->end());
1687 $combined_mapper->add_map_coordinates(
1688 $fcoord->id(), $fcoord->start(), $fcoord->end(),$ori,
1689 $seq_region_id, $total_start, $total_end);
1693 $sum += $fcoord->length();
1701 =head2 seq_regions_to_ids
1704 Arg [2] : listref of strings $seq_regions
1705 Example : my @ids = @{$asma->seq_regions_to_ids($coord_sys, \@seq_regs)};
1706 Description: Converts a list of seq_region names to
internal identifiers
1707 using the
internal cache that has accumulated
while registering
1708 regions
for AssemblyMappers. If any requested regions are
1709 not found in the cache an attempt is made to retrieve them
1711 Returntype : listref of ints
1712 Exceptions :
throw if a non-existant seqregion is provided
1718 sub seq_regions_to_ids {
1720 my $coord_system = shift;
1721 my $seq_regions = shift;
1723 my $cs_id = $coord_system->
dbID();
1727 foreach my $sr (@$seq_regions) {
1728 my $arr = $self->{
'sr_name_cache'}->{
"$sr:$cs_id"};
1730 push( @out, $arr->[0] );
1732 push @out, $self->_seq_region_name_to_id($sr,$cs_id);
1740 =head2 seq_ids_to_regions
1742 Arg [1] : listref of seq_region ids
1743 Example : my @ids = @{$asma->ids_to_seq_regions(\@seq_ids)};
1744 Description: Converts a list of seq_region ids to seq region names
1745 using the
internal cache that has accumulated
while registering
1746 regions
for AssemblyMappers. If any requested regions are
1747 not found in the cache an attempt is made to retrieve them
1749 Returntype : listref of strings
1750 Exceptions :
throw if a non-existant seq_region_id is provided
1756 sub seq_ids_to_regions {
1758 my $seq_region_ids = shift;
1762 foreach my $sr (@$seq_region_ids) {
1763 my $arr = $self->{
'sr_id_cache'}->{
"$sr"};
1765 push( @out, $arr->[1] );
1767 push @out, $self->_seq_region_id_to_name($sr);
1776 Description: Delete all the caches
for the mappings/seq_regions
1787 %{$self->{
'sr_name_cache'}} = ();
1788 %{$self->{
'sr_id_cache'}} = ();
1790 foreach my $key (keys %{$self->{
'_asm_mapper_cache'}}){
1791 $self->{
'_asm_mapper_cache'}->{$key}->flush();
1793 %{$self->{
'_asm_mapper_cache'}} = ();