3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
40 -host =>
'ensembldb.ensembl.org',
50 my $chr33_cs = $csa->fetch_by_name(
'chromosome',
'NCBI33' );
51 my $chr34_cs = $csa->fetch_by_name(
'chromosome',
'NCBI34' );
52 my $ctg_cs = $csa->fetch_by_name(
'contig');
53 my $clone_cs = $csa->fetch_by_name(
'clone');
56 $asma->fetch_by_CoordSystems( $chr33_cs, $ctg_cs );
58 my $ncbi33_ncbi34_mapper =
59 $asm_adptr->fetch_by_CoordSystems( $chr33, $chr34 );
61 my $ctg_clone_mapper =
62 $asm_adptr->fetch_by_CoordSystems( $ctg_cs, $clone_cs );
67 Adaptor
for handling Assembly mappers. This is a I<Singleton>
class.
68 ie: There is only one per database (C<DBAdaptor>).
70 This is used to retrieve mappers between any two coordinate systems
71 whose makeup is described by the assembly table. Currently one step
72 (
explicit) and two step (implicit) pairwise mapping is supported. In
73 one-step mapping an explicit relationship between the coordinate systems
74 is defined in the assembly table. In two-step 'chained' mapping no
75 explicit mapping is present but the coordinate systems must share a
76 common mapping to an intermediate coordinate system.
92 use
Bio::
EnsEMBL::Utils::Cache;
#CPAN LRU cache
96 use integer; #
do proper arithmetic bitshifts
101 my $CHUNKFACTOR = 20; # 2^20 = approx. 10^6
105 Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor
for
106 the database
this assembly mapper is
using.
107 Example : my $asma =
new Bio::EnsEMBL::AssemblyMapperAdaptor($dbadaptor);
117 my($class, $dbadaptor) = @_;
119 my $self = $class->SUPER::new($dbadaptor);
121 $self->{
'_asm_mapper_cache'} = {};
123 # use a shared cache (for this database) that contains info about
126 $self->{
'sr_name_cache'} = $seq_region_cache->{
'name_cache'};
127 $self->{
'sr_id_cache'} = $seq_region_cache->{
'id_cache'};
134 =head2 cache_seq_ids_with_mult_assemblys
136 Example : $self->adaptor->cache_seq_ids_with_mult_assemblys();
137 Description: Creates a hash of the component seq region ids that
138 map to more than one assembly from the assembly table.
146 sub cache_seq_ids_with_mult_assemblys{
150 return if (defined($self->{
'multi_seq_ids'}));
152 $self->{
'multi_seq_ids'} = {};
155 SELECT sra.seq_region_id
156 FROM seq_region_attrib sra,
160 WHERE sra.attrib_type_id = at.attrib_type_id
161 AND code =
"MultAssem"
162 AND sra.seq_region_id = sr.seq_region_id
163 AND sr.coord_system_id = cs.coord_system_id
164 AND cs.species_id = ?);
166 my $sth = $self->prepare($sql);
168 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
174 $sth->bind_columns(\$seq_region_id);
176 while($sth->fetch()) {
177 $self->{
'multi_seq_ids'}->{$seq_region_id} = 1;
183 =head2 fetch_by_CoordSystems
186 One of the coordinate systems to retrieve the mapper
189 The other coordinate system to
map between
190 Description: Retrieves an Assembly mapper
for two coordinate
191 systems whose relationship is described in the
194 The ordering of the coodinate systems is arbitrary.
195 The following two statements are equivalent:
196 $mapper = $asma->fetch_by_CoordSystems($cs1,$cs2);
197 $mapper = $asma->fetch_by_CoordSystems($cs2,$cs1);
199 Exceptions : wrong argument types
205 sub fetch_by_CoordSystems {
210 if(!ref($cs1) || !$cs1->isa(
'Bio::EnsEMBL::CoordSystem')) {
211 throw(
"cs1 argument must be a Bio::EnsEMBL::CoordSystem.");
213 if(!ref($cs2) || !$cs2->isa(
'Bio::EnsEMBL::CoordSystem')) {
214 throw(
"cs2 argument must be a Bio::EnsEMBL::CoordSystem.");
217 # if($cs1->equals($cs2)) {
218 # throw("Cannot create mapper between same coord systems: " .
219 # $cs1->name . " " . $cs1->version);
222 if($cs1->is_top_level()) {
225 if($cs2->is_top_level()) {
229 my $csa = $self->db->get_CoordSystemAdaptor();
231 #retrieve the shortest possible mapping path between these systems
232 my @mapping_path = @{$csa->get_mapping_path($cs1,$cs2)};
239 my $key = join(
':',
map({defined($_)?$_->dbID():
"-"} @mapping_path));
241 my $asm_mapper = $self->{
'_asm_mapper_cache'}->{$key};
243 return $asm_mapper
if($asm_mapper);
245 if(@mapping_path == 1) {
246 throw(
"Incorrect mapping path defined in meta table. " .
247 "0 step mapping encountered between:\n" .
248 $cs1->name() .
" " . $cs1->version() .
" and " . $cs2->name .
" " .
252 if(@mapping_path == 2) {
253 #1 step regular mapping
256 # If you want multiple pieces on two seqRegions to map to each other
257 # you need to make an assembly.mapping entry that is seperated with a #
260 $self->{
'_asm_mapper_cache'}->{$key} = $asm_mapper;
264 if(@mapping_path == 3) {
265 #two step chained mapping
267 #in multi-step mapping it is possible get requests with the
268 #coordinate system ordering reversed since both mappings directions
269 #cache on both orderings just in case
270 #e.g. chr <-> contig <-> clone and clone <-> contig <-> chr
272 $self->{
'_asm_mapper_cache'}->{$key} = $asm_mapper;
273 $key = join(
':',
map({defined($_)?$_->dbID():
"-"} reverse(@mapping_path)));
274 $self->{
'_asm_mapper_cache'}->{$key} = $asm_mapper;
278 throw(
"Only 1 and 2 step coordinate system mapping is currently\n" .
279 "supported. Mapping between " .
280 $cs1->name() .
" " . $cs1->version() .
" and " .
281 $cs2->name() .
" " . $cs2->version() .
282 " requires ". (scalar(@mapping_path)-1) .
" steps.");
287 =head2 register_assembled
291 Arg [2] : integer $asm_seq_region
292 The dbID of the seq_region to be registered
293 Arg [3] :
int $asm_start
294 The start of the region to be registered
295 Arg [4] :
int $asm_end
296 The end of the region to be registered
298 This extracts the relevant data from the assembly
299 table and stores it in Mapper
internal to the $asm_mapper.
300 It therefore must be called before any mapping is
301 attempted on that region. Otherwise only gaps will
303 calls
this method when the need arises.
305 Exceptions :
throw if the seq_region to be registered does not exist
306 or
if it associated with multiple assembled pieces (bad data
314 sub register_assembled {
316 my $asm_mapper = shift;
317 my $asm_seq_region = shift;
318 my $asm_start = shift;
321 if(!ref($asm_mapper) || !$asm_mapper->isa(
'Bio::EnsEMBL::AssemblyMapper')) {
322 throw(
"Bio::EnsEMBL::AssemblyMapper argument expected");
325 throw(
"asm_seq_region argument expected")
if(!defined($asm_seq_region));
326 throw(
"asm_start argument expected")
if(!defined($asm_start));
327 throw(
"asm_end argument expected")
if(!defined($asm_end));
330 my $cmp_cs_id = $asm_mapper->component_CoordSystem->dbID();
332 #split up the region to be registered into fixed chunks
333 #this allows us to keep track of regions that have already been
334 #registered and also works under the assumption that if a small region
335 #is requested it is likely that other requests will be made in the
336 #vicinity (the minimum size registered the chunksize (2^chunkfactor)
339 #determine span of chunks
340 #bitwise shift right is fast and easy integer division
342 my($start_chunk, $end_chunk);
344 $start_chunk = $asm_start >> $CHUNKFACTOR;
345 $end_chunk = $asm_end >> $CHUNKFACTOR;
347 # inserts have start = end + 1, on boundary condition start_chunk
348 # could be less than end chunk
349 if($asm_start == $asm_end + 1) {
350 ($start_chunk, $end_chunk) = ($end_chunk, $start_chunk);
353 #find regions of continuous unregistered chunks
355 my ($begin_chunk_region,$end_chunk_region);
356 for ($i = $start_chunk; $i <= $end_chunk; $i++) {
357 if($asm_mapper->have_registered_assembled($asm_seq_region, $i)) {
358 if(defined($begin_chunk_region)) {
359 #this is the end of an unregistered region.
360 my $region = [($begin_chunk_region << $CHUNKFACTOR),
361 (($end_chunk_region+1) << $CHUNKFACTOR)-1];
362 push @chunk_regions, $region;
363 $begin_chunk_region = $end_chunk_region = undef;
366 $begin_chunk_region = $i
if(!defined($begin_chunk_region));
367 $end_chunk_region = $i+1;
368 $asm_mapper->register_assembled($asm_seq_region,$i);
372 #the last part may have been an unregistered region too
373 if(defined($begin_chunk_region)) {
374 my $region = [($begin_chunk_region << $CHUNKFACTOR),
375 (($end_chunk_region+1) << $CHUNKFACTOR) -1];
376 push @chunk_regions, $region;
379 return if(!@chunk_regions);
381 # keep the Mapper to a reasonable size
382 if( $asm_mapper->size() > $asm_mapper->max_pair_count() ) {
383 $asm_mapper->flush();
384 #we now have to go and register the entire requested region since we
385 #just flushed everything
387 @chunk_regions = ( [ ( $start_chunk << $CHUNKFACTOR)
388 , (($end_chunk+1) << $CHUNKFACTOR)-1 ] );
390 for( my $i = $start_chunk; $i <= $end_chunk; $i++ ) {
391 $asm_mapper->register_assembled( $asm_seq_region, $i );
395 # my $asm_seq_region_id =
396 # $self->_seq_region_name_to_id($asm_seq_region,$asm_cs_id);
398 # Retrieve the description of how the assembled region is made from
399 # component regions for each of the continuous blocks of unregistered,
406 asm.cmp_seq_region_id,
413 assembly
asm, seq_region sr
415 asm.asm_seq_region_id = ? AND
417 ? >=
asm.asm_start AND
418 asm.cmp_seq_region_id = sr.seq_region_id AND
419 sr.coord_system_id = ?
422 my $sth = $self->prepare($q);
424 foreach my $region (@chunk_regions) {
425 my($region_start, $region_end) = @$region;
426 $sth->bind_param(1,$asm_seq_region,SQL_INTEGER);
427 $sth->bind_param(2,$region_start,SQL_INTEGER);
428 $sth->bind_param(3,$region_end,SQL_INTEGER);
429 $sth->bind_param(4,$cmp_cs_id,SQL_INTEGER);
433 my($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $ori,
434 $asm_start, $asm_end, $cmp_seq_region_length);
436 $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
437 \$cmp_seq_region, \$cmp_seq_region_length, \$ori,
438 \$asm_start, \$asm_end);
441 # Load the unregistered regions of the mapper
443 while($sth->fetch()) {
444 next
if($asm_mapper->have_registered_component($cmp_seq_region_id)
445 and !defined($self->{
'multi_seq_ids'}->{$cmp_seq_region_id}));
446 $asm_mapper->register_component($cmp_seq_region_id);
447 $asm_mapper->mapper->add_map_coordinates(
448 $asm_seq_region, $asm_start, $asm_end,
450 $cmp_seq_region_id, $cmp_start, $cmp_end);
452 my $arr = [ $cmp_seq_region_id, $cmp_seq_region,
453 $cmp_cs_id, $cmp_seq_region_length ];
455 $self->{
'sr_name_cache'}->{
"$cmp_seq_region:$cmp_cs_id"} = $arr;
456 $self->{
'sr_id_cache'}->{
"$cmp_seq_region_id"} = $arr;
465 sub _seq_region_name_to_id {
470 if(!defined($sr_name) or
472 throw(
'seq_region_name and coord_system_id args are required');
475 my $arr = $self->{
'sr_name_cache'}->{
"$sr_name:$cs_id"};
480 # Get the seq_region_id via the name. This would be quicker if we just
481 # used internal ids instead but stored but then we lose the ability
482 # the transform accross databases with different internal ids
484 my $sth = $self->prepare(
"SELECT seq_region_id, length " .
486 "WHERE name = ? AND coord_system_id = ?");
488 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
489 $sth->bind_param(2,$cs_id,SQL_INTEGER);
492 my @row = $sth->fetchrow_array();
494 throw(
"No-existent seq_region [$sr_name] in coord system $cs_id");
496 my @more = $sth->fetchrow_array();
498 throw(
"Ambiguous seq_region [$sr_name] in coord system $cs_id");
501 my ($sr_id, $sr_length) = @row;
504 $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
506 $self->{
'sr_name_cache'}->{
"$sr_name:$cs_id"} = $arr;
507 $self->{
'sr_id_cache'}->{
"$sr_id"} = $arr;
512 sub _seq_region_id_to_name {
516 ($sr_id) ||
throw(
'seq_region_is required');
518 my $arr = $self->{
'sr_id_cache'}->{
"$sr_id"};
523 # Get the seq_region name via the id. This would be quicker if we just
524 # used internal ids instead but stored but then we lose the ability
525 # the transform accross databases with different internal ids
527 my $sth = $self->prepare(
"SELECT name, length ,coord_system_id " .
529 "WHERE seq_region_id = ? ");
531 $sth->bind_param(1,$sr_id,SQL_INTEGER);
534 my @row = $sth->fetchrow_array();
535 if(!$sth->rows() == 1) {
536 throw(
"non-existant seq_region [$sr_id]");
539 my ($sr_name, $sr_length, $cs_id) = @row;
542 $arr = [ $sr_id, $sr_name, $cs_id, $sr_length ];
544 $self->{
'sr_name_cache'}->{
"$sr_name:$cs_id"} = $arr;
545 $self->{
'sr_id_cache'}->{
"$sr_id"} = $arr;
551 =head2 register_component
555 Arg [2] : integer $cmp_seq_region
556 The dbID of the seq_region to be registered
558 This extracts the relevant data from the assembly
559 table and stores it in Mapper
internal to the $asm_mapper.
560 It therefore must be called before any mapping is
561 attempted on that region. Otherwise only gaps will
563 calls
this method when the need arises.
565 Exceptions :
throw if the seq_region to be registered does not exist
566 or
if it associated with multiple assembled pieces (bad data
573 sub register_component {
575 my $asm_mapper = shift;
576 my $cmp_seq_region = shift;
578 if(!ref($asm_mapper) || !$asm_mapper->isa(
'Bio::EnsEMBL::AssemblyMapper')) {
579 throw(
"Bio::EnsEMBL::AssemblyMapper argument expected");
582 if(!defined($cmp_seq_region)) {
583 throw(
"cmp_seq_region argument expected");
587 my $asm_cs_id = $asm_mapper->assembled_CoordSystem()->dbID();
589 #do nothing if this region is already registered or special case
590 return if($asm_mapper->have_registered_component($cmp_seq_region)
591 and !defined($self->{
'multi_seq_ids'}->{$cmp_seq_region}));
593 # my $cmp_seq_region_id =
594 # $self->_seq_region_name_to_id($cmp_seq_region, $cmp_cs_id);
596 # Determine what part of the assembled region this component region makes up
602 asm.asm_seq_region_id,
606 assembly
asm, seq_region sr
608 asm.cmp_seq_region_id = ? AND
609 asm.asm_seq_region_id = sr.seq_region_id AND
610 sr.coord_system_id = ?
613 my $sth = $self->prepare($q);
614 $sth->bind_param(1,$cmp_seq_region,SQL_INTEGER);
615 $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
618 my @rows = $sth->fetchrow_array();
620 if($sth->rows() == 0) {
621 #this component is not used in the assembled part i.e. gap
622 $asm_mapper->register_component($cmp_seq_region);
627 #we do not currently support components mapping to multiple assembled
628 # make sure that you've got the correct mapping in the meta-table :
629 # chromosome:EquCab2#contig ( use'#' for multiple mappings )
630 # chromosome:EquCab2|contig ( use '|' delimiter for 1-1 mappings )
632 my @more = $sth->fetchrow_array();
633 if($sth->rows() != 1) {
635 throw(
"Multiple assembled regions for single " .
636 "component region cmp_seq_region_id=[$cmp_seq_region]\n".
637 "Remember that multiple mappings use the \#-operaator".
638 " in the meta-table (i.e. chromosome:EquCab2\#contig\n");
641 my ($asm_start, $asm_end, $asm_seq_region_id,
642 $asm_seq_region, $asm_seq_region_length) = @rows;
644 my $arr = [ $asm_seq_region_id, $asm_seq_region,
645 $asm_cs_id, $asm_seq_region_length ];
647 $self->{
'sr_name_cache'}->{
"$asm_seq_region:$asm_cs_id"} = $arr;
648 $self->{
'sr_id_cache'}->{
"$asm_seq_region_id"} = $arr;
652 # Register the corresponding assembled region. This allows a us to
653 # register things in assembled chunks which allows us to:
654 # (1) Keep track of what assembled regions are registered
655 # (2) Use locality of reference (if they want something in same general
656 # region it will already be registered).
658 $self->register_assembled($asm_mapper,$asm_seq_region_id,$asm_start,$asm_end);
663 =head2 register_chained
666 The chained assembly mapper to
register regions on
667 Arg [2] :
string $from (
'first' or
'last')
668 The direction we are registering from, and the name of the
670 Arg [3] :
string $seq_region_name
671 The name of the seqregion we are registering on
672 Arg [4] : listref $ranges
673 A list of ranges to
register (in [$start,$end] tuples).
674 Arg [5] : (optional) $to_slice
675 Only
register those on
this Slice.
676 Description: Registers a set of ranges on a chained assembly mapper.
677 This
function is at the heart of the chained mapping process.
678 It retrieves information from the assembly table and
679 dynamically constructs the mappings between two coordinate
680 systems which are 2 mapping steps apart. It does
this by
using
681 two
internal mappers to load up a third mapper which is
682 actually used by the ChainedAssemblyMapper to perform the
685 This method must be called before any mapping is
686 attempted on regions of interest, otherwise only gaps will
687 be returned. Note that the ChainedAssemblyMapper automatically
688 calls
this method when the need arises.
690 Exceptions :
throw if the seq_region to be registered does not exist
691 or
if it associated with multiple assembled pieces (bad data
694 throw if the mapping between the coordinate systems cannot
695 be performed in two steps, which means there is an
internal
696 error in the data in the meta table or in the code that creates
703 sub register_chained {
705 my $casm_mapper = shift;
707 my $seq_region_id = shift;
709 my $to_slice = shift;
711 my $to_seq_region_id;
712 if(defined($to_slice)){
713 if($casm_mapper->first_CoordSystem()->equals($casm_mapper->last_CoordSystem())){
714 return $self->_register_chained_special($casm_mapper, $from, $seq_region_id, $ranges, $to_slice);
716 $to_seq_region_id = $to_slice->get_seq_region_id();
717 if(!defined($to_seq_region_id)){
718 die
"Could not get seq_region_id for to_slice".$to_slice->seq_region_name.
"\n";
722 my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
723 $end_name, $end_mid_mapper, $end_cs, $end_registry);
725 if($from eq
'first') {
726 $start_name =
'first';
727 $start_mid_mapper = $casm_mapper->first_middle_mapper();
728 $start_cs = $casm_mapper->first_CoordSystem();
729 $start_registry = $casm_mapper->first_registry();
730 $end_mid_mapper = $casm_mapper->last_middle_mapper();
731 $end_cs = $casm_mapper->last_CoordSystem();
732 $end_registry = $casm_mapper->last_registry();
734 } elsif($from eq
'last') {
735 $start_name =
'last';
736 $start_mid_mapper = $casm_mapper->last_middle_mapper();
737 $start_cs = $casm_mapper->last_CoordSystem();
738 $start_registry = $casm_mapper->last_registry();
739 $end_mid_mapper = $casm_mapper->first_middle_mapper();
740 $end_cs = $casm_mapper->first_CoordSystem();
741 $end_registry = $casm_mapper->first_registry();
744 throw(
"Invalid from argument: [$from], must be 'first' or 'last'");
747 my $combined_mapper = $casm_mapper->first_last_mapper();
748 my $mid_cs = $casm_mapper->middle_CoordSystem();
749 my $mid_name =
'middle';
750 my $csa = $self->db->get_CoordSystemAdaptor();
752 # Check for the simple case where the ChainedMapper is short
753 if( ! defined $mid_cs ) {
754 $start_mid_mapper = $combined_mapper;
759 # obtain the first half of the mappings and load them into the start mapper
762 #ascertain which is component and which is actually assembled coord system
765 # check for the simple case, where the ChainedMapper is short
766 if( defined $mid_cs ) {
767 @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
769 @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
772 if(@path != 2 && defined( $path[1] )) {
773 my $path = join(
',',
map({$_->name .
' '. $_->version} @path));
774 my $len = scalar(@path) - 1;
775 throw(
"Unexpected mapping path between start and intermediate " .
776 "coord systems (". $start_cs->name .
" " . $start_cs->version .
777 " and " . $mid_cs->name .
" " . $mid_cs->version .
")." .
778 "\nExpected path length 1, got $len. " .
783 my ($asm_cs,$cmp_cs);
787 #the SQL varies depending on whether we are coming from assembled or
788 #component coordinate system
790 my $asm2cmp = (<<ASMCMP);
794 asm.cmp_seq_region_id,
801 assembly
asm, seq_region sr
803 asm.asm_seq_region_id = ? AND
805 ? >=
asm.asm_start AND
806 asm.cmp_seq_region_id = sr.seq_region_id AND
807 sr.coord_system_id = ?
811 my $cmp2asm = (<<CMPASM);
815 asm.asm_seq_region_id,
822 assembly
asm, seq_region sr
824 asm.cmp_seq_region_id = ? AND
826 ? >=
asm.cmp_start AND
827 asm.asm_seq_region_id = sr.seq_region_id AND
828 sr.coord_system_id = ?
833 if(defined($to_slice)){
834 my $to_cs = $to_slice->coord_system;
835 if($asm_cs->equals($to_cs)){
836 $asm2cmp_sth = $self->prepare($asm2cmp);
837 $cmp2asm_sth = $self->prepare($cmp2asm.
" AND asm.asm_seq_region_id = $to_seq_region_id");
839 elsif($cmp_cs->equals($to_cs)){
840 $asm2cmp_sth = $self->prepare($asm2cmp.
" AND asm.cmp_seq_region_id = $to_seq_region_id");
841 $cmp2asm_sth = $self->prepare($cmp2asm);
844 $asm2cmp_sth = $self->prepare($asm2cmp);
845 $cmp2asm_sth = $self->prepare($cmp2asm);
849 $asm2cmp_sth = $self->prepare($asm2cmp);
850 $cmp2asm_sth = $self->prepare($cmp2asm);
855 $sth = ($asm_cs->equals($start_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
859 # check for the simple case where the ChainedMapper is short
860 if( defined $mid_cs ) {
861 $mid_cs_id = $mid_cs->dbID();
863 $mid_cs_id = $end_cs->dbID();
869 #need to perform the query for each unregistered range
870 foreach my $range (@$ranges) {
871 my ($start, $end) = @$range;
872 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
873 $sth->bind_param(2,$start,SQL_INTEGER);
874 $sth->bind_param(3,$end,SQL_INTEGER);
875 $sth->bind_param(4,$mid_cs_id,SQL_INTEGER);
878 #load the start <-> mid mapper with the results and record the mid cs
879 #ranges we just added to the mapper
881 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
882 $ori, $start_start, $start_end);
884 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
885 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
888 while($sth->fetch()) {
890 if( defined $mid_cs ) {
891 $start_mid_mapper->add_map_coordinates
893 $seq_region_id,$start_start, $start_end, $ori,
894 $mid_seq_region_id, $mid_start, $mid_end
897 if( $from eq
"first" ) {
898 $combined_mapper->add_map_coordinates
900 $seq_region_id,$start_start, $start_end, $ori,
901 $mid_seq_region_id, $mid_start, $mid_end
904 $combined_mapper->add_map_coordinates
906 $mid_seq_region_id, $mid_start, $mid_end, $ori,
907 $seq_region_id,$start_start, $start_end
912 #update sr_name cache
913 my $arr = [ $mid_seq_region_id, $mid_seq_region,
914 $mid_cs_id, $mid_length ];
916 $self->{
'sr_name_cache'}->{
"$mid_seq_region:$mid_cs_id"} = $arr;
917 $self->{
'sr_id_cache'}->{
"$mid_seq_region_id"} = $arr;
919 push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
920 $mid_start,$mid_end];
921 push @start_ranges, [ $seq_region_id, $start_start, $start_end ];
923 #the region that we actually register may actually be larger or smaller
924 #than the region that we wanted to register.
925 #register the intersection of the region so we do not end up doing
928 if($start_start < $start || $start_end > $end) {
929 $start_registry->check_and_register($seq_region_id,$start_start,
936 # in the one step case, we load the mid ranges in the
937 # last_registry and we are done
938 if( ! defined $mid_cs ) {
939 for my $range ( @mid_ranges ) {
940 $end_registry->check_and_register( $range->[0], $range->[2],
944 # and thats it for the simple case ...
950 # now the second half of the mapping
951 # perform another query and load the mid <-> end mapper using the mid cs
955 #ascertain which is component and which is actually assembled coord system
956 @path = @{$csa->get_mapping_path($mid_cs, $end_cs)};
957 if(@path == 2 || ( @path == 3 && !defined $path[1])) {
962 my $path = join(
',',
map({$_->name .
' '. $_->version} @path));
963 my $len = scalar(@path)-1;
964 throw(
"Unexpected mapping path between intermediate and last" .
965 "coord systems (". $mid_cs->name .
" " . $mid_cs->version .
966 " and " . $end_cs->name .
" " . $end_cs->version .
")." .
967 "\nExpected path length 1, got $len. " .
971 if(defined($to_slice)){
972 my $to_cs = $to_slice->coord_system;
973 if($asm_cs->equals($to_cs)){
974 $asm2cmp_sth = $self->prepare($asm2cmp);
975 $cmp2asm_sth = $self->prepare($cmp2asm.
" AND asm.asm_seq_region_id = $to_seq_region_id");
977 elsif($cmp_cs->equals($to_cs)){
978 $asm2cmp_sth = $self->prepare($asm2cmp.
" AND asm.cmp_seq_region_id = $to_seq_region_id");
979 $cmp2asm_sth = $self->prepare($cmp2asm);
982 $asm2cmp_sth = $self->prepare($asm2cmp);
983 $cmp2asm_sth = $self->prepare($cmp2asm);
987 $sth = ($asm_cs->equals($mid_cs)) ? $asm2cmp_sth : $cmp2asm_sth;
989 my $end_cs_id = $end_cs->dbID();
990 foreach my $mid_range (@mid_ranges) {
991 my ($mid_seq_region_id, $mid_seq_region,$start, $end) = @$mid_range;
992 $sth->bind_param(1,$mid_seq_region_id,SQL_INTEGER);
993 $sth->bind_param(2,$start,SQL_INTEGER);
994 $sth->bind_param(3,$end,SQL_INTEGER);
995 $sth->bind_param(4,$end_cs_id,SQL_INTEGER);
998 #load the end <-> mid mapper with the results and record the mid cs
999 #ranges we just added to the mapper
1001 my ($end_start, $end_end, $end_seq_region_id, $end_seq_region, $end_length,
1002 $ori, $mid_start, $mid_end);
1004 $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
1005 \$end_seq_region, \$end_length, \$ori, \$mid_start,
1008 while($sth->fetch()) {
1009 $end_mid_mapper->add_map_coordinates
1011 $end_seq_region_id, $end_start, $end_end, $ori,
1012 $mid_seq_region_id, $mid_start, $mid_end
1015 #update sr_name cache
1016 my $arr = [ $end_seq_region_id,$end_seq_region,$end_cs_id,$end_length ];
1018 $self->{
'sr_name_cache'}->{
"$end_seq_region:$end_cs_id"} = $arr;
1019 $self->{
'sr_id_cache'}->{
"$end_seq_region_id"} = $arr;
1021 #register this region on the end coord system
1022 $end_registry->check_and_register($end_seq_region_id, $end_start, $end_end);
1028 # Now that both halves are loaded
1029 # Do stepwise mapping using both of the loaded mappers to load
1030 # the final start <-> end mapper
1033 _build_combined_mapper(\@start_ranges, $start_mid_mapper, $end_mid_mapper,
1034 $combined_mapper, $start_name);
1040 =head2 _register_chained_special
1043 The chained assembly mapper to
register regions on
1044 Arg [2] :
string $from (
'first' or
'last')
1045 The direction we are registering from, and the name of the
1047 Arg [3] :
string $seq_region_name
1048 The name of the seqregion we are registering on
1049 Arg [4] : listref $ranges
1050 A list of ranges to
register (in [$start,$end] tuples).
1051 Arg [5] : (optional) $to_slice
1052 Only
register those on
this Slice.
1053 Description: Registers a set of ranges on a chained assembly mapper.
1054 This
function is at the heart of the chained mapping process.
1055 It retrieves information from the assembly table and
1056 dynamically constructs the mappings between two coordinate
1057 systems which are 2 mapping steps apart. It does
this by
using
1058 two
internal mappers to load up a third mapper which is
1059 actually used by the ChainedAssemblyMapper to perform the
1062 This method must be called before any mapping is
1063 attempted on regions of interest, otherwise only gaps will
1064 be returned. Note that the ChainedAssemblyMapper automatically
1065 calls
this method when the need arises.
1067 Exceptions :
throw if the seq_region to be registered does not exist
1068 or
if it associated with multiple assembled pieces (bad data
1071 throw if the mapping between the coordinate systems cannot
1072 be performed in two steps, which means there is an
internal
1073 error in the data in the meta table or in the code that creates
1080 sub _register_chained_special {
1082 my $casm_mapper = shift;
1084 my $seq_region_id = shift;
1086 my $to_slice = shift;
1089 my $sth = $self->prepare(
"SELECT
1092 asm.cmp_seq_region_id,
1099 assembly asm, seq_region sr
1101 asm.asm_seq_region_id = ? AND
1102 ? <= asm.asm_end AND
1103 ? >= asm.asm_start AND
1104 asm.cmp_seq_region_id = sr.seq_region_id AND
1105 sr.coord_system_id = ? AND
1106 asm.cmp_seq_region_id = ?");
1109 my ($start_name, $start_mid_mapper, $start_cs, $start_registry,
1110 $end_name, $end_mid_mapper, $end_cs, $end_registry);
1112 if($from eq
'first') {
1113 $start_name =
'first';
1114 $start_mid_mapper = $casm_mapper->first_middle_mapper();
1115 $start_cs = $casm_mapper->first_CoordSystem();
1116 $start_registry = $casm_mapper->first_registry();
1117 $end_mid_mapper = $casm_mapper->last_middle_mapper();
1118 $end_cs = $casm_mapper->last_CoordSystem();
1119 $end_registry = $casm_mapper->last_registry();
1121 } elsif($from eq
'last') {
1122 $start_name =
'last';
1123 $start_mid_mapper = $casm_mapper->last_middle_mapper();
1124 $start_cs = $casm_mapper->last_CoordSystem();
1125 $start_registry = $casm_mapper->last_registry();
1126 $end_mid_mapper = $casm_mapper->first_middle_mapper();
1127 $end_cs = $casm_mapper->first_CoordSystem();
1128 $end_registry = $casm_mapper->first_registry();
1129 $end_name =
'first';
1131 throw(
"Invalid from argument: [$from], must be 'first' or 'last'");
1134 my $combined_mapper = $casm_mapper->first_last_mapper();
1135 my $mid_cs = $casm_mapper->middle_CoordSystem();
1136 my $mid_name =
'middle';
1137 my $csa = $self->db->get_CoordSystemAdaptor();
1139 # Check for the simple case where the ChainedMapper is short
1140 if( ! defined $mid_cs ) {
1141 $start_mid_mapper = $combined_mapper;
1146 if( defined $mid_cs ) {
1147 @path = @{$csa->get_mapping_path($start_cs, $mid_cs)};
1149 @path = @{$csa->get_mapping_path( $start_cs, $end_cs )};
1151 if( ! defined $mid_cs ) {
1152 $start_mid_mapper = $combined_mapper;
1155 if(@path != 2 && defined( $path[1] )) {
1156 my $path = join(
',',
map({$_->name .
' '. $_->version} @path));
1157 my $len = scalar(@path) - 1;
1158 throw(
"Unexpected mapping path between start and intermediate " .
1159 "coord systems (". $start_cs->name .
" " . $start_cs->version .
1160 " and " . $mid_cs->name .
" " . $mid_cs->version .
")." .
1161 "\nExpected path length 1, got $len. " .
1165 my ($asm_cs,$cmp_cs);
1167 $cmp_cs = $path[-1];
1169 $combined_mapper = $casm_mapper->first_last_mapper();
1170 $mid_cs = $casm_mapper->middle_CoordSystem();
1171 $mid_name =
'middle';
1172 $csa = $self->db->get_CoordSystemAdaptor();
1176 # Check for the simple case where the ChainedMapper is short
1177 if ( !defined $mid_cs ) {
1178 $start_mid_mapper = $combined_mapper;
1180 $mid_cs_id = $mid_cs->dbID();
1186 my $to_cs = $to_slice->coord_system;
1187 foreach my $direction (1, 0){
1191 $id1 = $seq_region_id;
1192 $id2 = $to_slice->get_seq_region_id();
1195 $id2 = $seq_region_id;
1196 $id1 = $to_slice->get_seq_region_id();
1199 foreach my $range (@$ranges) {
1200 my ($start, $end) = @$range;
1201 $sth->bind_param(1,$id1,SQL_INTEGER);
1202 $sth->bind_param(2,$start,SQL_INTEGER);
1203 $sth->bind_param(3,$end,SQL_INTEGER);
1204 $sth->bind_param(4,$to_cs->dbID,SQL_INTEGER);
1205 $sth->bind_param(5,$id2,SQL_INTEGER);
1208 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
1209 $ori, $start_start, $start_end);
1211 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
1212 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
1215 while($sth->fetch()) {
1218 if( defined $mid_cs ) {
1219 $start_mid_mapper->add_map_coordinates
1221 $id1,$start_start, $start_end, $ori,
1222 $mid_seq_region_id, $mid_start, $mid_end
1225 if( $from eq
"first") {
1227 $combined_mapper->add_map_coordinates
1229 $id1,$start_start, $start_end, $ori,
1230 $mid_seq_region_id, $mid_start, $mid_end
1234 $combined_mapper->add_map_coordinates
1236 $mid_seq_region_id, $mid_start, $mid_end, $ori,
1237 $id1,$start_start, $start_end
1242 $combined_mapper->add_map_coordinates
1244 $mid_seq_region_id, $mid_start, $mid_end, $ori,
1245 $id1,$start_start, $start_end
1249 $combined_mapper->add_map_coordinates
1251 $id1,$start_start, $start_end, $ori,
1252 $mid_seq_region_id, $mid_start, $mid_end
1258 #update sr_name cache
1259 my $arr = [ $mid_seq_region_id, $mid_seq_region,
1260 $mid_cs_id, $mid_length ];
1262 $self->{
'sr_name_cache'}->{
"$mid_seq_region:$mid_cs_id"} = $arr;
1263 $self->{
'sr_id_cache'}->{
"$mid_seq_region_id"} = $arr;
1265 push @mid_ranges,[$mid_seq_region_id,$mid_seq_region,
1266 $mid_start,$mid_end];
1267 push @start_ranges, [ $id1, $start_start, $start_end ];
1269 #the region that we actually register may actually be larger or smaller
1270 #than the region that we wanted to register.
1271 #register the intersection of the region so we do not end up doing
1274 if($start_start < $start || $start_end > $end) {
1275 $start_registry->check_and_register($id1,$start_start,
1282 if( ! defined $mid_cs ) {
1283 for my $range ( @mid_ranges ) {
1284 $end_registry->check_and_register( $range->[0], $range->[2],
1288 # and thats it for the simple case ...
1299 Example : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
1301 # make cache large enough to hold all of the mappings
1303 $asm_mapper_adaptor->register_all($mapper);
1305 # perform mappings as normal
1306 $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
1309 Description: This
function registers the entire set of mappings between
1310 two coordinate systems in an assembly mapper.
1311 This will use a lot of memory but will be much more efficient
1312 when doing a lot of mapping which is spread over the entire
1316 Caller : specialised prograhsm
1325 my $asm_cs_id = $mapper->assembled_CoordSystem()->dbID();
1326 my $cmp_cs_id = $mapper->component_CoordSystem()->dbID();
1328 # retrieve every relevant assembled/component pair from the assembly table
1334 asm.cmp_seq_region_id,
1340 asm.asm_seq_region_id,
1344 assembly
asm, seq_region asm_sr, seq_region cmp_sr
1346 asm.cmp_seq_region_id = cmp_sr.seq_region_id AND
1347 asm.asm_seq_region_id = asm_sr.seq_region_id AND
1348 cmp_sr.coord_system_id = ? AND
1349 asm_sr.coord_system_id = ?
1352 my $sth = $self->prepare($q);
1354 $sth->bind_param(1,$cmp_cs_id,SQL_INTEGER);
1355 $sth->bind_param(2,$asm_cs_id,SQL_INTEGER);
1358 # load the mapper with the assembly information
1360 my ($cmp_start, $cmp_end, $cmp_seq_region_id, $cmp_seq_region, $cmp_length,
1362 $asm_start, $asm_end, $asm_seq_region_id, $asm_seq_region, $asm_length);
1364 $sth->bind_columns(\$cmp_start, \$cmp_end, \$cmp_seq_region_id,
1365 \$cmp_seq_region, \$cmp_length, \$ori,
1366 \$asm_start, \$asm_end, \$asm_seq_region_id,
1367 \$asm_seq_region, \$asm_length);
1371 while($sth->fetch()) {
1372 $mapper->register_component($cmp_seq_region_id);
1373 $mapper->mapper->add_map_coordinates(
1374 $asm_seq_region_id, $asm_start, $asm_end,
1376 $cmp_seq_region_id, $cmp_start, $cmp_end);
1378 my $arr = [$cmp_seq_region_id, $cmp_seq_region, $cmp_cs_id, $cmp_length];
1380 $self->{
'sr_name_cache'}->{
"$cmp_seq_region:$cmp_cs_id"} = $arr;
1381 $self->{
'sr_id_cache'}->{
"$cmp_seq_region_id"} = $arr;
1383 # only register each asm seq_region once since it requires some work
1384 if(!$asm_registered{$asm_seq_region_id}) {
1385 $asm_registered{$asm_seq_region_id} = 1;
1387 # register all chunks from start of seq region to end
1388 my $end_chunk = $asm_length >> $CHUNKFACTOR;
1389 for(my $i = 0; $i <= $end_chunk; $i++) {
1390 $mapper->register_assembled($asm_seq_region_id, $i);
1393 $arr = [$asm_seq_region_id, $asm_seq_region, $asm_cs_id, $asm_length];
1395 $self->{
'sr_name_cache'}->{
"$asm_seq_region:$asm_cs_id"} = $arr;
1396 $self->{
'sr_id_cache'}->{
"$asm_seq_region_id"} = $arr;
1407 =head2 register_all_chained
1410 Example : $mapper = $asm_mapper_adaptor->fetch_by_CoordSystems($cs1,$cs2);
1412 # make the cache large enough to hold all of the mappings
1414 # load all of the mapping data
1415 $asm_mapper_adaptor->register_all_chained($mapper);
1417 # perform mappings as normal
1418 $mapper->map($slice->seq_region_name(), $sr_start, $sr_end,
1421 Description: This
function registers the entire set of mappings between
1422 two coordinate systems in a chained mapper. This will use a lot
1423 of memory but will be much more efficient when doing a lot of
1424 mapping which is spread over the entire genome.
1426 Exceptions :
throw if mapper is between coord systems with unexpected
1428 Caller : specialised programs doing a lot of genome-wide mapping
1433 sub register_all_chained {
1435 my $casm_mapper = shift;
1437 my $first_cs = $casm_mapper->first_CoordSystem();
1438 my $mid_cs = $casm_mapper->middle_CoordSystem();
1439 my $last_cs = $casm_mapper->last_CoordSystem();
1441 my $start_mid_mapper = $casm_mapper->first_middle_mapper();
1442 my $end_mid_mapper = $casm_mapper->last_middle_mapper();
1443 my $combined_mapper = $casm_mapper->first_last_mapper();
1447 my $sth = $self->prepare(
1451 asm.cmp_seq_region_id,
1457 asm.asm_seq_region_id,
1461 assembly asm, seq_region sr_asm, seq_region sr_cmp
1463 sr_asm.seq_region_id = asm.asm_seq_region_id AND
1464 sr_cmp.seq_region_id = asm.cmp_seq_region_id AND
1465 sr_asm.coord_system_id = ? AND
1466 sr_cmp.coord_system_id = ?');
1468 my $csa = $self->db()->get_CoordSystemAdaptor();
1472 if ( !defined $mid_cs ) {
1473 @path = @{ $csa->get_mapping_path( $first_cs, $last_cs ) };
1474 if ( !defined( $path[1] ) ) {
1475 splice( @path, 1, 1 );
1478 @path = @{ $csa->get_mapping_path( $first_cs, $mid_cs ) };
1479 # fix for when we have something like supercontig#contig#chromosome
1480 if ( !defined( $path[1] ) ) {
1481 splice( @path, 1, 1 );
1487 join(
',',
map( { $_->name .
' ' . $_->version } @path ) );
1488 my $len = scalar(@path) - 1;
1489 throw(
"Unexpected mapping path between start and intermediate "
1491 . $first_cs->name .
" "
1492 . $first_cs->version .
" and "
1493 . $mid_cs->name .
" "
1494 . $mid_cs->version .
")."
1495 .
"\nExpected path length 1, got $len. "
1499 my ($asm_cs,$cmp_cs) = @path;
1501 $sth->{mysql_use_result} = 1;
1502 $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
1503 $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
1507 my ($mid_start, $mid_end, $mid_seq_region_id, $mid_seq_region, $mid_length,
1508 $ori, $start_start, $start_end, $start_seq_region_id, $start_seq_region,
1511 if($asm_cs->equals($first_cs)) {
1512 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
1513 \$mid_seq_region, \$mid_length, \$ori, \$start_start,
1514 \$start_end, \$start_seq_region_id, \$start_seq_region,
1517 $sth->bind_columns(\$start_start, \$start_end, \$start_seq_region_id,
1518 \$start_seq_region, \$start_length, \$ori,
1519 \$mid_start, \$mid_end, \$mid_seq_region_id,
1520 \$mid_seq_region, \$mid_length);
1524 my ( $mid_cs_id, $start_cs_id, $registry, $mapper );
1525 if ( !defined $mid_cs ) {
1526 $mid_cs_id = $last_cs->dbID();
1527 $start_cs_id = $first_cs->dbID();
1528 $mapper = $combined_mapper;
1530 $mid_cs_id = $mid_cs->dbID();
1531 $start_cs_id = $first_cs->dbID();
1532 $mapper = $start_mid_mapper;
1535 $registry = $casm_mapper->first_registry();
1537 while($sth->fetch()) {
1538 $mapper->add_map_coordinates
1540 $start_seq_region_id, $start_start, $start_end, $ori,
1541 $mid_seq_region_id, $mid_start, $mid_end
1543 push( @ranges, [$start_seq_region_id, $start_start, $start_end ] );
1545 $registry->check_and_register( $start_seq_region_id, 1, $start_length );
1546 if( ! defined $mid_cs ) {
1547 $casm_mapper->last_registry()->check_and_register
1548 ( $mid_seq_region_id, $mid_start, $mid_end );
1551 my $arr = [ $mid_seq_region_id, $mid_seq_region,
1552 $mid_cs_id, $mid_length ];
1554 $self->{
'sr_name_cache'}->{
"$mid_seq_region:$mid_cs_id"} = $arr;
1555 $self->{
'sr_id_cache'}->{
"$mid_seq_region_id"} = $arr;
1557 $arr = [ $start_seq_region_id, $start_seq_region,
1558 $start_cs_id, $start_length ];
1560 $self->{
'sr_name_cache'}->{
"$start_seq_region:$start_cs_id"} = $arr;
1561 $self->{
'sr_id_cache'}->{
"$start_seq_region_id"} = $arr;
1564 if( ! defined $mid_cs ) {
1565 # thats it for the simple case
1570 @path = @{ $csa->get_mapping_path( $last_cs, $mid_cs ) };
1571 if ( defined($mid_cs) ) {
1572 if ( !defined( $path[1] ) ) {
1573 splice( @path, 1, 1 );
1579 join(
',',
map( { $_->name .
' ' . $_->version } @path ) );
1580 my $len = scalar(@path) - 1;
1581 throw(
"Unexpected mapping path between intermediate and last "
1583 . $last_cs->name .
" "
1584 . $last_cs->version .
" and "
1585 . $mid_cs->name .
" "
1586 . $mid_cs->version .
")."
1587 .
"\nExpected path length 1, got $len. "
1591 ($asm_cs,$cmp_cs) = @path;
1593 $sth->bind_param(1,$asm_cs->dbID,SQL_INTEGER);
1594 $sth->bind_param(2,$cmp_cs->dbID,SQL_INTEGER);
1598 my ($end_start, $end_end, $end_seq_region_id, $end_seq_region,
1601 if($asm_cs->equals($mid_cs)) {
1602 $sth->bind_columns(\$end_start, \$end_end, \$end_seq_region_id,
1603 \$end_seq_region, \$end_length, \$ori,
1604 \$mid_start, \$mid_end, \$mid_seq_region_id,
1605 \$mid_seq_region, \$mid_length);
1607 $sth->bind_columns(\$mid_start, \$mid_end, \$mid_seq_region_id,
1608 \$mid_seq_region, \$mid_length, \$ori, \$end_start,
1609 \$end_end, \$end_seq_region_id, \$end_seq_region,
1613 my $end_cs_id = $last_cs->dbID();
1614 $registry = $casm_mapper->last_registry();
1616 while($sth->fetch()) {
1617 $end_mid_mapper->add_map_coordinates
1619 $end_seq_region_id, $end_start, $end_end, $ori,
1620 $mid_seq_region_id, $mid_start, $mid_end
1623 $registry->check_and_register( $end_seq_region_id, 1, $end_length );
1625 my $arr = [ $end_seq_region_id, $end_seq_region,
1626 $end_cs_id, $end_length ];
1627 $self->{
'sr_name_cache'}->{
"$end_seq_region:$end_cs_id"} = $arr;
1628 $self->{
'sr_id_cache'}->{
"$end_seq_region_id"} = $arr;
1631 _build_combined_mapper( \@ranges, $start_mid_mapper, $end_mid_mapper,
1632 $combined_mapper,
"first" );
1639 # after both halves of a chained mapper are loaded
1640 # this function maps all ranges in $ranges and loads the
1641 # results into the combined mapper
1642 sub _build_combined_mapper {
1644 my $start_mid_mapper = shift;
1645 my $end_mid_mapper = shift;
1646 my $combined_mapper = shift;
1647 my $start_name = shift;
1649 my $mid_name =
"middle";
1651 foreach my $range (@$ranges) {
1652 my ( $seq_region_id, $start, $end) = @$range;
1656 my @initial_coords = $start_mid_mapper->map_coordinates($seq_region_id,
1660 foreach my $icoord (@initial_coords) {
1662 if($icoord->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
1663 $sum += $icoord->length();
1668 #feed the results of the first mapping into the second mapper
1670 $end_mid_mapper->map_coordinates($icoord->id, $icoord->start,
1672 $icoord->strand, $mid_name);
1675 foreach my $fcoord (@final_coords) {
1676 #load up the final mapper
1678 if($fcoord->isa(
'Bio::EnsEMBL::Mapper::Coordinate')) {
1679 my $total_start = $start + $sum;
1680 my $total_end = $total_start + $fcoord->length - 1;
1681 my $ori = $fcoord->strand();
1683 if($start_name eq
'first') { # add coords in consistant order
1684 $combined_mapper->add_map_coordinates(
1685 $seq_region_id, $total_start, $total_end, $ori,
1686 $fcoord->id(), $fcoord->start(), $fcoord->end());
1688 $combined_mapper->add_map_coordinates(
1689 $fcoord->id(), $fcoord->start(), $fcoord->end(),$ori,
1690 $seq_region_id, $total_start, $total_end);
1694 $sum += $fcoord->length();
1702 =head2 seq_regions_to_ids
1705 Arg [2] : listref of strings $seq_regions
1706 Example : my @ids = @{$asma->seq_regions_to_ids($coord_sys, \@seq_regs)};
1707 Description: Converts a list of seq_region names to
internal identifiers
1708 using the
internal cache that has accumulated
while registering
1709 regions
for AssemblyMappers. If any requested regions are
1710 not found in the cache an attempt is made to retrieve them
1712 Returntype : listref of ints
1713 Exceptions :
throw if a non-existant seqregion is provided
1719 sub seq_regions_to_ids {
1721 my $coord_system = shift;
1722 my $seq_regions = shift;
1724 my $cs_id = $coord_system->
dbID();
1728 foreach my $sr (@$seq_regions) {
1729 my $arr = $self->{
'sr_name_cache'}->{
"$sr:$cs_id"};
1731 push( @out, $arr->[0] );
1733 push @out, $self->_seq_region_name_to_id($sr,$cs_id);
1741 =head2 seq_ids_to_regions
1743 Arg [1] : listref of seq_region ids
1744 Example : my @ids = @{$asma->ids_to_seq_regions(\@seq_ids)};
1745 Description: Converts a list of seq_region ids to seq region names
1746 using the
internal cache that has accumulated
while registering
1747 regions
for AssemblyMappers. If any requested regions are
1748 not found in the cache an attempt is made to retrieve them
1750 Returntype : listref of strings
1751 Exceptions :
throw if a non-existant seq_region_id is provided
1757 sub seq_ids_to_regions {
1759 my $seq_region_ids = shift;
1763 foreach my $sr (@$seq_region_ids) {
1764 my $arr = $self->{
'sr_id_cache'}->{
"$sr"};
1766 push( @out, $arr->[1] );
1768 push @out, $self->_seq_region_id_to_name($sr);
1777 Description: Delete all the caches
for the mappings/seq_regions
1788 %{$self->{
'sr_name_cache'}} = ();
1789 %{$self->{
'sr_id_cache'}} = ();
1791 foreach my $key (keys %{$self->{
'_asm_mapper_cache'}}){
1792 $self->{
'_asm_mapper_cache'}->{$key}->flush();
1794 %{$self->{
'_asm_mapper_cache'}} = ();