3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
34 Handles mapping between a given coordinate system and the toplevel
35 pseudo coordinate system.
40 $asma = $db->get_AssemblyMapperAdaptor();
41 $csa = $db->get_CoordSystemAdaptor();
43 my $toplevel = $cs_adaptor->fetch_by_name(
'toplevel');
44 my $ctg_cs = $cs_adaptor->fetch_by_name(
'contig');
46 $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $toplevel, $ctg_cs );
48 # map to toplevel coord system for this region
50 $asm_mapper->map(
'AL30421.1.200.92341', 100, 10000, -1, $ctg_cs );
52 # list toplevel seq_region_ids for this region
54 $asm_mapper->list_ids(
'AL30421.1.200.92341', 1, 1000, -1,
60 coordinate system and the toplevel pseudo cooordinate system. The
61 toplevel coordinate system is not a real coordinate system, but
62 represents the highest coordinate system that can be mapped to in a
63 given region. It is only possible to perform unidirectional mapping
64 using this mapper, because it does not make sense to
map from the
65 toplevel coordinate system to another coordinate system.
75 package Bio::EnsEMBL::TopLevelAssemblyMapper;
80 use Scalar::Util qw(weaken);
84 Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor
for
85 the database
this mapper is
using.
89 Returntype : Bio::EnsEMBL::DBSQL::TopLevelAssemblyMapper
90 Exceptions :
throws if any of the 3 arguments are missing/ not
91 : of the correct type.
99 my ($caller, $adaptor, $toplevel_cs, $other_cs) = @_;
101 my $class = ref($caller) || $caller;
103 if(!ref($toplevel_cs)) {
104 throw(
'Toplevel CoordSystem argument expected.');
106 if(!ref($other_cs)) {
107 throw(
'Other CoordSystem argument expected.');
110 if(!$toplevel_cs->is_top_level()) {
111 throw($toplevel_cs->name() .
" is not the toplevel CoordSystem.");
113 if($other_cs->is_top_level()) {
114 throw(
"Other CoordSystem argument should not be toplevel CoordSystem.");
117 my $cs_adaptor = $adaptor->
db()->get_CoordSystemAdaptor();
118 my $coord_systems = $cs_adaptor->fetch_all();
120 my $self = bless {
'coord_systems' => $coord_systems,
121 'toplevel_cs' => $toplevel_cs,
122 'other_cs' => $other_cs}, $class;
124 $self->adaptor($adaptor);
131 weaken($self->{
'adaptor'} = shift)
if(@_);
133 return $self->{
'adaptor'};
138 Arg [1] :
string $frm_seq_region
139 The name of the sequence region to transform FROM
140 Arg [2] :
int $frm_start
141 The start of the region to transform FROM
142 Arg [3] :
int $frm_end
143 The end of the region to transform FROM
144 Arg [4] :
int $strand
145 The strand of the region to transform FROM
147 The coordinate system to transform FROM
148 Arg [6] :
if set will
do a fastmap
149 Arg [7] : (optional) dummy placeholder to keep the interface
150 consistent across different mappers
151 Arg [8] : (optional)
boolean
152 Whether or not to include the original coordinates
153 Example : @coords = $mapper->map(
'X', 1_000_000, 2_000_000,
155 Description: Transforms coordinates from one coordinate system
159 Exceptions : thrown
if if the specified TO coordinate system is not one
160 of the coordinate systems associated with
this mapper
168 throw(
'Incorrect number of arguments.')
if @_ < 6;
170 my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs,
171 $fastmap, $dummy, $include_org_coord) = @_;
173 if($frm_cs->is_top_level()) {
174 throw(
"The toplevel CoordSystem can only be mapped TO, not FROM.");
178 push @tmp, $frm_seq_region_name;
179 my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
181 my $mapper = $self->{
'mapper'};
182 my $toplevel_cs = $self->{
'toplevel_cs'};
183 my $other_cs = $self->{
'other_cs'};
184 my $adaptor = $self->adaptor;
186 if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
187 throw(
"Coordinate system " . $frm_cs->name .
" " . $frm_cs->version .
188 " is neither the assembled nor the component coordinate system " .
189 " of this AssemblyMapper");
192 my $coord_systems = $self->{
'coord_systems'};
194 my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
197 # TBD try to make this more efficient
199 my $from_rank = $other_cs->rank();
200 foreach my $cs (@$coord_systems) {
201 last
if($cs->rank >= $from_rank);
203 #check if a mapping path even exists to this coordinate system
204 my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
208 # Try to map to this coord system. If we get back any coordinates then
209 # it is our 'toplevel' that we were looking for
210 my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
213 my @result = $mapper->fastmap($frm_seq_region_name, $frm_start, $frm_end,
214 $frm_strand, $frm_cs);
215 return @result
if(@result);
217 my @coords = $mapper->map($frm_seq_region_name, $frm_start, $frm_end,
218 $frm_strand, $frm_cs, undef, undef, $include_org_coord);
222 } elsif ($include_org_coord) {
223 return @coords unless $coords[0]{mapped}->isa(
'Bio::EnsEMBL::Mapper::Gap');
225 return @coords unless $coords[0]->isa(
'Bio::EnsEMBL::Mapper::Gap');
232 # the toplevel coordinate system for the region requested *is* the requested region.
235 return ($seq_region_id, $frm_start, $frm_end, $frm_strand, $other_cs);
239 if ($include_org_coord) {
240 return {
'original' => $coord,
'mapped' => $coord };
261 Arg [1] :
string $frm_seq_region
262 The name of the sequence region to transform FROM
263 Arg [2] :
int $frm_start
264 The start of the region to transform FROM
265 Arg [3] :
int $frm_end
266 The end of the region to transform FROM
267 Arg [4] :
int $strand
268 The strand of the region to transform FROM
270 The coordinate system to transform FROM
271 Example : @coords = $mapper->fastmap(
'X', 1_000_000, 2_000_000,
273 Description: Transforms coordinates from one coordinate system
277 Exceptions : thrown
if if the specified TO coordinate system is not one
278 of the coordinate systems associated with
this mapper
286 return $self->map(@_,1);
289 =head2 assembled_CoordSystem
292 Example : $cs = $mapper->assembled_CoordSystem
293 Description: Retrieves the assembled CoordSystem from
this mapper
296 Caller :
internal, AssemblyMapperAdaptor
301 sub assembled_CoordSystem {
303 return $self->{
'toplevel_cs'};
306 =head2 component_CoordSystem
309 Example : $cs = $mapper->component_CoordSystem
310 Description: Retrieves the component CoordSystem from
this mapper
313 Caller :
internal, AssemblyMapperAdaptor
318 sub component_CoordSystem {
320 return $self->{
'other_cs'};
325 my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs, $seq_regions) = @_;
327 my $mapper = $self->{
'mapper'};
328 my $toplevel_cs = $self->{
'toplevel_cs'};
329 my $other_cs = $self->{
'other_cs'};
332 if($frm_cs->is_top_level()) {
333 throw(
"The toplevel CoordSystem can only be mapped TO, not FROM.");
335 if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
336 throw(
"Coordinate system " . $frm_cs->name .
" " . $frm_cs->version .
337 " is neither the assembled nor the component coordinate system " .
338 " of this AssemblyMapper");
341 my $coord_systems = $self->{
'coord_systems'};
342 my $csa = $self->adaptor()->
db()->get_CoordSystemAdaptor();
345 # TBD try to make this more efficient
347 my $from_rank = $other_cs->rank();
348 foreach my $cs (@$coord_systems) {
349 last
if($cs->rank >= $from_rank);
351 #check if a mapping path even exists to this coordinate system
352 my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
356 # Try to map to this coord system. If we get back any coordinates then
357 # it is our 'toplevel' that we were looking for
358 my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
363 push @tmp, $frm_seq_region_name;
364 my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
367 @result = $mapper->list_seq_regions($frm_seq_region_name, $frm_start,
370 @result = $mapper->list_ids($frm_seq_region_name, $frm_start,
374 return @result
if(@result);
378 # the toplevel coordinate system for the region requested *is* the
379 return ($frm_seq_region_name);
384 return ($frm_seq_region_name);
387 #this seems a bit silly and inefficient, but it is probably never
389 my $slice_adaptor = $adaptor->db()->get_SliceAdaptor();
390 my $slice = $slice_adaptor->fetch_by_region($other_cs->name(),
391 $frm_seq_region_name,
392 undef,undef,undef,$other_cs);
393 return ($slice_adaptor->get_seq_region_id($slice));
398 =head2 list_seq_regions
400 Arg [1] :
string $frm_seq_region
401 The name of the sequence region of interest
402 Arg [2] :
int $frm_start
403 The start of the region of interest
404 Arg [3] :
int $frm_end
405 The end of the region to transform of interest
407 The coordinate system to obtain overlapping ids of
408 Example :
foreach $id ($asm_mapper->list_ids(
'X',1,1000,$ctg_cs)) {...}
409 Description: Retrieves a list of overlapping seq_region names
410 of another coordinate system. This is the same as the
411 list_ids method but uses seq_region names rather
internal ids
412 Returntype : List of strings
419 sub list_seq_regions {
420 throw(
'Incorrect number of arguments.')
if(@_ != 5);
427 Arg [1] :
string $frm_seq_region
428 The name of the sequence region of interest.
429 Arg [2] :
int $frm_start
430 The start of the region of interest
431 Arg [3] :
int $frm_end
432 The end of the region to transform of interest
434 The coordinate system to obtain overlapping ids of
435 Example :
foreach $id ($asm_mapper->list_ids(
'X',1,1000,$chr_cs)) {...}
436 Description: Retrieves a list of overlapping seq_region
internal identifiers
437 of another coordinate system. This is the same as the
438 list_seq_regions method but uses
internal identfiers rather
439 than seq_region strings
440 Returntype : List of ints
441 Exceptions : thrown
if the from CoordSystem is the toplevel coord system
442 thrown
if the from CoordSystem is not the one used in the mapper
449 throw(
'Incorrect number of arguments.')
if(@_ != 5);