ensembl-hive  2.7.0
TopLevelAssemblyMapper.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 Handles mapping between a given coordinate system and the toplevel
35 pseudo coordinate system.
36 
37 =head1 SYNOPSIS
38 
40  $asma = $db->get_AssemblyMapperAdaptor();
41  $csa = $db->get_CoordSystemAdaptor();
42 
43  my $toplevel = $cs_adaptor->fetch_by_name('toplevel');
44  my $ctg_cs = $cs_adaptor->fetch_by_name('contig');
45 
46  $asm_mapper = $map_adaptor->fetch_by_CoordSystems( $toplevel, $ctg_cs );
47 
48  # map to toplevel coord system for this region
49  @chr_coords =
50  $asm_mapper->map( 'AL30421.1.200.92341', 100, 10000, -1, $ctg_cs );
51 
52  # list toplevel seq_region_ids for this region
53  @chr_ids =
54  $asm_mapper->list_ids( 'AL30421.1.200.92341', 1, 1000, -1,
55  $ctg_cs );
56 
57 =head1 DESCRIPTION
58 
59 The TopLevelAssemblyMapper performs mapping between a provided
60 coordinate system and the toplevel pseudo cooordinate system. The
61 toplevel coordinate system is not a real coordinate system, but
62 represents the highest coordinate system that can be mapped to in a
63 given region. It is only possible to perform unidirectional mapping
64 using this mapper, because it does not make sense to map from the
65 toplevel coordinate system to another coordinate system.
66 
67 =head1 METHODS
68 
69 =cut
70 
71 
72 use strict;
73 use warnings;
74 
75 package Bio::EnsEMBL::TopLevelAssemblyMapper;
76 
77 use Bio::EnsEMBL::Utils::Exception qw(throw);
80 use Scalar::Util qw(weaken);
81 
82 =head2 new
83 
84  Arg [1] : Bio::EnsEMBL::DBAdaptor $dbadaptor the adaptor for
85  the database this mapper is using.
86  Arg [2] : Toplevel CoordSystem
87  Arg [3] : Other CoordSystem
88  Description: Creates a new TopLevelAssemblyMapper object
89  Returntype : Bio::EnsEMBL::DBSQL::TopLevelAssemblyMapper
90  Exceptions : throws if any of the 3 arguments are missing/ not
91  : of the correct type.
93  Status : Stable
94 
95 =cut
96 
97 
98 sub new {
99  my ($caller, $adaptor, $toplevel_cs, $other_cs) = @_;
100 
101  my $class = ref($caller) || $caller;
102 
103  if(!ref($toplevel_cs)) {
104  throw('Toplevel CoordSystem argument expected.');
105  }
106  if(!ref($other_cs)) {
107  throw('Other CoordSystem argument expected.');
108  }
109 
110  if(!$toplevel_cs->is_top_level()) {
111  throw($toplevel_cs->name() . " is not the toplevel CoordSystem.");
112  }
113  if($other_cs->is_top_level()) {
114  throw("Other CoordSystem argument should not be toplevel CoordSystem.");
115  }
116 
117  my $cs_adaptor = $adaptor->db()->get_CoordSystemAdaptor();
118  my $coord_systems = $cs_adaptor->fetch_all();
119 
120  my $self = bless {'coord_systems' => $coord_systems,
121  'toplevel_cs' => $toplevel_cs,
122  'other_cs' => $other_cs}, $class;
123 
124  $self->adaptor($adaptor);
125  return $self;
126 }
127 
128 sub adaptor {
129  my $self = shift;
130 
131  weaken($self->{'adaptor'} = shift) if(@_);
132 
133  return $self->{'adaptor'};
134 }
135 
136 =head2 map
137 
138  Arg [1] : string $frm_seq_region
139  The name of the sequence region to transform FROM
140  Arg [2] : int $frm_start
141  The start of the region to transform FROM
142  Arg [3] : int $frm_end
143  The end of the region to transform FROM
144  Arg [4] : int $strand
145  The strand of the region to transform FROM
146  Arg [5] : Bio::EnsEMBL::CoordSystem
147  The coordinate system to transform FROM
148  Arg [6] : if set will do a fastmap
149  Arg [7] : (optional) dummy placeholder to keep the interface
150  consistent across different mappers
151  Arg [8] : (optional) boolean
152  Whether or not to include the original coordinates
153  Example : @coords = $mapper->map('X', 1_000_000, 2_000_000,
154  1, $chr_cs);
155  Description: Transforms coordinates from one coordinate system
156  to another.
157  Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
158  Bio::EnsEMBL::Mapper:Gap objects
159  Exceptions : thrown if if the specified TO coordinate system is not one
160  of the coordinate systems associated with this mapper
161  Caller : general
162  Status : Stable
163 
164 =cut
165 
166 
167 sub map {
168  throw('Incorrect number of arguments.') if @_ < 6;
169 
170  my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_strand, $frm_cs,
171  $fastmap, $dummy, $include_org_coord) = @_;
172 
173  if($frm_cs->is_top_level()) {
174  throw("The toplevel CoordSystem can only be mapped TO, not FROM.");
175  }
176 
177  my @tmp;
178  push @tmp, $frm_seq_region_name;
179  my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
180 
181  my $mapper = $self->{'mapper'};
182  my $toplevel_cs = $self->{'toplevel_cs'};
183  my $other_cs = $self->{'other_cs'};
184  my $adaptor = $self->adaptor;
185 
186  if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
187  throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
188  " is neither the assembled nor the component coordinate system " .
189  " of this AssemblyMapper");
190  }
191 
192  my $coord_systems = $self->{'coord_systems'};
193 
194  my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
195 
196  #
197  # TBD try to make this more efficient
198  #
199  my $from_rank = $other_cs->rank();
200  foreach my $cs (@$coord_systems) {
201  last if($cs->rank >= $from_rank);
202 
203  #check if a mapping path even exists to this coordinate system
204  my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
205 
206  if(@mapping_path) {
207 
208  # Try to map to this coord system. If we get back any coordinates then
209  # it is our 'toplevel' that we were looking for
210  my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
211 
212  if($fastmap) {
213  my @result = $mapper->fastmap($frm_seq_region_name, $frm_start, $frm_end,
214  $frm_strand, $frm_cs);
215  return @result if(@result);
216  } else {
217  my @coords = $mapper->map($frm_seq_region_name, $frm_start, $frm_end,
218  $frm_strand, $frm_cs, undef, undef, $include_org_coord);
219 
220  if(@coords > 1) {
221  return @coords;
222  } elsif ($include_org_coord) {
223  return @coords unless $coords[0]{mapped}->isa('Bio::EnsEMBL::Mapper::Gap');
224  } else {
225  return @coords unless $coords[0]->isa('Bio::EnsEMBL::Mapper::Gap');
226  }
227  }
228  }
229  }
230 
231  #
232  # the toplevel coordinate system for the region requested *is* the requested region.
233  #
234  if ($fastmap) {
235  return ($seq_region_id, $frm_start, $frm_end, $frm_strand, $other_cs);
236  }
237 
238  my $coord = Bio::EnsEMBL::Mapper::Coordinate->new($seq_region_id, $frm_start,$frm_end, $frm_strand, $other_cs);
239  if ($include_org_coord) {
240  return { 'original' => $coord, 'mapped' => $coord };
241  } else {
242  return $coord;
243  }
244 }
245 
246 =head2 flush
247 
248  Args : none
249  Example : none
250  Description: polymorphism with AssemblyMapper, does nothing
251  Returntype : none
252  Exceptions : none
253  Status : Stable
254 
255 =cut
256 
257 sub flush {}
258 
259 =head2 fastmap
260 
261  Arg [1] : string $frm_seq_region
262  The name of the sequence region to transform FROM
263  Arg [2] : int $frm_start
264  The start of the region to transform FROM
265  Arg [3] : int $frm_end
266  The end of the region to transform FROM
267  Arg [4] : int $strand
268  The strand of the region to transform FROM
269  Arg [5] : Bio::EnsEMBL::CoordSystem
270  The coordinate system to transform FROM
271  Example : @coords = $mapper->fastmap('X', 1_000_000, 2_000_000,
272  1, $chr_cs);
273  Description: Transforms coordinates from one coordinate system
274  to another.
275  Returntype : List of Bio::EnsEMBL::Mapper::Coordinate and/or
276  Bio::EnsEMBL::Mapper:Gap objects
277  Exceptions : thrown if if the specified TO coordinate system is not one
278  of the coordinate systems associated with this mapper
279  Caller : general
280  Status : Stable
281 
282 =cut
283 
284 sub fastmap {
285  my $self = shift;
286  return $self->map(@_,1);
287 }
288 
289 =head2 assembled_CoordSystem
290 
291  Arg [1] : none
292  Example : $cs = $mapper->assembled_CoordSystem
293  Description: Retrieves the assembled CoordSystem from this mapper
294  Returntype : Bio::EnsEMBL::CoordSystem
295  Exceptions : none
296  Caller : internal, AssemblyMapperAdaptor
297  Status : Stable
298 
299 =cut
300 
301 sub assembled_CoordSystem {
302  my $self = shift;
303  return $self->{'toplevel_cs'};
304 }
305 
306 =head2 component_CoordSystem
307 
308  Arg [1] : none
309  Example : $cs = $mapper->component_CoordSystem
310  Description: Retrieves the component CoordSystem from this mapper
311  Returntype : Bio::EnsEMBL::CoordSystem
312  Exceptions : none
313  Caller : internal, AssemblyMapperAdaptor
314  Status : Stable
315 
316 =cut
317 
318 sub component_CoordSystem {
319  my $self = shift;
320  return $self->{'other_cs'};
321 }
322 
323 
324 sub _list {
325  my($self, $frm_seq_region_name, $frm_start, $frm_end, $frm_cs, $seq_regions) = @_;
326 
327  my $mapper = $self->{'mapper'};
328  my $toplevel_cs = $self->{'toplevel_cs'};
329  my $other_cs = $self->{'other_cs'};
330  my $adaptor = $self->adaptor;
331 
332  if($frm_cs->is_top_level()) {
333  throw("The toplevel CoordSystem can only be mapped TO, not FROM.");
334  }
335  if($frm_cs != $other_cs && !$frm_cs->equals($other_cs)) {
336  throw("Coordinate system " . $frm_cs->name . " " . $frm_cs->version .
337  " is neither the assembled nor the component coordinate system " .
338  " of this AssemblyMapper");
339  }
340 
341  my $coord_systems = $self->{'coord_systems'};
342  my $csa = $self->adaptor()->db()->get_CoordSystemAdaptor();
343 
344  #
345  # TBD try to make this more efficient
346  #
347  my $from_rank = $other_cs->rank();
348  foreach my $cs (@$coord_systems) {
349  last if($cs->rank >= $from_rank);
350 
351  #check if a mapping path even exists to this coordinate system
352  my @mapping_path = @{ $csa->get_mapping_path( $cs, $other_cs ) };
353 
354  if(@mapping_path) {
355 
356  # Try to map to this coord system. If we get back any coordinates then
357  # it is our 'toplevel' that we were looking for
358  my $mapper = $adaptor->fetch_by_CoordSystems($other_cs, $cs);
359 
360  my @result;
361 
362  my @tmp;
363  push @tmp, $frm_seq_region_name;
364  my $seq_region_id = @{$self->adaptor()->seq_regions_to_ids($frm_cs, \@tmp)}[0];
365 
366  if($seq_regions) {
367  @result = $mapper->list_seq_regions($frm_seq_region_name, $frm_start,
368  $frm_end, $frm_cs);
369  } else {
370  @result = $mapper->list_ids($frm_seq_region_name, $frm_start,
371  $frm_end, $frm_cs);
372  }
373 
374  return @result if(@result);
375  }
376  }
377 
378  # the toplevel coordinate system for the region requested *is* the
379  return ($frm_seq_region_name);
380 
381 
382  # requested region.
383  if($seq_regions) {
384  return ($frm_seq_region_name);
385  }
386 
387  #this seems a bit silly and inefficient, but it is probably never
388  #called anyway.
389  my $slice_adaptor = $adaptor->db()->get_SliceAdaptor();
390  my $slice = $slice_adaptor->fetch_by_region($other_cs->name(),
391  $frm_seq_region_name,
392  undef,undef,undef,$other_cs);
393  return ($slice_adaptor->get_seq_region_id($slice));
394 }
395 
396 
397 
398 =head2 list_seq_regions
399 
400  Arg [1] : string $frm_seq_region
401  The name of the sequence region of interest
402  Arg [2] : int $frm_start
403  The start of the region of interest
404  Arg [3] : int $frm_end
405  The end of the region to transform of interest
406  Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
407  The coordinate system to obtain overlapping ids of
408  Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$ctg_cs)) {...}
409  Description: Retrieves a list of overlapping seq_region names
410  of another coordinate system. This is the same as the
411  list_ids method but uses seq_region names rather internal ids
412  Returntype : List of strings
413  Exceptions : none
414  Caller : general
415  Status : Stable
416 
417 =cut
418 
419 sub list_seq_regions {
420  throw('Incorrect number of arguments.') if(@_ != 5);
421  return _list(@_,1);
422 }
423 
424 
425 =head2 list_ids
426 
427  Arg [1] : string $frm_seq_region
428  The name of the sequence region of interest.
429  Arg [2] : int $frm_start
430  The start of the region of interest
431  Arg [3] : int $frm_end
432  The end of the region to transform of interest
433  Arg [5] : Bio::EnsEMBL::CoordSystem $frm_cs
434  The coordinate system to obtain overlapping ids of
435  Example : foreach $id ($asm_mapper->list_ids('X',1,1000,$chr_cs)) {...}
436  Description: Retrieves a list of overlapping seq_region internal identifiers
437  of another coordinate system. This is the same as the
438  list_seq_regions method but uses internal identfiers rather
439  than seq_region strings
440  Returntype : List of ints
441  Exceptions : thrown if the from CoordSystem is the toplevel coord system
442  thrown if the from CoordSystem is not the one used in the mapper
443  Caller : general
444  Status : Stable
445 
446 =cut
447 
448 sub list_ids {
449  throw('Incorrect number of arguments.') if(@_ != 5);
450  return _list(@_,0);
451 }
452 
453 
454 
455 
456 
457 1;
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
Bio::EnsEMBL::Mapper::Coordinate
Definition: Coordinate.pm:14
map
public map()
AssemblyMapper
Definition: BlastzAligner.pm:7
Bio::EnsEMBL::CoordSystem
Definition: CoordSystem.pm:40
Bio::EnsEMBL::DBSQL::AssemblyMapperAdaptor
Definition: AssemblyMapperAdaptor.pm:58
Bio::EnsEMBL::TopLevelAssemblyMapper
Definition: TopLevelAssemblyMapper.pm:40
Bio::EnsEMBL::DBSQL::BaseAdaptor::db
public Bio::EnsEMBL::DBSQL::DBAdaptor db()
Bio::EnsEMBL::Mapper::Coordinate::new
public new()
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::Mapper
Definition: Coordinate.pm:3
Bio::EnsEMBL::Storable::adaptor
public Bio::EnsEMBL::DBSQL::BaseAdaptor adaptor()