ensembl-hive  2.7.0
MappedSlice.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::MappedSlice - an object representing a mapped slice
34 
35 =head1 SYNOPSIS
36 
37  # get a reference slice
38  my $slice =
39  $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
40 
41  # create MappedSliceContainer based on the reference slice
42  my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
43 
44  # set the adaptor for fetching AssemblySlices
45  my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
46  $msc->set_AssemblySliceAdaptor($asa);
47 
48  # add an AssemblySlice to your MappedSliceContainer
49  $msc->attach_AssemblySlice('NCBIM36');
50 
51  foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
52  print $mapped_slice->name, "\n";
53 
54  foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
55  print " ", &to_string($sf), "\n";
56  }
57  }
58 
59 =head1 DESCRIPTION
60 
61 NOTE: this code is under development and not fully functional nor tested
62 yet. Use only for development.
63 
64 This object represents a mapped slice, i.e. a slice that's attached
65 to a reference slice and a mapper to convert coordinates to/from the
66 reference. The attachment is done via a MappedSliceContainer which
67 has the reference slice and the "container slice" defining the common
68 coordinate system for all MappedSlices.
69 
70 A MappedSlice is supposed to behave as close to a Bio::EnsEMBL::Slice
71 as possible. Most Slice methods are implemented in MappedSlice and will
72 return an equivalent value to what Slice does. There are some exceptions
73 of unimplemented methods, either because there is no useful equivalent
74 for a MappedSlice to do, or they are too complicated.
75 
76 Not supported Bio::EnsEMBL::Slice methods:
77 
78  All Bio::PrimarySeqI compliance methods
79  expand
80  get_generic_features
81  get_seq_region_id
82  seq_region_Slice
83 
84 Not currently supported but maybe should/could:
85 
86  calculate_pi
87  calculate_theta
88  get_base_count
89  get_by_Individual
90  get_by_strain
91  invert
92 
93 Internally, a MappedSlice is a collection of Bio::EnsEMBL::Slices and
94 associated Bio::EnsEMBL::Mappers which map the slices to the common
95 container coordinate system.
96 
97 MappedSlices are usually created and attached to a MappedSliceContainer
98 by an adaptor/factory.
99 
100 =head1 METHODS
101 
102  new
103  add_Slice_Mapper_pair
104  get_all_Slice_Mapper_pairs
105  adaptor
106  container
107  name
108  seq_region_name
109  start
110  end
111  strand
112  length
113  seq_region_length
114  centrepoint
115  coord_system
116  coord_system_name
117  is_toplevel
118  seq (not implemented yet)
119  subseq (not implemented yet)
120  get_repeatmasked_seq (not implemented yet)
121  sub_MappedSlice (not implemented yet)
122  project (not implemented yet)
123 
124 =head1 RELATED MODULES
125 
126  Bio::EnsEMBL::MappedSlice
127  Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
128  Bio::EnsEMBL::Compara::AlignSlice
129  Bio::EnsEMBL::Compara::AlignSlice::Slice
130  Bio::EnsEMBL::StrainSlice
131 
132 =cut
133 
134 package Bio::EnsEMBL::MappedSlice;
135 
136 use strict;
137 use warnings;
138 no warnings 'uninitialized';
139 
140 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
141 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
142 use Bio::EnsEMBL::Mapper;
143 use Scalar::Util qw(weaken);
144 
145 use vars qw($AUTOLOAD);
146 
147 
148 =head2 new
149 
150  Arg [ADAPTOR] : Adaptor $adaptor - an adaptor of the appropriate type
151  Arg [CONTAINER] : Bio::EnsEMBL::MappedSliceContainer $container - the
152  container this MappedSlice is attached to
153  Arg [NAME] : String $name - name
154  Example : my $mapped_slice = Bio::EnsEMBL::MappedSlice->new(
155  -ADAPTOR => $adaptor,
156  -CONTAINER => $container,
157  -NAME => $name,
158  );
159  Description : Constructor. Usually you won't call this method manually, but
160  the MappedSlice will be constructed by an adaptor/factory.
161  Return type : Bio::EnsEMBL::MappedSlice
162  Exceptions : thrown on wrong or missing arguments
163  Caller : general, MappedSlice adaptors
164  Status : At Risk
165  : under development
166 
167 =cut
168 
169 sub new {
170  my $caller = shift;
171  my $class = ref($caller) || $caller;
172 
173  my ($adaptor, $container, $name) =
174  rearrange([qw(ADAPTOR CONTAINER NAME)], @_);
175 
176  # arguement check
177  unless ($container and ref($container) and
178  $container->isa('Bio::EnsEMBL::MappedSliceContainer')) {
179  throw("Need a MappedSliceContainer.");
180  }
181 
182  my $self = {};
183  bless ($self, $class);
184 
185  #
186  # initialise object
187  #
188 
189  # need to weaken reference to prevent circular reference
190  weaken($self->{'container'} = $container);
191 
192  $self->adaptor($adaptor) if (defined($adaptor));
193  $self->{'name'} = $name if (defined($name));
194 
195  $self->{'slice_mapper_pairs'} = [];
196 
197  return $self;
198 }
199 
200 
201 =head2 add_Slice_Mapper_pair
202 
203  Arg[1] : Bio::EnsEMBL::Slice $slice - slice to add
204  Arg[2] : Bio::EnsEMBL::Mapper $mapper - the mapper for this slice
205  Example : $mapped_slice->add_Slice_Mapper_pair($slice, $mapper);
206  Description : Adds a native slice and a corresponding mapper to map to/from
207  the artificial container coord system.
208  Return type : listref of Bio::EnsEMBL::MappedSlice
209  Exceptions : thrown on wrong or missing arguments
210  Caller : general, MappedSlice adaptors
211  Status : At Risk
212  : under development
213 
214 =cut
215 
216 sub add_Slice_Mapper_pair {
217  my $self = shift;
218  my $slice = shift;
219  my $mapper = shift;
220 
221  # argument check
222  unless ($slice and ref($slice) and ($slice->isa('Bio::EnsEMBL::Slice') or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
223  throw("You must provide a slice.");
224  }
225 
226  unless ($mapper and ref($mapper) and $mapper->isa('Bio::EnsEMBL::Mapper')) {
227  throw("You must provide a mapper.");
228  }
229 
230  push @{ $self->{'slice_mapper_pairs'} }, [ $slice, $mapper ];
231 
232  return $self->{'slice_mapper_pairs'};
233 }
234 
235 
236 =head2 get_all_Slice_Mapper_pairs
237 
238  Example : foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
239  my ($slice, $mapper) = @$pair;
240 
241  # get container coordinates
242  my @coords = $mapper->map_coordinates(
243  $slice->seq_region_name,
244  $slice->start,
245  $slice->end,
246  $slice->strand,
247  'mapped_slice'
248  );
249 
250  # ....
251  }
252  Description : Gets all Slice/Mapper pairs this MappedSlice is composed of.
253  Each slice (and features on it) can be mapped onto the
254  artificial container coord system using the mapper.
255  Return type : listref of listref of a Bio::EnsEMBL::Slice and
257  Exceptions : none
258  Caller : general
259  Status : At Risk
260  : under development
261 
262 =cut
263 
264 sub get_all_Slice_Mapper_pairs {
265  my $self = shift;
266  return $self->{'slice_mapper_pairs'};
267 }
268 
269 
270 =head2 adaptor
271 
272  Arg[1] : (optional) Adaptor $adaptor - the adaptor/factory for this
273  object
274  Example : $mapped_slice->adaptor($assembly_slice_adaptor);
275  Description : Getter/setter for the adaptor/factory for this object.
276  Return type : Adaptor of appropriate type
277  Exceptions : none
278  Caller : general
279  Status : At Risk
280  : under development
281 
282 =cut
283 
284 sub adaptor {
285  my $self = shift;
286  weaken($self->{'adaptor'} = shift) if (@_);
287  return $self->{'adaptor'};
288 }
289 
290 
291 =head2 container
292 
293  Arg[1] : (optional) Bio::EnsEMBL::MappedSliceContainer - the container
294  this object is attached to
295  Example : my $container = $mapped_slice->container;
296  print $container->ref_slice->name, "\n";
297  Description : Getter/setter for the container this object is attached to. The
298  container will give you access to the reference slice, a common
299  artificial container slice, and a mapper to map to it from the
300  container coord system.
301 
302  The implementation uses a weak reference to attach the container
303  since the container holds a list of MappedSlices itself.
305  Exceptions : none
306  Caller : general
307  Status : At Risk
308  : under development
309 
310 =cut
311 
312 sub container {
313  my $self = shift;
314  weaken($self->{'container'} = shift) if (@_);
315  return $self->{'container'};
316 }
317 
318 
319 =head2 name
320 
321  Arg[1] : String - the name of this object
322  Example : my $name = $mapped_slice->container->ref_slice->name .
323  ":mapped_" . $ident_string;
324  $mapped_slice->name($name);
325  Description : Getter/setter for this object's name
326  Return type : String
327  Exceptions : none
328  Caller : general
329  Status : At Risk
330  : under development
331 
332 =cut
333 
334 sub name {
335  my $self = shift;
336  $self->{'name'} = shift if (@_);
337  return $self->{'name'};
338 }
339 
340 
341 =head2 seq_region_name
342 
343  Example : my $sr_name = $mapped_slice->seq_region_name;
344  Description : Returns the seq_region name of the reference slice.
345  Return type : String
346  Exceptions : none
347  Caller : general
348  Status : At Risk
349  : under development
350 
351 =cut
352 
353 sub seq_region_name {
354  my $self = shift;
355  return $self->container->ref_slice->seq_region_name;
356 }
357 
358 
359 =head2 start
360 
361  Example : my $start = $mapped_slice->start;
362  Description : Returns the start of the container slice.
363  Return type : Int
364  Exceptions : none
365  Caller : general
366  Status : At Risk
367  : under development
368 
369 =cut
370 
371 sub start {
372  my $self = shift;
373  return $self->container->container_slice->start;
374 }
375 
376 
377 =head2 end
378 
379  Example : my $end = $mapped_slice->end;
380  Description : Returns the end of the container slice.
381  Return type : Int
382  Exceptions : none
383  Caller : general
384  Status : At Risk
385  : under development
386 
387 =cut
388 
389 sub end {
390  my $self = shift;
391  return $self->container->container_slice->end;
392 }
393 
394 
395 =head2 strand
396 
397  Example : my $strand = $mapped_slice->strand;
398  Description : Returns the strand of the container slice.
399  Return type : Int
400  Exceptions : none
401  Caller : general
402  Status : At Risk
403  : under development
404 
405 =cut
406 
407 sub strand {
408  my $self = shift;
409  return $self->container->container_slice->strand;
410 }
411 
412 
413 =head2 length
414 
415  Example : my $length = $mapped_slice->length;
416  Description : Returns the length of the container slice
417  Return type : Int
418  Exceptions : none
419  Caller : general
420  Status : At Risk
421  : under development
422 
423 =cut
424 
425 sub length {
426  my $self = shift;
427  return $self->container->container_slice->length;
428 }
429 
430 
431 =head2 seq_region_length
432 
433  Example : my $sr_length = $mapped_slice->seq_region_length;
434  Description : Returns the seq_region length of the reference slice.
435  Return type : Int
436  Exceptions : none
437  Caller : general
438  Status : At Risk
439  : under development
440 
441 =cut
442 
443 sub seq_region_length {
444  my $self = shift;
445  return $self->container->ref_slice->seq_region_length;
446 }
447 
448 
449 =head2 centrepoint
450 
451  Example : my $centrepoint = $mapped_slice->centrepoint;
452  Description : Returns the centrepoint of the container slice.
453  Return type : Int
454  Exceptions : none
455  Caller : general
456  Status : At Risk
457  : under development
458 
459 =cut
460 
461 sub centrepoint {
462  my $self = shift;
463  return $self->container->container_slice->centrepoint;
464 }
465 
466 
467 =head2 coord_system
468 
469  Example : my $cs = $mapped_slice->coord_system;
470  Description : Returns the coord system of the container slice.
471  Return type : Bio::EnsEMBL::CoordSystem
472  Exceptions : none
473  Caller : general
474  Status : At Risk
475  : under development
476 
477 =cut
478 
479 sub coord_system {
480  my $self = shift;
481  return $self->container->container_slice->coord_system;
482 }
483 
484 =head2 coord_system_name
485 
486  Example : my $cs_name = $mapped_slice->coord_system_name;
487  Description : Returns the coord system name of the container slice.
488  Return type : Int
489  Exceptions : none
490  Caller : general
491  Status : At Risk
492  : under development
493 
494 =cut
495 
496 sub coord_system_name {
497  my $self = shift;
498  return $self->container->container_slice->coord_system_name;
499 }
500 
501 =head2 is_toplevel
502 
503  Example : my $toplevel_flag = $mapped_slice->is_toplevel;
504  Description : Returns weather the container slice is toplevel.
505  Return type : Int
506  Exceptions : none
507  Caller : general
508  Status : At Risk
509  : under development
510 
511 =cut
512 
513 sub is_toplevel {
514  my $self = shift;
515  return $self->container->container_slice->is_toplevel;
516 }
517 
518 
519 =head2 seq
520 
521  Example : my $seq = $mapped_slice->seq()
522  Description : Retrieves the expanded sequence of this mapped slice,
523  including "-" characters where there are inserts in any other
524  mapped slices. This will align with the sequence returned by
525  the container's seq() method.
526  Return type : String
527  Exceptions : none
528  Caller : general
529  Status : At Risk
530  : under development
531 
532 =cut
533 
534 sub seq {
535  my $self = shift;
536 
537  # create an empty string
538  my $ms_seq = '';
539 
540  # this coord represents the current position in the MS sequence
541  my $start = 0;
542 
543  # get slice/mapper pairs from mapped slice (usually only one anyway)
544  foreach my $pair(@{$self->get_all_Slice_Mapper_pairs()}) {
545  my ($s, $m) = @$pair;
546 
547  # make sure to send extra args
548  # eg strain slices might need read coverage filtering
549  my $seq = $s->seq(@_);
550 
551  # project from mapped slice to reference slice using the mapper
552  foreach my $ref_coord($m->map_coordinates('mapped_slice', 1, CORE::length($seq), $s->strand, 'mapped_slice')) {
553 
554  # normal coord
555  if(!$ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ref_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
556 
557  # project from reference slice to container slice using the container's mapper
558  foreach my $ms_coord($self->container->mapper->map_coordinates($self->container->ref_slice->seq_region_name, $ref_coord->start, $ref_coord->end, $ref_coord->strand, 'ref_slice')) {
559 
560  # normal coord
561  if(!$ms_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate') && !$ms_coord->isa('Bio::EnsEMBL::Mapper::Gap')) {
562  $ms_seq .= substr($seq, $start, $ms_coord->length);
563 
564  $start += $ms_coord->length();
565  }
566 
567  # indel coord
568  else {
569  $ms_seq .= '-' x $ms_coord->length();
570  }
571  }
572  }
573 
574  # indel / gap
575  else {
576  if($ref_coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) {
577  if($ref_coord->gap_length > 0) {
578  $ms_seq .= substr($seq, $start, $ref_coord->gap_length);
579  $start += $ref_coord->gap_length;
580  }
581  $ms_seq .= '-' x ($ref_coord->length() - $ref_coord->gap_length());
582  } else {
583  $ms_seq .= '-' x $ref_coord->length();
584  }
585  }
586  }
587  }
588 
589  return $ms_seq;
590 }
591 
592 sub subseq {
593 }
594 
595 sub get_repeatmasked_seq {
596 }
597 
598 sub sub_MappedSlice {
599 }
600 
601 sub project {
602 }
603 
604 
605 =head2 AUTOLOAD
606 
607  Arg[1..N] : Arguments passed on to the calls on the underlying slices.
608  Example : my @simple_features = @{ $mapped_slice->get_all_SimpleFeatures };
609  Description : Aggregate data gathered from composing Slices.
610  This will call Slice->get_all_* and combine the results.
611  Coordinates will be transformed to be on the container slice
612  coordinate system.
613 
614  Calls involving DAS features are skipped since the DAS adaptor
615  handles coordinate conversions natively.
616  Return type : listref of features (same type as corresponding Slice method)
617  Exceptions : none
618  Caller : general
619  Status : At Risk
620  : under development
621 
622 =cut
623 
624 sub AUTOLOAD {
625  my $self = shift;
626 
627  my $method = $AUTOLOAD;
628  $method =~ s/.*:://;
629 
630  # AUTOLOAD should only deal with get_all_* methods
631  return unless ($method =~ /^get_all_/);
632 
633  # skip DAS methods
634  return if ($method =~ /DAS/);
635 
636  my @mapped_features = ();
637 
638  foreach my $pair (@{ $self->get_all_Slice_Mapper_pairs }) {
639  my ($slice, $mapper) = @$pair;
640  #warn $slice->name;
641 
642  # call $method on each native slice composing the MappedSlice
643  my @features = @{ $slice->$method(@_) };
644 
645  # map features onto the artificial container coordinate system
646  foreach my $f (@features) {
647 
648  my @coords = $mapper->map_coordinates(
649  $f->slice->seq_region_name,
650  $f->start,
651  $f->end,
652  $f->strand,
653  'mapped_slice'
654  );
655 
656  # sanity check
657  if (scalar(@coords) > 1) {
658  warning("Got more than one Coordinate returned, expected only one!\n");
659  }
660 
661  $f->start($coords[0]->start);
662  $f->end($coords[0]->end);
663  $f->strand($coords[0]->strand);
664  $f->slice($self->container->container_slice);
665 
666  push @mapped_features, $f;
667  }
668 
669  }
670 
671  return \@mapped_features;
672 }
673 
674 
675 1;
676 
map
public map()
Bio::EnsEMBL::MappedSliceContainer::get_AssemblySliceAdaptor
public Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor get_AssemblySliceAdaptor()
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::MappedSlice
Definition: MappedSlice.pm:75
Bio::EnsEMBL::Slice::name
public String name()
Bio::EnsEMBL::MappedSliceContainer
Definition: MappedSliceContainer.pm:62
Bio::EnsEMBL::MappedSliceContainer::new
public Bio::EnsEMBL::MappedSliceContainer new()
Bio::EnsEMBL::MappedSliceContainer::ref_slice
public Bio::EnsEMBL::Slice ref_slice()
Bio::EnsEMBL::Mapper
Definition: Coordinate.pm:3