ensembl-hive  2.8.1
MappedSliceContainer.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::MappedSliceContainer - container for mapped slices
34 
35 =head1 SYNOPSIS
36 
37  # get a reference slice
38  my $slice =
39  $slice_adaptor->fetch_by_region( 'chromosome', 14, 900000, 950000 );
40 
41  # create MappedSliceContainer based on the reference slice
42  my $msc = Bio::EnsEMBL::MappedSliceContainer->new( -SLICE => $slice );
43 
44  # set the adaptor for fetching AssemblySlices
45  my $asa = $slice->adaptor->db->get_AssemblySliceAdaptor;
46  $msc->set_AssemblySliceAdaptor($asa);
47 
48  # add an AssemblySlice to your MappedSliceContainer
49  $msc->attach_AssemblySlice('NCBIM36');
50 
51  foreach my $mapped_slice ( @{ $msc->get_all_MappedSlices } ) {
52  print $mapped_slice->name, "\n";
53 
54  foreach my $sf ( @{ $mapped_slice->get_all_SimpleFeatures } ) {
55  print " ", &to_string($sf), "\n";
56  }
57  }
58 
59 =head1 DESCRIPTION
60 
61 NOTE: this code is under development and not fully functional nor tested
62 yet. Use only for development.
63 
64 A MappedSliceContainer holds a collection of one or more
65 Bio::EnsEMBL::MappedSlices. It is based on a real reference slice and
66 contains an artificial "container slice" which defines the common
67 coordinate system used by all attached MappedSlices. There is also a
68 mapper to convert coordinates between the reference and the container
69 slice.
70 
71 Attaching MappedSlices to the container is delegated to adaptors
72 (which act more as object factories than as traditional Ensembl db
73 adaptors). The adaptors will also modify the container slice and
74 associated mapper if required. This design allows us to keep the
75 MappedSliceContainer generic and encapsulate the data source specific
76 code in the adaptor/factory module.
77 
78 In the simplest use case, all required MappedSlices are attached to the
79 MappedSliceContainer at once (by a single call to the adaptor). This
80 object should also allow "hot-plugging" of MappedSlices (e.g. attach a
81 MappedSlice representing a strain to a container that already contains a
82 multi-species alignment). The methods for attaching new MappedSlices will
83 be responsible for performing the necessary adjustments to coordinates and
84 mapper on the existing MappedSlices.
85 
86 =head1 METHODS
87 
88  new
89  set_adaptor
90  get_adaptor
91  set_AssemblySliceAdaptor
92  get_AssemblySliceAdaptor
93  set_AlignSliceAdaptor (not implemented yet)
94  get_AlignSliceAdaptor (not implemented yet)
95  set_StrainSliceAdaptor (not implemented yet)
96  get_StrainSliceAdaptor (not implemented yet)
97  attach_AssemblySlice
98  attach_AlignSlice (not implemented yet)
99  attach_StrainSlice (not implemented yet)
100  get_all_MappedSlices
101  sub_MappedSliceContainer (not implemented yet)
102  ref_slice
103  container_slice
104  mapper
105  expanded
106 
107 =head1 RELATED MODULES
108 
111  Bio::EnsEMBL::Compara::AlignSlice
112  Bio::EnsEMBL::Compara::AlignSlice::Slice
113  Bio::EnsEMBL::StrainSlice
114 
115 =cut
116 
117 package Bio::EnsEMBL::MappedSliceContainer;
118 
119 use strict;
120 use warnings;
121 no warnings 'uninitialized';
122 
123 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
124 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
128 
129 
# Adaptor types that can be used with this container. set_adaptor() and
# get_adaptor() reject any type that is not a key of this hash.
my %adaptors = (
  assembly => 1,
  align    => 1,
  strain   => 1,
);
132 
133 
134 =head2 new
135 
136  Arg [SLICE] : Bio::EnsEMBL::Slice $slice - the reference slice for this
137  container
138  Arg [EXPANDED] : (optional) Boolean $expanded - set expanded mode (default:
139  collapsed)
140  Example : my $slice = $slice_adaptor->fetch_by_region('chromosome', 1,
141  9000000, 9500000);
142  my $msc = Bio::EnsEMBL::MappedSliceContainer->new(
143  -SLICE => $slice,
144  -EXPANDED => 1,
145  );
146  Description : Constructor. See the general documentation of this module for
147  details about this object. Note that the constructor creates an
148  empty container, so you'll have to attach MappedSlices to it to
149  be useful (this is usually done by an adaptor/factory).
150  Return type : Bio::EnsEMBL::MappedSliceContainer
151  Exceptions : thrown on wrong or missing argument
152  Caller : general
153  Status : At Risk
154  : under development
155 
156 =cut
157 
sub new {
  my ($caller, @args) = @_;

  # Named arguments: -SLICE (required) and -EXPANDED (optional).
  my ($ref_slice, $expanded) = rearrange(['SLICE', 'EXPANDED'], @args);

  # The reference slice must be a Slice or LRGSlice object.
  my $is_slice = $ref_slice && ref($ref_slice)
    && ( $ref_slice->isa('Bio::EnsEMBL::Slice')
      || $ref_slice->isa('Bio::EnsEMBL::LRGSlice') );
  throw("You must provide a reference slice.") if !$is_slice;

  # Works both as a class method and as an instance method.
  my $self = bless {
    'ref_slice'     => $ref_slice,
    'expanded'      => $expanded || 0,   # collapsed mode by default
    'mapped_slices' => [],               # the container starts out empty
  }, ref($caller) || $caller;

  # Set up the container slice and the ref_slice <-> container mapper.
  $self->_create_container_slice($ref_slice);

  return $self;
}
184 
185 
#
# Create the artificial slice which represents the common coordinate system
# used for this MappedSliceContainer, together with the mapper that converts
# between the reference slice and that container coordinate system.
#
# Arg[1]      : Bio::EnsEMBL::Slice (or LRGSlice) $ref_slice
# Side effects: stores the new slice in $self->{'container_slice'} and the
#               new mapper in $self->{'mapper'}
# Returns     : the newly created Bio::EnsEMBL::Mapper
# Exceptions  : thrown if $ref_slice is missing or of the wrong type
#
sub _create_container_slice {
  my $self = shift;
  my $ref_slice = shift;

  # argument check
  unless ($ref_slice and ref($ref_slice) and
    ($ref_slice->isa('Bio::EnsEMBL::Slice') or $ref_slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
    throw("You must provide a reference slice.");
  }

  # Make sure the classes instantiated below are loaded. require is a no-op
  # for modules that are already loaded, so this is safe even when the file
  # (or a caller) has pulled them in already.
  require Bio::EnsEMBL::CoordSystem;
  require Bio::EnsEMBL::Slice;
  require Bio::EnsEMBL::Mapper;

  # create an artificial coordinate system for the container slice
  my $cs = Bio::EnsEMBL::CoordSystem->new(
    -NAME => 'container',
    -RANK => 1,
  );

  # Create a new artificial slice spanning your container. Initially this will
  # simply span your reference slice, i.e. run from 1 to the reference length.
  my $container_slice = Bio::EnsEMBL::Slice->new(
    -COORD_SYSTEM => $cs,
    -START => 1,
    -END => $ref_slice->length,
    -STRAND => 1,
    -SEQ_REGION_NAME => 'container',
  );

  $self->{'container_slice'} = $container_slice;

  # Create a Mapper to map to/from the reference slice to the container coord
  # system. Initially this is a single ungapped block in the same orientation.
  my $mapper = Bio::EnsEMBL::Mapper->new('ref_slice', 'container');

  $mapper->add_map_coordinates(
    $ref_slice->seq_region_name,
    $ref_slice->start,
    $ref_slice->end,
    1,
    $container_slice->seq_region_name,
    $container_slice->start,
    $container_slice->end,
  );

  $self->{'mapper'} = $mapper;

  # Same value the original returned implicitly (the result of the assignment).
  return $mapper;
}
234 
235 
236 =head2 set_adaptor
237 
238  Arg[1] : String $type - the type of adaptor to set
239  Arg[2] : Adaptor $adaptor - the adaptor to set
240  Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
241  $msc->set_adaptor('assembly', $adaptor);
242  Description : Parameterisable wrapper for all methods that set adaptors (see
243  below).
244  Return type : same as Arg 2
245  Exceptions : thrown on missing type
246  Caller : general
247  Status : At Risk
248  : under development
249 
250 =cut
251 
sub set_adaptor {
  my ($self, $type, $adaptor) = @_;

  # Only the types registered in %adaptors are accepted.
  throw("Missing or unknown adaptor type.") if (!$type or !$adaptors{$type});

  # Dispatch to the type-specific setter, e.g. 'assembly' ->
  # set_AssemblySliceAdaptor.
  my $setter = 'set_' . ucfirst($type) . 'SliceAdaptor';

  return $self->$setter($adaptor);
}
267 
268 
269 =head2 get_adaptor
270 
271  Arg[1] : String $type - the type of adaptor to get
272  Example : my $assembly_slice_adaptor = $msc->get_adaptor('assembly');
273  Description : Parameterisable wrapper for all methods that get adaptors (see
274  below).
275  Return type : An adaptor for the requested type of MappedSlice.
276  Exceptions : thrown on missing type
277  Caller : general
278  Status : At Risk
279  : under development
280 
281 =cut
282 
sub get_adaptor {
  my ($self, $type) = @_;

  # Only the types registered in %adaptors are accepted.
  throw("Missing or unknown adaptor type.") if (!$type or !$adaptors{$type});

  # Dispatch to the type-specific getter, e.g. 'assembly' ->
  # get_AssemblySliceAdaptor.
  my $getter = 'get_' . ucfirst($type) . 'SliceAdaptor';

  return $self->$getter;
}
297 
298 
299 =head2 set_AssemblySliceAdaptor
300 
301  Arg[1] : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor - the adaptor to set
302  Example : my $adaptor = Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor->new;
303  $msc->set_AssemblySliceAdaptor($adaptor);
304  Description : Sets an AssemblySliceAdaptor for this container. The adaptor can
305  be used to attach MappedSlice for alternative assemblies.
306  Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
307  Exceptions : thrown on wrong or missing argument
308  Caller : general, $self->get_adaptor
309  Status : At Risk
310  : under development
311 
312 =cut
313 
# Store the AssemblySliceAdaptor used to attach alternative-assembly
# MappedSlices to this container.
#
# Arg[1]     : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
# Returns    : the adaptor that was stored
# Exceptions : thrown if the argument is missing or is not a
#              Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
sub set_AssemblySliceAdaptor {
  my $self = shift;
  my $assembly_slice_adaptor = shift;

  unless ($assembly_slice_adaptor and ref($assembly_slice_adaptor) and
    $assembly_slice_adaptor->isa('Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor')) {
    # The message names the class that is actually checked for (the original
    # omitted the DBSQL namespace).
    throw("Need a Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor.");
  }

  $self->{'adaptors'}->{'AssemblySlice'} = $assembly_slice_adaptor;

  return $assembly_slice_adaptor;
}
325 
326 
327 =head2 get_AssemblySliceAdaptor
328 
329  Example : my $assembly_slice_adaptor = $msc->get_AssemblySliceAdaptor;
330  Description : Gets the AssemblySliceAdaptor from this container. The adaptor can
331  be used to attach MappedSlice for alternative assemblies.
332  Return type : Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
333  Exceptions : none (a warning is issued if no adaptor is attached)
334  Caller : general, $self->get_adaptor
335  Status : At Risk
336  : under development
337 
338 =cut
339 
sub get_AssemblySliceAdaptor {
  my ($self) = @_;

  my $adaptor = $self->{'adaptors'}->{'AssemblySlice'};

  # A missing adaptor is not fatal: warn and return the (false) value as is.
  warning("No AssemblySliceAdaptor attached to MappedSliceContainer.")
    if !$adaptor;

  return $adaptor;
}
349 
350 
# [todo] Placeholder for the AlignSliceAdaptor setter; it unconditionally
# throws until the functionality is implemented.
sub set_AlignSliceAdaptor {
  throw("Not implemented yet!");
}
355 
356 
# [todo] Placeholder for the AlignSliceAdaptor getter; it unconditionally
# throws until the functionality is implemented.
sub get_AlignSliceAdaptor {
  throw("Not implemented yet!");
}
361 
362 
# [todo] Store the StrainSliceAdaptor for this container. The argument must be
# a Bio::EnsEMBL::Variation::DBSQL::StrainSliceAdaptor, otherwise an exception
# is thrown. Returns the adaptor that was stored.
sub set_StrainSliceAdaptor {
  my ($self, $strain_slice_adaptor) = @_;

  my $is_adaptor = $strain_slice_adaptor
    && ref($strain_slice_adaptor)
    && $strain_slice_adaptor->isa('Bio::EnsEMBL::Variation::DBSQL::StrainSliceAdaptor');
  throw("Need a Bio::EnsEMBL::Variation::DBSQL::StrainSliceAdaptor.")
    if !$is_adaptor;

  return $self->{'adaptors'}->{'StrainSlice'} = $strain_slice_adaptor;
}
375 
376 
# [todo] Return the StrainSliceAdaptor attached to this container. If none is
# attached a warning is issued and the (false) stored value is returned.
sub get_StrainSliceAdaptor {
  my ($self) = @_;

  my $adaptor = $self->{'adaptors'}->{'StrainSlice'};

  warning("No StrainSliceAdaptor attached to MappedSliceContainer.")
    if !$adaptor;

  return $adaptor;
}
387 
388 
389 =head2 attach_AssemblySlice
390 
391  Arg[1] : String $version - assembly version to attach
392  Example : $msc->attach_AssemblySlice('NCBIM36');
393  Description : Attaches a MappedSlice for an alternative assembly to this
394  container.
395  Return type : none
396  Exceptions : thrown on missing argument
397  Caller : general, Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
398  Status : At Risk
399  : under development
400 
401 =cut
402 
sub attach_AssemblySlice {
  my ($self, $version) = @_;

  throw("Need a version.") if !$version;

  # Without an adaptor there is nothing to attach; the getter has already
  # issued a warning in that case.
  my $asa = $self->get_AssemblySliceAdaptor;
  if (!$asa) {
    return;
  }

  # The adaptor builds the MappedSlices for the requested assembly version;
  # it receives this container as its first argument.
  my $fetched = $asa->fetch_by_version($self, $version);
  my @new_slices = @{ $fetched };

  push @{ $self->{'mapped_slices'} }, @new_slices;
}
416 
417 
418 =head2 attach_StrainSlice
419 
420  Arg[1] : String $strain - name of strain to attach
421  Example : $msc->attach_StrainSlice('Watson');
422  Description : Attaches a MappedSlice for an alternative strain to this
423  container.
424  Return type : none
425  Exceptions : thrown on missing argument
426  Caller : general, Bio::EnsEMBL::DBSQL::StrainSliceAdaptor
427  Status : At Risk
428  : under development
429 
430 =cut
431 
sub attach_StrainSlice {
  my ($self, $strain) = @_;

  throw("Need a strain.") if !$strain;

  # Without an adaptor there is nothing to attach; the getter has already
  # issued a warning in that case.
  my $ssa = $self->get_StrainSliceAdaptor;
  if (!$ssa) {
    return;
  }

  # The adaptor builds the MappedSlices for the named strain; it receives
  # this container as its first argument.
  my $fetched = $ssa->fetch_by_name($self, $strain);
  my @new_slices = @{ $fetched };

  push @{ $self->{'mapped_slices'} }, @new_slices;
}
445 
446 
447 
448 =head2 get_all_MappedSlices
449 
450  Example : foreach my $mapped_slice (@{ $msc->get_all_MappedSlices }) {
451  print $mapped_slice->name, "\n";
452  }
453  Description : Returns all MappedSlices attached to this container.
454  Return type : listref of Bio::EnsEMBL::MappedSlice
455  Exceptions : none
456  Caller : general
457  Status : At Risk
458  : under development
459 
460 =cut
461 
sub get_all_MappedSlices {
  my ($self) = @_;

  # Hands out the internal listref itself, not a copy.
  my $mapped_slices = $self->{'mapped_slices'};
  return $mapped_slices;
}
466 
467 
# [todo] Placeholder; it unconditionally throws until the functionality is
# implemented.
sub sub_MappedSliceContainer {
  throw("Not implemented yet!");
}
472 
473 
474 =head2 ref_slice
475 
476  Arg[1] : (optional) Bio::EnsEMBL::Slice - the reference slice to set
477  Example : my $ref_slice = $mapped_slice_container->ref_slice;
478  print "This MappedSliceContainer is based on the reference
479  slice ", $ref_slice->name, "\n";
480  Description : Getter/setter for the reference slice.
481  Return type : Bio::EnsEMBL::Slice
482  Exceptions : thrown on wrong argument type
483  Caller : general
484  Status : At Risk
485  : under development
486 
487 =cut
488 
sub ref_slice {
  my ($self, @new_value) = @_;

  # Getter: no argument supplied.
  return $self->{'ref_slice'} unless @new_value;

  # Setter: accept only Slice or LRGSlice objects.
  my $slice = $new_value[0];
  my $is_slice = ref($slice)
    && ( $slice->isa('Bio::EnsEMBL::Slice')
      || $slice->isa('Bio::EnsEMBL::LRGSlice') );
  throw("Need a Bio::EnsEMBL::Slice.") if !$is_slice;

  $self->{'ref_slice'} = $slice;
  return $self->{'ref_slice'};
}
504 
505 
506 =head2 container_slice
507 
508  Arg[1] : (optional) Bio::EnsEMBL::Slice - the container slice to set
509  Example : my $container_slice = $mapped_slice_container->container_slice;
510  print "The common slice used by this MappedSliceContainer is ",
511  $container_slice->name, "\n";
512  Description : Getter/setter for the container slice. This is an artificial
513  slice which defines the common coordinate system used by the
514  MappedSlices attached to this container.
515  Return type : Bio::EnsEMBL::Slice
516  Exceptions : thrown on wrong argument type
517  Caller : general
518  Status : At Risk
519  : under development
520 
521 =cut
522 
sub container_slice {
  my ($self, @new_value) = @_;

  if (scalar @new_value) {
    my $candidate = $new_value[0];

    # Only Slice or LRGSlice objects may be installed as the container slice.
    my $acceptable = 0;
    if (ref($candidate)) {
      $acceptable = $candidate->isa('Bio::EnsEMBL::Slice')
        || $candidate->isa('Bio::EnsEMBL::LRGSlice');
    }
    throw("Need a Bio::EnsEMBL::Slice.") unless $acceptable;

    $self->{'container_slice'} = $candidate;
  }

  return $self->{'container_slice'};
}
538 
539 
540 =head2 mapper
541 
542  Arg[1] : (optional) Bio::EnsEMBL::Mapper - the mapper to set
543  Example : my $mapper = Bio::EnsEMBL::Mapper->new('ref', 'mapped');
544  $mapped_slice_container->mapper($mapper);
545  Description : Getter/setter for the mapper to map between reference slice and
546  the artificial container coord system.
547  Return type : Bio::EnsEMBL::Mapper
548  Exceptions : thrown on wrong argument type
549  Caller : internal, Bio::EnsEMBL::MappedSlice->AUTOLOAD
550  Status : At Risk
551  : under development
552 
553 =cut
554 
sub mapper {
  my ($self, @new_value) = @_;

  # Getter: no argument supplied.
  return $self->{'mapper'} unless @new_value;

  # Setter: only Bio::EnsEMBL::Mapper objects are accepted.
  my $candidate = $new_value[0];
  my $is_mapper = ref($candidate) && $candidate->isa('Bio::EnsEMBL::Mapper');
  throw("Need a Bio::EnsEMBL::Mapper.") if !$is_mapper;

  $self->{'mapper'} = $candidate;
  return $self->{'mapper'};
}
570 
571 
572 =head2 expanded
573 
574  Arg[1] : (optional) Boolean - expanded mode to set
575  Example : if ($mapped_slice_container->expanded) {
576  # do more elaborate mapping than in collapsed mode
577  [...]
578  }
579  Description : Getter/setter for expanded mode.
580 
581  By default, MappedSliceContainer use collapsed mode, which
582  means that no inserts in the reference sequence are allowed
583  when constructing the MappedSlices. In this mode, the
584  mapped_slice artificial coord system will be identical with the
585  ref_slice coord system.
586 
587  By setting expanded mode, you allow inserts in the reference
588  sequence.
589  Return type : Boolean
590  Exceptions : none
591  Caller : general
592  Status : At Risk
593  : under development
594 
595 =cut
596 
sub expanded {
  my ($self, @new_value) = @_;

  # Any supplied argument (including a false one) overwrites the flag.
  if (@new_value) {
    $self->{'expanded'} = $new_value[0];
  }

  return $self->{'expanded'};
}
602 
603 =head2 seq
604 
605  Example : my $seq = $container->seq()
606  Description : Retrieves the expanded sequence of the artificial container
607  slice, including "-" characters where there are inserts in any
608  of the attached mapped slices.
609  Return type : String
610  Exceptions : none
611  Caller : general
612  Status : At Risk
613  : under development
614 
615 =cut
616 
sub seq {
  my ($self) = @_;

  # Without a mapper there is nothing to project: return an empty string.
  my $mapper = $self->mapper;
  return '' unless defined($mapper);

  my $slice   = $self->ref_slice();
  my $ref_seq = $slice->seq();

  # Project the whole reference slice through the mapper.
  my @coords = $mapper->map_coordinates(
    $slice->seq_region_name,
    $slice->start,
    $slice->end,
    $slice->strand,
    'ref_slice',
  );

  my $offset = 0;    # read position within the reference sequence
  my @pieces;

  foreach my $coord (@coords) {
    my $length = $coord->length();

    if ($coord->isa('Bio::EnsEMBL::Mapper::IndelCoordinate')) {
      # indel: pad with "-" and do not consume any reference sequence
      push @pieces, '-' x $length;
    }
    else {
      # any other coordinate: copy the matching stretch of reference sequence
      push @pieces, substr($ref_seq, $offset, $length);
      $offset += $length;
    }
  }

  return join('', @pieces);
}
644 
645 
646 1;
647 
map
public map()
Bio::EnsEMBL::MappedSliceContainer::get_AssemblySliceAdaptor
public Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor get_AssemblySliceAdaptor()
Bio::EnsEMBL::CoordSystem
Definition: CoordSystem.pm:40
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::MappedSlice
Definition: MappedSlice.pm:75
Bio::EnsEMBL::LRGSlice
Definition: LRGSlice.pm:37
about
public about()
Bio::EnsEMBL::MappedSliceContainer
Definition: MappedSliceContainer.pm:62
Bio::EnsEMBL::DBSQL::AssemblySliceAdaptor
Definition: AssemblySliceAdaptor.pm:33
Bio::EnsEMBL::MappedSliceContainer::new
public Bio::EnsEMBL::MappedSliceContainer new()
get_adaptor
public get_adaptor()
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::Mapper
Definition: Coordinate.pm:3