ensembl-hive  2.7.0
Slice.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::Slice - Arbitary Slice of a genome
34 
35 =head1 SYNOPSIS
36 
37  $sa = $db->get_SliceAdaptor;
38 
39  $slice =
40  $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
41 
42  # get some attributes of the slice
43  my $seqname = $slice->seq_region_name();
44  my $start = $slice->start();
45  my $end = $slice->end();
46 
47  # get the sequence from the slice
48  my $seq = $slice->seq();
49 
50  # get some features from the slice
51  foreach $gene ( @{ $slice->get_all_Genes } ) {
52  # do something with a gene
53  }
54 
55  foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56  # do something with dna-dna alignments
57  }
58 
59 =head1 DESCRIPTION
60 
61 A Slice object represents a region of a genome. It can be used to retrieve
62 sequence or features from an area of interest.
63 
64 NOTE: The Slice is defined by its Strand, but normal behaviour for get_all_*
65 methods is to return Features on both Strands.
66 
67 =head1 METHODS
68 
69 =cut
70 
71 package Bio::EnsEMBL::Slice;
72 use vars qw(@ISA);
73 use strict;
74 
75 use Bio::PrimarySeqI;
76 
77 
78 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
79 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
81 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
86 
89 use Scalar::Util qw(weaken isweak);
90 
91 # use Data::Dumper;
92 
93 my $registry = "Bio::EnsEMBL::Registry";
94 
95 @ISA = qw(Bio::PrimarySeqI);
96 
97 
98 =head2 new
99 
100  Arg [...] : List of named arguments
101  Bio::EnsEMBL::CoordSystem COORD_SYSTEM
102  string SEQ_REGION_NAME,
103  int START,
104  int END,
105  int SEQ_REGION_LENGTH, (optional)
106  string SEQ (optional)
107  int STRAND, (optional, defaults to 1)
108  Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
109  Example : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
110  -start => 1,
111  -end => 10000,
112  -strand => 1,
113  -seq_region_name => 'X',
114  -seq_region_length => 12e6,
115  -adaptor => $slice_adaptor);
116  Description: Creates a new slice object. A slice represents a region
117  of sequence in a particular coordinate system. Slices can be
118  used to retrieve sequence and features from an area of
119  interest in a genome.
120 
121  Coordinates start at 1 and are inclusive. Negative
122  coordinates or coordinates exceeding the length of the
123  seq_region are permitted. Start must be less than or equal.
124  to end regardless of the strand.
125 
126  Slice objects are immutable. Once instantiated their attributes
127  (with the exception of the adaptor) may not be altered. To
128  change the attributes a new slice must be created.
129  Returntype : Bio::EnsEMBL::Slice
130  Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
131  Caller : general, Bio::EnsEMBL::SliceAdaptor
132  Status : Stable
133 
134 =cut
135 
136 sub new {
137  my $caller = shift;
138 
139  #new can be called as a class or object method
140  my $class = ref($caller) || $caller;
141 
142  my ($seq, $coord_system, $seq_region_name, $seq_region_length,
143  $start, $end, $strand, $adaptor, $empty) =
144  rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
145  START END STRAND ADAPTOR EMPTY)], @_);
146 
147  if ( !defined($seq_region_name) ) {
148  throw('SEQ_REGION_NAME argument is required');
149  }
150  if ( !defined($start) ) { throw('START argument is required') }
151  if ( !defined($end) ) { throw('END argument is required') }
152 
153  ## if ( $start > $end + 1 ) {
154  ## throw('start must be less than or equal to end+1');
155  ## }
156 
157  if ( !defined($seq_region_length) ) { $seq_region_length = $end }
158 
159  if ( $seq_region_length <= 0 ) {
160  throw('SEQ_REGION_LENGTH must be > 0');
161  }
162 
163  if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
164  throw('SEQ must be the same length as the defined LENGTH not '
165  . CORE::length($seq)
166  . ' compared to '
167  . ( $end - $start + 1 ) );
168  }
169 
170  if(defined($coord_system)) {
171  if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
172  throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
173  }
174  if($coord_system->is_top_level()) {
175  throw('Cannot create slice on toplevel CoordSystem.');
176  }
177  } else {
178  warning("Slice without coordinate system");
179  }
180 
181  $strand ||= 1;
182 
183  if($strand != 1 && $strand != -1) {
184  throw('STRAND argument must be -1 or 1');
185  }
186 
187  if(defined($adaptor)) {
188  if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
189  throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
190  }
191  }
192 
193  my $self = bless {'coord_system' => $coord_system,
194  'seq' => $seq,
195  'seq_region_name' => $seq_region_name,
196  'seq_region_length' => $seq_region_length,
197  'start' => int($start),
198  'end' => int($end),
199  'strand' => $strand}, $class;
200 
201  $self->adaptor($adaptor);
202 
203  return $self;
204 
205 }
206 
207 =head2 new_fast
208 
209  Arg [1] : hashref to be blessed
210  Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
211  Exceptions : none
212  Returntype : Bio::EnsEMBL::Slice
213  Caller : general
214  Status : Stable
215 
216 =cut
217 
218 
219 sub new_fast {
220  my $class = shift;
221  my $hashref = shift;
222  my $self = bless $hashref, $class;
223  weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
224  return $self;
225 }
226 
227 =head2 adaptor
228 
229  Arg [1] : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
230  Example : $adaptor = $slice->adaptor();
231  Description: Getter/Setter for the slice object adaptor used
232  by this slice for database interaction.
234  Exceptions : thorws if argument passed is not a SliceAdaptor
235  Caller : general
236  Status : Stable
237 
238 =cut
239 
240 sub adaptor{
241  my $self = shift;
242 
243  if(@_) {
244  my $ad = shift;
245  if(defined($ad)) {
246  if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
247  throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
248  }
249  }
250  weaken($self->{'adaptor'} = $ad);
251  }
252 
253  return $self->{'adaptor'};
254 }
255 
256 
257 
258 =head2 seq_region_name
259 
260  Arg [1] : none
261  Example : $seq_region = $slice->seq_region_name();
262  Description: Returns the name of the seq_region that this slice is on. For
263  example if this slice is in chromosomal coordinates the
264  seq_region_name might be 'X' or '10'.
265 
266  This function was formerly named chr_name, but since slices can
267  now be on coordinate systems other than chromosomal it has been
268  changed.
269  Returntype : string
270  Exceptions : none
271  Caller : general
272  Status : Stable
273 
274 =cut
275 
276 sub seq_region_name {
277  my $self = shift;
278  return $self->{'seq_region_name'};
279 }
280 
281 =head2 seq_region_start
282 
283  Example : $seq_region_start = $slice->seq_region_start();
284  Description: Returns the start position of this slice relative to the
285  start of the sequence region that it was created on.
286  Since slices are always in genomic coordinates this is
287  an alias to start()
288  Returntype : int
289  Exceptions : none
290  Caller : general
291  Status : Stable
292 
293 =cut
294 
295 sub seq_region_start {
296  my $self = shift;
297  return $self->start();
298 }
299 
300 =head2 seq_region_end
301 
302  Example : $seq_region_end = $slice->seq_region_end();
303  Description: Returns the end position of this slice relative to the
304  start of the sequence region that it was created on.
305  Since slices are always in genomic coordinates this is
306  an alias to end()
307  Returntype : int
308  Exceptions : none
309  Caller : general
310  Status : Stable
311 
312 =cut
313 
314 sub seq_region_end {
315  my $self = shift;
316  return $self->end();
317 }
318 
319 =head2 seq_region_length
320 
321  Arg [1] : none
322  Example : $seq_region_length = $slice->seq_region_length();
323  Description: Returns the length of the entire seq_region that this slice is
324  on. For example if this slice is on a chromosome this will be
325  the length (in basepairs) of the entire chromosome.
326  Returntype : int
327  Exceptions : none
328  Caller : general
329  Status : Stable
330 
331 =cut
332 
333 sub seq_region_length {
334  my $self = shift;
335  return $self->{'seq_region_length'};
336 }
337 
338 
339 =head2 coord_system
340 
341  Arg [1] : none
342  Example : print $slice->coord_system->name();
343  Description: Returns the coordinate system that this slice is on.
344  Returntype : Bio::EnsEMBL::CoordSystem
345  Exceptions : none
346  Caller : general
347  Status : Stable
348 
349 =cut
350 
351 sub coord_system {
352  my $self = shift;
353  return $self->{'coord_system'};
354 }
355 
356 =head2 source
357 
358  Arg [1] : (optional) String $value
359  Example : print $slice->source();
360  Description: Returns the source this slice is coming from
361  Returntype : string
362  Exceptions : none
363  Caller : general
364  Status : Stable
365 
366 =cut
367 
368 sub source {
369  my $self = shift;
370  $self->{'source'} = shift if (@_);
371  return $self->{'source'};
372 }
373 
374 =head2 coord_system_name
375 
376  Arg [1] : none
377  Example : print $slice->coord_system_name()
378  Description: Convenience method. Gets the name of the coord_system which
379  this slice is on.
380  Returns undef if this Slice does not have an attached
381  CoordSystem.
382  Returntype: string or undef
383  Exceptions: none
384  Caller : general
385  Status : Stable
386 
387 =cut
388 
389 sub coord_system_name {
390  my $self = shift;
391  my $csystem = $self->{'coord_system'};
392  return ($csystem) ? $csystem->name() : undef;
393 }
394 
395 
396 =head2 centrepoint
397 
398  Arg [1] : none
399  Example : $cp = $slice->centrepoint();
400  Description: Returns the mid position of this slice relative to the
401  start of the sequence region that it was created on.
402  Coordinates are inclusive and start at 1.
403  Returntype : int
404  Exceptions : none
405  Caller : general
406  Status : Stable
407 
408 =cut
409 
410 sub centrepoint {
411  my $self = shift;
412  return ($self->{'start'}+$self->{'end'})/2;
413 }
414 
415 =head2 start
416 
417  Arg [1] : none
418  Example : $start = $slice->start();
419  Description: Returns the start position of this slice relative to the
420  start of the sequence region that it was created on.
421  Coordinates are inclusive and start at 1. Negative coordinates
422  or coordinates exceeding the length of the sequence region are
423  permitted. Start is always less than or equal to end
424  regardless of the orientation of the slice.
425  Returntype : int
426  Exceptions : none
427  Caller : general
428  Status : Stable
429 
430 =cut
431 
432 sub start {
433  my $self = shift;
434  return $self->{'start'};
435 }
436 
437 
438 
439 =head2 end
440 
441  Arg [1] : none
442  Example : $end = $slice->end();
443  Description: Returns the end position of this slice relative to the
444  start of the sequence region that it was created on.
445  Coordinates are inclusive and start at 1. Negative coordinates
446  or coordinates exceeding the length of the sequence region are
447  permitted. End is always greater than or equal to start
448  regardless of the orientation of the slice.
449  Returntype : int
450  Exceptions : none
451  Caller : general
452  Status : Stable
453 
454 =cut
455 
456 sub end {
457  my $self = shift;
458  return $self->{'end'};
459 }
460 
461 
462 
463 =head2 strand
464 
465  Arg [1] : none
466  Example : $strand = $slice->strand();
467  Description: Returns the orientation of this slice on the seq_region it has
468  been created on
469  Returntype : int (either 1 or -1)
470  Exceptions : none
471  Caller : general, invert
472  Status : Stable
473 
474 =cut
475 
476 sub strand{
477  my $self = shift;
478  return $self->{'strand'};
479 }
480 
481 
482 
483 
484 
485 =head2 name
486 
487  Arg [1] : none
488  Example : my $results = $cache{$slice->name()};
489  Description: Returns the name of this slice. The name is formatted as a colon
490  delimited string with the following attributes:
491  coord_system:version:seq_region_name:start:end:strand
492 
493  Slices with the same name are equivalent and thus the name can
494  act as a hash key.
495  Returntype : string
496  Exceptions : none
497  Caller : general
498  Status : Stable
499 
500 =cut
501 
502 sub name {
503  my $self = shift;
504 
505  my $cs = $self->{'coord_system'};
506 
507  return join(':',
508  ($cs) ? $cs->name() : '',
509  ($cs) ? $cs->version() : '',
510  $self->{'seq_region_name'},
511  $self->{'start'},
512  $self->{'end'},
513  $self->{'strand'});
514 }
515 
516 
517 
518 =head2 length
519 
520  Arg [1] : none
521  Example : $length = $slice->length();
522  Description: Returns the length of this slice in basepairs
523  Returntype : int
524  Exceptions : none
525  Caller : general
526  Status : Stable
527 
528 =cut
529 
530 sub length {
531  my ($self) = @_;
532 
533  my $length = $self->{'end'} - $self->{'start'} + 1;
534 
535  if ( $self->{'start'} > $self->{'end'} && $self->is_circular() ) {
536  $length += $self->{'seq_region_length'};
537  }
538 
539  return $length;
540 }
541 
542 =head2 is_reference
543  Arg : none
544  Example : my $reference = $slice->is_reference()
545  Description: Returns 1 if slice is a reference slice else 0
546  Returntype : int
547  Caller : general
548  Status : At Risk
549 
550 =cut
551 
552 sub is_reference {
553  my ($self) = @_;
554 
555  if ( !defined( $self->{'is_reference'} ) ) {
556  $self->{'is_reference'} =
557  $self->adaptor()->is_reference( $self->get_seq_region_id() );
558  }
559 
560  return $self->{'is_reference'};
561 }
562 
563 =head2 is_toplevel
564  Arg : none
565  Example : my $top = $slice->is_toplevel()
566  Description: Returns 1 if slice is a toplevel slice else 0
567  Returntype : int
568  Caller : general
569  Status : At Risk
570 
571 =cut
572 
573 sub is_toplevel {
574  my ($self) = @_;
575 
576  if ( !defined( $self->{'toplevel'} ) ) {
577  $self->{'toplevel'} =
578  $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
579  }
580 
581  return $self->{'toplevel'};
582 }
583 
584 =head2 has_karyotype
585  Arg : none
586  Example : my $top = $slice->has_karyotype()
587  Description: Returns 1 if slice is part of the karyotype else 0
588  Returntype : int
589  Caller : general
590  Status : At Risk
591 
592 =cut
593 
594 sub has_karyotype {
595  my ($self) = @_;
596 
597  if ( !defined( $self->{'karyotype'} ) ) {
598  $self->{'karyotype'} =
599  $self->adaptor()->has_karyotype( $self->get_seq_region_id() );
600  }
601 
602  return $self->{'karyotype'};
603 }
604 
605 =head2 karyotype_rank
606  Arg : none
607  Example : my $rank = $slice->karyotype_rank()
608  Description: Returns the numeric ranking in the karyotype. Otherwise 0 is returned
609  Returntype : int
610  Caller : general
611  Status : At Risk
612 
613 =cut
614 
615 sub karyotype_rank {
616  my ($self) = @_;
617  if(! defined( $self->{karyotype_rank})) {
618  my $rank = $self->adaptor()->get_karyotype_rank($self->get_seq_region_id());
619  $self->{karyotype_rank} = $rank if $rank;
620  }
621  return $self->{karyotype_rank} || 0;
622 }
623 
624 =head2 is_circular
625  Arg : none
626  Example : my $circ = $slice->is_circular()
627  Description: Returns 1 if slice is a circular slice else 0
628  Returntype : int
629  Caller : general
630  Status : Stable
631 
632 =cut
633 
634 sub is_circular {
635  my ($self) = @_;
636  my $adaptor = $self->adaptor();
637  return 0 if ! defined $adaptor;
638  if (! exists $self->{'circular'}) {
639  my $id = $adaptor->get_seq_region_id($self);
640  $self->{circular} = $adaptor->is_circular($id);
641  }
642  return $self->{circular};
643 }
644 
645 =head2 invert
646 
647  Arg [1] : none
648  Example : $inverted_slice = $slice->invert;
649  Description: Creates a copy of this slice on the opposite strand and
650  returns it.
651  Returntype : Bio::EnsEMBL::Slice
652  Exceptions : none
653  Caller : general
654  Status : Stable
655 
656 =cut
657 
658 sub invert {
659  my $self = shift;
660 
661  # make a shallow copy of the slice via a hash copy and flip the strand
662  my %s = %$self;
663  $s{'strand'} = $self->{'strand'} * -1;
664 
665  # reverse compliment any attached sequence
666  reverse_comp(\$s{'seq'}) if($s{'seq'});
667 
668  # bless and return the copy
669  return bless \%s, ref $self;
670 }
671 
672 
673 
674 =head2 seq
675 
676  Arg [1] : none
677  Example : print "SEQUENCE = ", $slice->seq();
678  Description: Returns the sequence of the region represented by this
679  slice formatted as a string.
680  Only available for the default coord_system
681  Returntype : string
682  Exceptions : none
683  Caller : general
684  Status : Stable
685 
686 =cut
687 
688 sub seq {
689  my $self = shift;
690 
691  # special case for in-between (insert) coordinates
692  return '' if($self->start() == $self->end() + 1);
693 
694  return $self->{'seq'} if($self->{'seq'});
695 
696  if($self->adaptor()) {
697  my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
698  return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
699  }
700 
701  # no attached sequence, and no db, so just return Ns
702  return 'N' x $self->length();
703 }
704 
705 
706 
707 =head2 subseq
708 
709  Arg [1] : int $startBasePair
710  relative to start of slice, which is 1.
711  Arg [2] : int $endBasePair
712  relative to start of slice.
713  Arg [3] : (optional) int $strand
714  The strand of the slice to obtain sequence from. Default
715  value is 1.
716  Description: returns string of dna sequence
717  Returntype : txt
718  Exceptions : end should be at least as big as start
719  strand must be set
720  Caller : general
721  Status : Stable
722 
723 =cut
724 
725 sub subseq {
726  my ( $self, $start, $end, $strand ) = @_;
727 
728  if ( $end+1 < $start ) {
729  throw("End coord + 1 is less than start coord");
730  }
731 
732  # handle 'between' case for insertions
733  return '' if( $start == $end + 1);
734 
735  $strand = 1 unless(defined $strand);
736 
737  if ( $strand != -1 && $strand != 1 ) {
738  throw("Invalid strand [$strand] in call to Slice::subseq.");
739  }
740  my $subseq;
741  if($self->adaptor){
742  my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
743  $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
744  ( $self, $start,
745  $end, $strand )};
746  } else {
747  ## check for gap at the beginning and pad it with Ns
748  if ($start < 1) {
749  $subseq = "N" x (1 - $start);
750  $start = 1;
751  }
752  $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
753  ## check for gap at the end and pad it with Ns
754  if ($end > $self->length()) {
755  $subseq .= "N" x ($end - $self->length());
756  }
757  reverse_comp(\$subseq) if($strand == -1);
758  }
759  return $subseq;
760 }
761 
762 =head2 sub_Slice_Iterator
763 
764  Arg[1] : int The chunk size to request
765  Example : my $i = $slice->sub_Slice_Iterator(60000);
766  while($i->has_next()) { warn $i->next()->name(); }
767  Description : Returns an iterator which batches subslices of this Slice
768  in the requested chunk size
769  Returntype : Bio::EnsEMBL::Utils::Iterator next() will return the next
770  chunk of Slice
771  Exceptions : None
772 
773 =cut
774 
775 sub sub_Slice_Iterator {
776  my ($self, $chunk_size) = @_;
777  throw "Need a chunk size to divide the slice by" if ! $chunk_size;
778  my $here = 1;
779  my $end = $self->length();
780  my $iterator_sub = sub {
781  while($here <= $end) {
782  my $there = $here + $chunk_size - 1;
783  $there = $end if($there > $end);
784  my $slice = $self->sub_Slice($here, $there);
785  $here = $there + 1;
786  return $slice;
787  }
788  return;
789  };
790  return Bio::EnsEMBL::Utils::Iterator->new($iterator_sub);
791 }
792 
793 =head2 assembly_exception_type
794 
795  Example : $self->assembly_exception_type();
796  Description : Returns the type of slice this is. If it is reference then you
797  will get 'REF' back. Otherwise you will get the first
798  element from C<get_all_AssemblyExceptionFeatures()>. If no
799  assembly exception exists you will get an empty string back.
800  Returntype : String
801  Exceptions : None
802  Caller : Public
803  Status : Beta
804 
805 =cut
806 
807 sub assembly_exception_type {
808  my ($self) = @_;
809  my $type = q{};
810  if($self->is_reference()) {
811  $type = 'REF';
812  }
813  else {
814  my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
815  if(@{$assembly_exceptions}) {
816  $type = $assembly_exceptions->[0]->type();
817  }
818  }
819  return $type;
820 }
821 
822 =head2 is_chromosome
823 
824  Example : print ($slice->is_chromosome()) ? 'I am a chromosome' : 'Not one';
825  Description : A chromosome is a slice with a karyotype rank assigned to it.
826  Returntype : Boolean indicates if the current object is a chromosome
827  Exceptions : None
828 
829 =cut
830 
831 sub is_chromosome {
832  my ($self) = @_;
833 
834  return $self->has_karyotype();
835 }
836 
837 
838 =head2 get_base_count
839 
840  Arg [1] : none
841  Example : $c_count = $slice->get_base_count->{'c'};
842  Description: Retrieves a hashref containing the counts of each bases in the
843  sequence spanned by this slice. The format of the hash is :
844  { 'a' => num,
845  'c' => num,
846  't' => num,
847  'g' => num,
848  'n' => num,
849  '%gc' => num }
850 
851  All bases which are not in the set [A,a,C,c,T,t,G,g] are
852  included in the 'n' count. The 'n' count could therefore be
853  inclusive of ambiguity codes such as 'y'.
854  The %gc is the ratio of GC to AT content as in:
855  total(GC)/total(ACTG) * 100
856  This function is conservative in its memory usage and scales to
857  work for entire chromosomes.
858  Returntype : hashref
859  Exceptions : none
860  Caller : general
861  Status : Stable
862 
863 =cut
864 
865 sub get_base_count {
866  my $self = shift;
867 
868  my $a = 0;
869  my $c = 0;
870  my $t = 0;
871  my $g = 0;
872 
873  my $start = 1;
874  my $end;
875 
876  my $RANGE = 100_000;
877  my $len = $self->length();
878 
879  my $seq;
880 
881  while ( $start <= $len ) {
882  $end = $start + $RANGE - 1;
883  $end = $len if ( $end > $len );
884 
885  $seq = $self->subseq( $start, $end );
886 
887  $a += $seq =~ tr/Aa//;
888  $c += $seq =~ tr/Cc//;
889  $t += $seq =~ tr/Tt//;
890  $g += $seq =~ tr/Gg//;
891 
892  $start = $end + 1;
893  }
894 
895  my $actg = $a + $c + $t + $g;
896 
897  my $gc_content = 0;
898  if ( $actg > 0 ) { # Avoid dividing by 0
899  $gc_content = sprintf( "%1.2f", ( ( $g + $c )/$actg )*100 );
900  }
901 
902  return { 'a' => $a,
903  'c' => $c,
904  't' => $t,
905  'g' => $g,
906  'n' => $len - $actg,
907  '%gc' => $gc_content };
908 }
909 
910 
911 
912 =head2 project
913 
914  Arg [1] : string $name
915  The name of the coordinate system to project this slice onto
916  Arg [2] : string $version
917  The version of the coordinate system (such as 'NCBI34') to
918  project this slice onto
919  Example :
920  my $clone_projection = $slice->project('clone');
921 
922  foreach my $segment (@$clone_projection) {
923  my $clone = $segment->to_Slice();
924  print $slice->seq_region_name(), ':', $segment->from_start(), '-',
925  $segment->from_end(), ' -> ',
926  $clone->seq_region_name(), ':', $clone->start(), '-',$clone->end(),
927  ':', $clone->strand(), "\n";
928  }
929  Description: Returns the results of 'projecting' this slice onto another
930  coordinate system. Projecting to a coordinate system that
931  the slice is assembled from is analagous to retrieving a tiling
932  path. This method may also be used to 'project up' to a higher
933  level coordinate system, however.
934 
935  This method returns a listref of triplets [start,end,slice]
936  which represents the projection. The start and end defined the
937  region of this slice which is made up of the third value of
938  the triplet: a slice in the requested coordinate system.
939  Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
940  can also be used as [$start,$end,$slice] triplets
941  Exceptions : none
942  Caller : general
943  Status : Stable
944 
945 =cut
946 
947 sub project {
948  my $self = shift;
949  my $cs_name = shift;
950  my $cs_version = shift;
951 
952  throw('Coord_system name argument is required') if(!$cs_name);
953 
954  my $slice_adaptor = $self->adaptor();
955 
956  if(!$slice_adaptor) {
957  warning("Cannot project without attached adaptor.");
958  return [];
959  }
960 
961  if(!$self->coord_system()) {
962  warning("Cannot project without attached coord system.");
963  return [];
964  }
965 
966 
967  my $db = $slice_adaptor->db();
968  my $csa = $db->get_CoordSystemAdaptor();
969  my $cs = $csa->fetch_by_name($cs_name, $cs_version);
970  my $slice_cs = $self->coord_system();
971 
972  if(!$cs) {
973  throw("Cannot project to unknown coordinate system " .
974  "[$cs_name $cs_version]");
975  }
976 
977  # no mapping is needed if the requested coord system is the one we are in
978  # but we do need to check if some of the slice is outside of defined regions
979  if($slice_cs->equals($cs)) {
980  return $self->_constrain_to_region();
981  }
982 
983  my @projection;
984 
985  # decompose this slice into its symlinked components.
986  # this allows us to handle haplotypes and PARs
987  my $normal_slice_proj =
988  $slice_adaptor->fetch_normalized_slice_projection($self);
989  foreach my $segment (@$normal_slice_proj) {
990  my $normal_slice = $segment->[2];
991 
992  $slice_cs = $normal_slice->coord_system();
993 
994  my $asma = $db->get_AssemblyMapperAdaptor();
995  my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
996  next unless defined $asm_mapper;
997 
998  # perform the mapping between this slice and the requested system
999  my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
1000  $normal_slice->start(),
1001  $normal_slice->end(),
1002  $normal_slice->strand(),
1003  $slice_cs, undef, undef, 1);
1004 
1005  # construct a projection from the mapping results and return it
1006  foreach my $coord (@coords) {
1007  my $original = $coord->{original};
1008  my $mapped = $coord->{mapped};
1009 
1010  # skip gaps
1011  next unless $mapped->isa('Bio::EnsEMBL::Mapper::Coordinate');
1012 
1013  my $mapped_start = $mapped->start();
1014  my $mapped_end = $mapped->end();
1015  my ($current_start, $current_end);
1016  if ($self->strand == 1) {
1017  $current_start = $original->start() - $self->start + 1;
1018  $current_end = $original->end() - $self->start + 1;
1019  } else {
1020  $current_start = $self->end() - $original->end + 1;
1021  $current_end = $self->end - $original->start + 1;
1022  }
1023 
1024  my $coord_cs = $mapped->coord_system();
1025 
1026  # If the normalised projection just ended up mapping to the
1027  # same coordinate system we were already in then we should just
1028  # return the original region. This can happen for example, if we
1029  # were on a PAR region on Y which refered to X and a projection to
1030  # 'toplevel' was requested.
1031  if($coord_cs->equals($slice_cs)) {
1032  # trim off regions which are not defined
1033  return $self->_constrain_to_region();
1034  }
1035  #create slices for the mapped-to coord system
1036  my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
1037  $mapped_start,
1038  $mapped_end,
1039  $mapped->strand());
1040 
1041  if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
1042  $current_end -= $slice->seq_region_length();
1043  }
1044 
1045  push @projection, bless([$current_start, $current_end, $slice],
1046  "Bio::EnsEMBL::ProjectionSegment");
1047 
1048  }
1049  }
1050 
1051  return \@projection;
1052 }
1053 
1054 
1055 sub _constrain_to_region {
1056  my $self = shift;
1057 
1058  my $entire_len = $self->seq_region_length();
1059 
1060  #if the slice has negative coordinates or coordinates exceeding the
1061  #exceeding length of the sequence region we want to shrink the slice to
1062  #the defined region
1063 
1064  if($self->{'start'} > $entire_len || $self->{'end'} < 1) {
1065  #none of this slice is in a defined region
1066  return [];
1067  }
1068 
1069  my $right_contract = 0;
1070  my $left_contract = 0;
1071  if($self->{'end'} > $entire_len) {
1072  $right_contract = $entire_len - $self->{'end'};
1073  }
1074  if($self->{'start'} < 1) {
1075  $left_contract = $self->{'start'} - 1;
1076  }
1077 
1078  my $new_slice;
1079  if($left_contract || $right_contract) {
1080  #if slice in negative strand, need to swap contracts
1081  if ($self->strand == 1) {
1082  $new_slice = $self->expand($left_contract, $right_contract);
1083  }
1084  elsif ($self->strand == -1) {
1085  $new_slice = $self->expand($right_contract, $left_contract);
1086  }
1087  } else {
1088  $new_slice = $self;
1089  }
1090 
1091  return [bless [1-$left_contract, $self->length()+$right_contract,
1092  $new_slice], "Bio::EnsEMBL::ProjectionSegment" ];
1093 }
1094 
1095 
1096 =head2 expand
1097 
1098  Arg [1] : (optional) int $five_prime_expand
1099  The number of basepairs to shift this slices five_prime
1100  coordinate by. Positive values make the slice larger,
1101  negative make the slice smaller.
1102  coordinate left.
1103  Default = 0.
1104  Arg [2] : (optional) int $three_prime_expand
1105  The number of basepairs to shift this slices three_prime
1106  coordinate by. Positive values make the slice larger,
1107  negative make the slice smaller.
1108  Default = 0.
1109  Arg [3] : (optional) bool $force_expand
1110  if set to 1, then the slice will be contracted even in the case
1111  when shifts $five_prime_expand and $three_prime_expand overlap.
1112  In that case $five_prime_expand and $three_prime_expand will be set
1113  to a maximum possible number and that will result in the slice
1114  which would have only 2pbs.
1115  Default = 0.
1116  Arg [4] : (optional) int* $fpref
1117  The reference to a number of basepairs to shift this slices five_prime
1118  coordinate by. Normally it would be set to $five_prime_expand.
1119  But in case when $five_prime_expand shift can not be applied and
1120  $force_expand is set to 1, then $$fpref will contain the maximum possible
1121  shift
1122  Arg [5] : (optional) int* $tpref
1123  The reference to a number of basepairs to shift this slices three_prime
1124  coordinate by. Normally it would be set to $three_prime_expand.
1125  But in case when $five_prime_expand shift can not be applied and
1126  $force_expand is set to 1, then $$tpref will contain the maximum possible
1127  shift
1128  Example : my $expanded_slice = $slice->expand( 1000, 1000);
1129  my $contracted_slice = $slice->expand(-1000,-1000);
1130  my $shifted_right_slice = $slice->expand(-1000, 1000);
1131  my $shifted_left_slice = $slice->expand( 1000,-1000);
1132  my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
1133 
1134  Description: Returns a slice which is a resized copy of this slice. The
1135  start and end are moved outwards from the center of the slice
1136  if positive values are provided and moved inwards if negative
1137  values are provided. This slice remains unchanged. A slice
1138  may not be contracted below 1bp but may grow to be arbitrarily
1139  large.
1140  Returntype : Bio::EnsEMBL::Slice
1141  Exceptions : warning if an attempt is made to contract the slice below 1bp
1142  Caller : general
1143  Status : Stable
1144 
1145 =cut
1146 
1147 sub expand {
1148  my $self = shift;
1149  my $five_prime_shift = shift || 0;
1150  my $three_prime_shift = shift || 0;
1151  my $force_expand = shift || 0;
1152  my $fpref = shift;
1153  my $tpref = shift;
1154 
1155  if ( $self->{'seq'} ) {
1156  warning(
1157  "Cannot expand a slice which has a manually attached sequence ");
1158  return undef;
1159  }
1160 
1161  my $sshift = $five_prime_shift;
1162  my $eshift = $three_prime_shift;
1163 
1164  if ($sshift == 0 && $eshift == 0) { return $self; }
1165 
1166  if ( $self->{'strand'} != 1 ) {
1167  $eshift = $five_prime_shift;
1168  $sshift = $three_prime_shift;
1169  }
1170 
1171  my $new_start = $self->{'start'} - $sshift;
1172  my $new_end = $self->{'end'} + $eshift;
1173 
1174  # Wrap around on circular slices
1175  if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0
1176  || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
1177 
1178  if ( $new_start <= 0 ) {
1179  $new_start = $self->seq_region_length() + $new_start;
1180  }
1181  if ( $new_start > $self->seq_region_length() ) {
1182  $new_start -= $self->seq_region_length();
1183  }
1184 
1185  if ( $new_end <= 0 ) {
1186  $new_end = $self->seq_region_length() + $new_end;
1187  }
1188  if ( $new_end > $self->seq_region_length() ) {
1189  $new_end -= $self->seq_region_length();
1190  }
1191 
1192  }
1193 
1194  if ( $new_start > $new_end && (not $self->is_circular() ) ) {
1195 
1196  if ($force_expand) {
1197  # Apply max possible shift, if force_expand is set
1198  if ( $sshift < 0 ) {
1199  # if we are contracting the slice from the start - move the
1200  # start just before the end
1201  $new_start = $new_end - 1;
1202  $sshift = $self->{start} - $new_start;
1203  }
1204 
1205  if ( $new_start > $new_end ) {
1206  # if the slice still has a negative length - try to move the
1207  # end
1208  if ( $eshift < 0 ) {
1209  $new_end = $new_start + 1;
1210  $eshift = $new_end - $self->{end};
1211  }
1212  }
1213  # return the values by which the primes were actually shifted
1214  $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
1215  $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
1216  }
1217  if ( $new_start > $new_end ) {
1218  throw('Slice start cannot be greater than slice end');
1219  }
1220  }
1221 
1222  #fastest way to copy a slice is to do a shallow hash copy
1223  my %new_slice = %$self;
1224  $new_slice{'start'} = int($new_start);
1225  $new_slice{'end'} = int($new_end);
1226 
1227  return bless \%new_slice, ref($self);
1228 } ## end sub expand
1229 
1230 =head2 constrain_to_seq_region
1231  Example : $new_slice = $slice->expand(1000,10000);
1232  $new_slice = $new_slice->constrain_to_seq_region();
1233  Description: Used to prevent overly zealous expand calls going off the end of
1234  the sequence region. It contracts the start and end where needed
1235  and produces a slice copy with the tweaked coordinates.
1236  Returntype : Bio::EnsEMBL::Slice
1237 =cut
1238 
1239 sub constrain_to_seq_region {
1240  my ($self) = @_;
1241  # circular calculations should already be taken care of
1242  if ($self->is_circular) {return $self}
1243  my $new_start = $self->start;
1244  my $new_end = $self->end;
1245 
1246  my $seq_region = $self->seq_region_Slice;
1247 
1248  if ($new_start < $seq_region->start) {$new_start = $seq_region->start}
1249  if ($new_end > $seq_region->end) {$new_end = $seq_region->end}
1250 
1251  my %new_slice = %$self;
1252  $new_slice{'start'} = $new_start;
1253  $new_slice{'end'} = $new_end;
1254 
1255  return bless \%new_slice, ref($self);
1256 }
1257 
1258 
1259 =head2 sub_Slice
1260 
1261  Arg 1 : int $start, refers to the start of the subslice relative to the input slice
1262  Arg 2 : int $end, refers to the end of the subslice relative to the input slice
1263  Arge [3] : int $strand
1264  Example : none
1265  Description: Makes another Slice that covers only part of this Slice
1266  If a Slice is requested which lies outside of the boundaries
1267  of this function will return undef. This means that
1268  behaviour will be consistant whether or not the slice is
1269  attached to the database (i.e. if there is attached sequence
1270  to the slice). Alternatively the expand() method or the
1271  SliceAdaptor::fetch_by_region method can be used instead.
1272  Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
1273  Exceptions : none
1274  Caller : general
1275  Status : Stable
1276 
1277 =cut
1278 
1279 sub sub_Slice {
1280  my ( $self, $start, $end, $strand ) = @_;
1281 
1282  my $length = $self->length();
1283 
1284  if( $start < 1 || $start > $length ) {
1285  # throw( "start argument not valid" );
1286  return undef;
1287  }
1288 
1289  if( $end < $start || $end > $length ) {
1290  # throw( "end argument not valid" )
1291  return undef;
1292  }
1293 
1294  my ( $new_start, $new_end, $new_strand, $new_seq );
1295  if( ! defined $strand ) {
1296  $strand = 1;
1297  }
1298 
1299  if( $self->{'strand'} == 1 ) {
1300  $new_start = $self->{'start'} + $start - 1;
1301  $new_end = $self->{'start'} + $end - 1;
1302  $new_strand = $strand;
1303  } else {
1304  $new_start = $self->{'end'} - $end + 1;;
1305  $new_end = $self->{'end'} - $start + 1;
1306  $new_strand = -$strand;
1307  }
1308 
1309  if( defined $self->{'seq'} ) {
1310  $new_seq = $self->subseq( $start, $end, $strand );
1311  }
1312 
1313  #fastest way to copy a slice is to do a shallow hash copy
1314  my $new_slice = {%{$self}};
1315  $new_slice->{'start'} = int($new_start);
1316  $new_slice->{'end'} = int($new_end);
1317  $new_slice->{'strand'} = $new_strand;
1318  if( $new_seq ) {
1319  $new_slice->{'seq'} = $new_seq;
1320  }
1321  weaken($new_slice->{adaptor});
1322 
1323  return bless $new_slice, ref($self);
1324 }
1325 
1326 
1327 
1328 =head2 seq_region_Slice
1329 
1330  Arg [1] : none
1331  Example : $slice = $slice->seq_region_Slice();
1332  Description: Returns a slice which spans the whole seq_region which this slice
1333  is on. For example if this is a slice which spans a small region
1334  of chromosome X, this method will return a slice which covers the
1335  entire chromosome X. The returned slice will always have strand
1336  of 1 and start of 1. This method cannot be used if the sequence
1337  of the slice has been set manually.
1338  Returntype : Bio::EnsEMBL::Slice
1339  Exceptions : warning if called when sequence of Slice has been set manually.
1340  Caller : general
1341  Status : Stable
1342 
1343 =cut
1344 
1345 sub seq_region_Slice {
1346  my $self = shift;
1347 
1348  if($self->{'seq'}){
1349  warning("Cannot get a seq_region_Slice of a slice which has manually ".
1350  "attached sequence ");
1351  return undef;
1352  }
1353 
1354  # quick shallow copy
1355  my $slice;
1356  %{$slice} = %{$self};
1357  bless $slice, ref($self);
1358  weaken($slice->{adaptor});
1359 
1360  $slice->{'start'} = 1;
1361  $slice->{'end'} = $slice->{'seq_region_length'};
1362  $slice->{'strand'} = 1;
1363 
1364  return $slice;
1365 }
1366 
1367 
1368 =head2 get_seq_region_id
1369 
1370  Arg [1] : none
1371  Example : my $seq_region_id = $slice->get_seq_region_id();
1372  Description: Gets the internal identifier of the seq_region that this slice
1373  is on. Note that this function will not work correctly if this
1374  slice does not have an attached adaptor. Also note that it may
1375  be better to go through the SliceAdaptor::get_seq_region_id
1376  method if you are working with multiple databases since is
1377  possible to work with slices from databases with different
1378  internal seq_region identifiers.
1379  Returntype : int or undef if slices does not have attached adaptor
1380  Exceptions : warning if slice is not associated with a SliceAdaptor
1381  Caller : assembly loading scripts, general
1382  Status : Stable
1383 
1384 =cut
1385 
1386 sub get_seq_region_id {
1387  my ($self) = @_;
1388  if($self->adaptor) {
1389  return $self->adaptor->get_seq_region_id($self);
1390  }
1391  else {
1392  warning('Cannot retrieve seq_region_id without attached adaptor.');
1393  return undef;
1394  }
1395 }
1396 
1397 =head2 get_genome_component
1398 
1399  Arg [] : none
1400  Example : my $genome_component = $slice->get_genome_component();
1401  Description: Returns the genome component of the slice
1402  Returntype : Scalar; the identifier of the genome component of the slice
1403  Exceptions : none
1404  Caller : general
1405  Status : Stable
1406 
1407 =cut
1408 
1409 sub get_genome_component {
1410  my $self = shift;
1411  return $self->adaptor->get_genome_component_for_slice($self);
1412 }
1413 
1414 =head2 get_all_Attributes
1415 
1416  Arg [1] : optional string $attrib_code
1417  The code of the attribute type to retrieve values for.
1418  Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
1419  @slice_attributes = @{$slice->get_all_Attributes()};
1420  Description: Gets a list of Attributes of this slice''s seq_region.
1421  Optionally just get Attrubutes for given code.
1422  Returntype : listref Bio::EnsEMBL::Attribute
1423  Exceptions : warning if slice does not have attached adaptor
1424  Caller : general
1425  Status : Stable
1426 
1427 =cut
1428 
1429 sub get_all_Attributes {
1430  my ($self, $attrib_code) = @_;
1431  if(my $adaptor = $self->_get_CoreAdaptor('Attribute')) {
1432  my $attribs = $adaptor->fetch_all_by_Slice($self);
1433  if(defined $attrib_code) {
1434  my $uc_attrib = uc($attrib_code);
1435  return [ grep { uc($_->code()) eq $uc_attrib } @{$attribs}];
1436  }
1437  return $attribs;
1438  }
1439  return [];
1440 }
1441 
1442 =head2 get_all_PredictionTranscripts
1443 
1444  Arg [1] : (optional) string $logic_name
1445  The name of the analysis used to generate the prediction
1446  transcripts obtained.
1447  Arg [2] : (optional) boolean $load_exons
1448  If set to true will force loading of all PredictionExons
1449  immediately rather than loading them on demand later. This
1450  is faster if there are a large number of PredictionTranscripts
1451  and the exons will be used.
1452  Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
1453  Description: Retrieves the list of prediction transcripts which overlap
1454  this slice with logic_name $logic_name. If logic_name is
1455  not defined then all prediction transcripts are retrieved.
1456  Returntype : listref of Bio::EnsEMBL::PredictionTranscript
1457  Exceptions : warning if slice does not have attached adaptor
1458  Caller : none
1459  Status : Stable
1460 
1461 =cut
1462 
1463 sub get_all_PredictionTranscripts {
1464  my ($self,$logic_name, $load_exons) = @_;
1465 
1466  if(!$self->adaptor()) {
1467  warning('Cannot get PredictionTranscripts without attached adaptor');
1468  return [];
1469  }
1470  my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
1471  return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
1472 }
1473 
1474 =head2 get_all_DnaAlignFeatures
1475 
1476  Arg [1] : (optional) string $logic_name
1477  The name of the analysis performed on the dna align features
1478  to obtain.
1479  Arg [2] : (optional) float $score
1480  The mimimum score of the features to retrieve
1481  Arg [3] : (optional) string $dbtype
1482  The name of an attached database to retrieve the features from
1483  instead, e.g. 'otherfeatures'.
1484  Arg [4] : (optional) float hcoverage
1485  The minimum hcoverage od the featurs to retrieve
1486  Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
1487  Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
1488  logic name $logic_name and with score above $score. If
1489  $logic_name is not defined features of all logic names are
1490  retrieved. If $score is not defined features of all scores are
1491  retrieved. Strand of the Slice is not considered.
1492  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
1493  Exceptions : warning if slice does not have attached adaptor
1494  Caller : general
1495  Status : Stable
1496 
1497 =cut
1498 
1499 sub get_all_DnaAlignFeatures {
1500  my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1501  return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage, 'DnaAlignFeature');
1502 }
1503 
1504 =head2 get_all_ProteinAlignFeatures
1505 
1506  Arg [1] : (optional) string $logic_name
1507  The name of the analysis performed on the protein align features
1508  to obtain.
1509  Arg [2] : (optional) float $score
1510  The mimimum score of the features to retrieve
1511  Arg [3] : (optional) string $dbtype
1512  The name of an attached database to retrieve features from
1513  instead.
1514  Arg [4] : (optional) float hcoverage
1515  The minimum hcoverage od the featurs to retrieve
1516  Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
1517  Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
1518  logic name $logic_name and with score above $score. If
1519  $logic_name is not defined features of all logic names are
1520  retrieved. If $score is not defined features of all scores are
1521  retrieved.
1522  Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
1523  Exceptions : warning if slice does not have attached adaptor
1524  Caller : general
1525  Status : Stable
1526 
1527 =cut
1528 
1529 sub get_all_ProteinAlignFeatures {
1530  my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1531  return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage, 'ProteinAlignFeature');
1532 }
1533 
1534 =head2 _get_AlignFeatures
1535 
1536  Arg [1] : (optional) string $logic_name
1537  The name of the analysis performed on the protein align features
1538  to obtain.
1539  Arg [2] : (optional) float $score
1540  The mimimum score of the features to retrieve
1541  Arg [3] : (optional) string $dbtype
1542  The name of an attached database to retrieve features from
1543  instead.
1544  Arg [4] : (optional) float hcoverage
1545  The minimum hcoverage od the featurs to retrieve
1546  Arg [5] : string $align_type
1547  The type of adaptor to retrieve alignments from. Must be an BaseAlignFeature derived
1548  class
1549  Description: Generic method which deals with the retrieval of either AlignFeature adaptor and allows
1550  you to switch the adaptor values are retrieved from.
1551 
1552 =cut
1553 
1554 sub _get_AlignFeatures {
1555  my ($self, $logic_name, $score, $dbtype, $hcoverage, $align_type) = @_;
1556  throw "No align_type given" unless $align_type;
1557  if(my $adaptor = $self->_get_CoreAdaptor($align_type, $dbtype)) {
1558  if(defined($score) and defined ($hcoverage)){
1559  warning "Cannot specify score and hcoverage when querying for $align_type. Using score only";
1560  }
1561  if(defined($score)){
1562  return $adaptor->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1563  }
1564  return $adaptor->fetch_all_by_Slice_and_hcoverage($self, $hcoverage, $logic_name);
1565  }
1566  return [];
1567 }
1568 
1569 =head2 get_all_SimilarityFeatures
1570 
1571  Arg [1] : (optional) string $logic_name
1572  the name of the analysis performed on the features to retrieve
1573  Arg [2] : (optional) float $score
1574  the lower bound of the score of the features to be retrieved
1575  Example : @feats = @{$slice->get_all_SimilarityFeatures};
1576  Description: Retrieves all dna_align_features and protein_align_features
1577  with analysis named $logic_name and with score above $score.
1578  It is probably faster to use get_all_ProteinAlignFeatures or
1579  get_all_DnaAlignFeatures if a sepcific feature type is desired.
1580  If $logic_name is not defined features of all logic names are
1581  retrieved. If $score is not defined features of all scores are
1582  retrieved.
1583  Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
1584  Exceptions : warning if slice does not have attached adaptor
1585  Caller : general
1586  Status : Stable
1587 
1588 =cut
1589 
1590 sub get_all_SimilarityFeatures {
1591  my ($self, $logic_name, $score) = @_;
1592 
1593  my @out = ();
1594 
1595  push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
1596  push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
1597 
1598  return \@out;
1599 }
1600 
1601 =head2 get_all_SimpleFeatures
1602 
1603  Arg [1] : (optional) string $logic_name
1604  The name of the analysis performed on the simple features
1605  to obtain.
1606  Arg [2] : (optional) float $score
1607  The mimimum score of the features to retrieve
1608  Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
1609  Description: Retrieves the SimpleFeatures which overlap this slice with
1610  logic name $logic_name and with score above $score. If
1611  $logic_name is not defined features of all logic names are
1612  retrieved. If $score is not defined features of all scores are
1613  retrieved.
1614  Returntype : listref of Bio::EnsEMBL::SimpleFeatures
1615  Exceptions : warning if slice does not have attached adaptor
1616  Caller : general
1617  Status : Stable
1618 
1619 =cut
1620 
1621 sub get_all_SimpleFeatures {
1622  my ($self, $logic_name, $score, $dbtype) = @_;
1623  if(my $adaptor = $self->_get_CoreAdaptor('SimpleFeature', $dbtype)) {
1624  return $adaptor->fetch_all_by_Slice_and_score($self, $score, $logic_name);
1625  }
1626  return [];
1627 }
1628 
1629 =head2 get_all_RepeatFeatures
1630 
1631  Arg [1] : (optional) string $logic_name
1632  The name of the analysis performed on the repeat features
1633  to obtain.
1634  Arg [2] : (optional) string/array $repeat_type
1635  Limits features returned to those of the specified
1636  repeat_type. Can specify a single value or an array reference
1637  to limit by more than one
1638  Arg [3] : (optional) string $db
1639  Key for database e.g. core/vega/cdna/....
1640  Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'Type II Transposons')};
1641  Description: Retrieves the RepeatFeatures which overlap with
1642  logic name $logic_name and with score above $score. If
1643  $logic_name is not defined features of all logic names are
1644  retrieved.
1645  Returntype : listref of Bio::EnsEMBL::RepeatFeatures
1646  Exceptions : warning if slice does not have attached adaptor
1647  Caller : general
1648  Status : Stable
1649 
1650 =cut
1651 
1652 sub get_all_RepeatFeatures {
1653  my ($self, $logic_name, $repeat_type, $dbtype) = @_;
1654  if(my $adaptor = $self->_get_CoreAdaptor('RepeatFeature', $dbtype)) {
1655  return $adaptor->fetch_all_by_Slice($self, $logic_name, $repeat_type);
1656  }
1657  return [];
1658 }
1659 
1660 =head2 get_all_LD_values
1661 
1662  Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
1663  Description : returns all LD values on this slice. This function will only work correctly if the variation
1664  database has been attached to the core database. If the argument is passed, will return the LD information
1665  in that population
1666  ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1667  Exceptions : none
1668  Caller : contigview, snpview
1669  Status : Stable
1670 
1671 =cut
1672 
1673 sub get_all_LD_values {
1674  my $self = shift;
1675  my $population = shift;
1676 
1677  my $ld_adaptor = $self->_get_VariationAdaptor('LDFeatureContainer');
1678  if($ld_adaptor) {
1679  return $ld_adaptor->fetch_by_Slice($self,$population);
1680  }
1681  return [];
1682 }
1683 
1684 =head2 _get_VariationFeatureAdaptor
1685 
1686 Shortcut method here because VariationFeature is an often requested
1687 adaptor type.
1688 
1689 =cut
1690 
1691 sub _get_VariationFeatureAdaptor {
1692  my ($self, $dbtype) = @_;
1693  return $self->_get_VariationAdaptor('VariationFeature', $dbtype);
1694 }
1695 
1696 =head2 _get_StructuralVariationFeatureAdaptor
1697 
1698 Shortcut method here because StructuralVariationFeature is an often requested
1699 adaptor type.
1700 
1701 =cut
1702 
1703 sub _get_StructuralVariationFeatureAdaptor {
1704  my ($self, $dbtype) = @_;
1705  return $self->_get_VariationAdaptor('StructuralVariationFeature', $dbtype);
1706 }
1707 
1708 =head2 _get_VariationAdaptor
1709 
1710  Arg [1] : String object_type to retrieve an adaptor for
1711  Arg [2] : String dbtype to search for the given adaptor in. Defaults to variation
1712  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
1713  it will return nothing if the adaptor was not found
1714  ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derrived instance (specific to variation)
1715  Exceptions : none
1716 
1717 =cut
1718 
1719 sub _get_VariationAdaptor {
1720  my ($self, $object_type, $dbtype) = @_;
1721  # very important to do this defaulting since we *have* to assume the variation
1722  # database is called 'variation'. Using the current group will not work because
1723  # that will be something like 'core' (most likely), 'ensembl' or 'vega'.
1724  $dbtype ||= 'variation';
1725 
1726  # Also we do not care about Registry->get_db() for variation DBs
1727  my $do_not_check_db = 1;
1728 
1729  return $self->_get_Adaptor($object_type, $dbtype, $do_not_check_db);
1730 }
1731 
1732 =head2 _get_CoreAdaptor
1733 
1734  Arg [1] : String object_type to retrieve an adaptor for
1735  Arg [2] : String dbtype to search for the given adaptor in. Defaults to core
1736  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
1737  it will return nothing if the adaptor was not found
1738  ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derrived instance (specific to core-like dbs)
1739  Exceptions : none
1740 
1741 =cut
1742 
1743 sub _get_CoreAdaptor {
1744  my ($self, $object_type, $dbtype) = @_;
1745  #Simple pass through
1746  return $self->_get_Adaptor($object_type, $dbtype);
1747 }
1748 
1749 =head2 _get_Adaptor
1750 
1751  Arg [1] : String object_type to retrieve an adaptor for
1752  Arg [2] : String dbtype to search for the given adaptor in
1753  Arg [3] : Boolean Turn off the checking of Registry->get_db() for your
1754  adaptor.
1755  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
1756  it will return nothing if the adaptor was not found. We consult the
1757  "special" adaptors held by Bio::EnsEMBL::Registry::get_db() method and then
1758  fall back to the normal methods of finding an adaptor.
1759 
1760  This method will warn when adaptors are missing but will never through an
1761  exception. It is up to the calling code to decide how to handle the unavailablity
1762  of an adaptor.
1763  ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derrived instance. Otherwise it returns nothing
1764  Exceptions : none
1765 
1766 =cut
1767 
1768 sub _get_Adaptor {
1769  my ($self, $object_type, $dbtype, $do_not_check_db) = @_;
1770 
1771  if(!$object_type) {
1772  warning('Object type is a required parameter');
1773  return;
1774  }
1775 
1776  my $adaptor = $self->adaptor();
1777 
1778  if(!$adaptor) {
1779  warning("Cannot get a ${object_type} adaptor without a SliceAdaptor attached to this instance of ".ref($self));
1780  return;
1781  }
1782 
1783  my $local_db = $adaptor->db();
1784  my $species = $local_db->species();
1785 
1786  #First we query for the DBAdaptor using get_db(). This is a deprecated method
1787  #call but "special" adaptors can be registered via this method. We must
1788  #consult here 1st to find the possible special adaptor
1789  if(!$do_not_check_db && $dbtype) {
1790  my $db = $registry->get_db($local_db, $dbtype);
1791  if($db) {
1792  # If we got a return then use this DBAdaptor's species name, group and the given object type.
1793  # Special adaptors can have different species names
1794  $adaptor = $registry->get_adaptor($db->species(), $db->group(), $object_type);
1795  }
1796  else {
1797  #Otherwise just use the current species, dbtype and object type
1798  $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1799  }
1800  }
1801  # Otherwise our query group is the one attached to the current adaptor
1802  else {
1803  #If not set use the group attached to the local adaptor
1804  $dbtype ||= $local_db->group();
1805  $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1806  }
1807  return $adaptor if $adaptor;
1808 
1809  warning("No adaptor could be found for the species ${species}, database type ${dbtype} and object type ${object_type}");
1810  return;
1811 }
1812 
1813 =head2 get_all_VariationFeatures
1814 
1815  Args [1] : (optional) ArrayRef $so_terms
1816  SequenceOntology terms to limit the fetch to
1817  Args [2] : (optional) boolean $without_children
1818  Do not query using the children of the given SO terms
1819  i.e. query using the given terms directly
1820  Args [3] : (optional) ArrayRef $included_so
1821  ArrayRef of SequenceOntology which should be queried for
1822  without children. This argument allows you to combine SO terms with children
1823  from argument 1 with extra non-child SO terms. e.g. you wish to query for
1824  all protein_altering_variant (specified in argument 1) variations which
1825  would be defined by child SO terms but also wanted stop_retained_variant linked variations
1826  defined by this argument
1827  Args [4] : (optional) string $dbtype
1828  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1829  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1830  DBConnection::add_db_adaptor method).
1831  Description : Returns all germline variation features on this slice. This function will
1832  only work correctly if the variation database has been attached to the core
1833  database.
1834  If $so_terms is specified, only variation features with a consequence type
1835  that matches or is an ontological child of any of the supplied terms will
1836  be returned
1837  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1838  Exceptions : none
1839  Caller : contigview, snpview
1840  Status : Stable
1841 
1842 =cut
1843 
1844 sub get_all_VariationFeatures{
1845  my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1846  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1847  return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1848  }
1849  return [];
1850 }
1851 
1852 =head2 get_all_somatic_VariationFeatures
1853  Args [1] : (optional) ArrayRef $so_terms
1854  SequenceOntology terms to limit the fetch to
1855  Args [2] : (optional) boolean $without_children
1856  Do not query using the children of the given SO terms
1857  i.e. query using the given terms directly
1858  Args [3] : (optional) ArrayRef $included_so
1859  ArrayRef of SequenceOntology which should be queried for
1860  without children. This argument allows you to combine SO terms with children
1861  from argument 1 with extra non-child SO terms. e.g. you wish to query for
1862  all protein_altering_variant (specified in argument 1) variations which
1863  would be defined by child SO terms but also wanted stop_retained_variant linked variations
1864  defined by this argument
1865  Args [4] : (optional) string $dbtype
1866  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1867  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1868  DBConnection::add_db_adaptor method).
1869  Description : Returns all somatic variation features on this slice. This function will only
1870  work correctly if the variation database has been attached to the core database.
1871  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1872  Exceptions : none
1873  Status : Stable
1874 
1875 =cut
1876 
1877 sub get_all_somatic_VariationFeatures {
1878  my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1879  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1880  return $vf_adaptor->fetch_all_somatic_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1881  }
1882  return [];
1883 }
1884 
1885 =head2 get_all_somatic_VariationFeatures_by_source
1886  Arg [1] : string $source [optional]
1887  The name of the source to query for
1888  Arg [2] : string $dbtype [optional]
1889  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1890  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1891  DBConnection::add_db_adaptor method).
1892  Description : Returns all somatic variation features, from a defined source name (e.g.'COSMIC'),
1893  on this slice. This function will only work correctly if the variation database
1894  has been attached to the core database.
1895  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1896  Exceptions : none
1897  Status : Stable
1898 
1899 =cut
1900 
1901 sub get_all_somatic_VariationFeatures_by_source {
1902  my ($self, $source, $dbtype) = @_;
1903  my $constraint = (defined($source)) ? " s.name='$source' " : undef;
1904  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1905  return $vf_adaptor->fetch_all_somatic_by_Slice_constraint($self, $constraint);
1906  }
1907  return [];
1908 }
1909 
1910 =head2 get_all_somatic_VariationFeatures_with_phenotype
1911  Arg [1] : $variation_feature_source [optional]
1912  Arg [2] : $phenotype_source [optional]
1913  Arg [3] : $phenotype_name [optional]
1914  Arg [4] : string $dbtype [optional]
1915  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1916  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1917  DBConnection::add_db_adaptor method).
1918  Description : returns all somatic variation features on this slice associated with a phenotype.
1919  (see get_all_VariationFeatures_with_phenotype for further documentation)
1920  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1921  Exceptions : none
1922  Status : Stable
1923 
1924 =cut
1925 
1926 sub get_all_somatic_VariationFeatures_with_phenotype {
1927  my ($self, $source, $p_source, $phenotype, $dbtype) = @_;
1928  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1929  return $vf_adaptor->fetch_all_somatic_with_phenotype_by_Slice($self, $source, $p_source, $phenotype);
1930  }
1931  return [];
1932 }
1933 
1934 
1935 =head2 get_all_VariationFeatures_by_Population
1936  Arg [1] : Bio::EnsEMBL::Variation::Population
1937  Arg [2] : $minimum_frequency [optional]
1938  Arg [3] : string $dbtype [optional]
1939  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1940  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1941  DBConnection::add_db_adaptor method).
1942  Example : $pop = $pop_adaptor->fetch_by_dbID(659);
1943  @vfs = @{$slice->get_all_VariationFeatures_by_Population($pop,$slice)};
1944  Description : Retrieves all variation features in a slice which are stored for
1945  a specified population. If $minimum_frequency is supplied, only
1946  variations with a minor allele frequency (MAF) greater than
1947  $minimum_frequency will be returned.
1948  Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
1949  Exceptions : throw on incorrect argument
1950  Caller : general
1951  Status : At Risk
1952 
1953 =cut
1954 
1955 sub get_all_VariationFeatures_by_Population {
1956  my ($self, $minimum_frequency, $dbtype) = @_;
1957  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1958  return $vf_adaptor->fetch_all_by_Slice_Population($self, $minimum_frequency);
1959  }
1960  return [];
1961 }
1962 
1963 =head2 get_all_Genes
1964 
1965  Arg [1] : (optional) string $logic_name
1966  The name of the analysis used to generate the genes to retrieve
1967  Arg [2] : (optional) string $dbtype
1968  The dbtype of genes to obtain. This assumes that the db has
1969  been added to the DBAdaptor under this name (using the
1970  DBConnection::add_db_adaptor method).
1971  Arg [3] : (optional) boolean $load_transcripts
1972  If set to true, transcripts will be loaded immediately rather
1973  than being lazy-loaded on request. This will result in a
1974  significant speed up if the Transcripts and Exons are going to
1975  be used (but a slow down if they are not).
1976  Arg [4] : (optional) string $source
1977  The source of the genes to retrieve.
1978  Arg [5] : (optional) string $biotype
1979  The biotype of the genes to retrieve.
1980  Example : @genes = @{$slice->get_all_Genes};
1981  Description: Retrieves all genes that overlap this slice, including those on
1982  the reverse strand.
1983  Returntype : listref of Bio::EnsEMBL::Genes
1984  Exceptions : none
1985  Caller : none
1986  Status : Stable
1987 
1988 =cut
1989 
1990 sub get_all_Genes{
1991  my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
1992  if(my $adaptor = $self->_get_CoreAdaptor('Gene', $dbtype)) {
1993  return $adaptor->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
1994  }
1995  return [];
1996 }
1997 
1998 =head2 get_all_Genes_by_type
1999 
2000  Arg [1] : string $type
2001  The biotype of genes wanted.
2002  Arg [2] : (optional) string $logic_name
2003  Arg [3] : (optional) boolean $load_transcripts
2004  If set to true, transcripts will be loaded immediately rather
2005  than being lazy-loaded on request. This will result in a
2006  significant speed up if the Transcripts and Exons are going to
2007  be used (but a slow down if they are not).
2008  Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
2009  'ensembl')};
2010  Description: Retrieves genes that overlap this slice of biotype $type.
2011  This is primarily used by the genebuilding code when several
2012  biotypes of genes are used.
2013 
2014  The logic name is the analysis of the genes that are retrieved.
2015  If not provided all genes will be retrieved instead. Both
2016  positive and negative strand Genes will be returned.
2017 
2018  Returntype : listref of Bio::EnsEMBL::Genes
2019  Exceptions : none
2020  Caller : genebuilder, general
2021  Status : Stable
2022 
2023 =cut
2024 
2025 sub get_all_Genes_by_type {
2026  my ($self, $type, $logic_name, $load_transcripts) = @_;
2027  return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
2028 }
2029 
2030 
2031 =head2 get_all_Genes_by_source
2032 
2033  Arg [1] : string source
2034  Arg [2] : (optional) boolean $load_transcripts
2035  If set to true, transcripts will be loaded immediately rather
2036  than being lazy-loaded on request. This will result in a
2037  significant speed up if the Transcripts and Exons are going to
2038  be used (but a slow down if they are not).
2039  Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
2040  Description: Retrieves genes that overlap this slice of source $source.
2041  Strand of the Slice does not affect the result.
2042  Returntype : listref of Bio::EnsEMBL::Genes
2043  Exceptions : none
2044  Caller : general
2045  Status : Stable
2046 
2047 =cut
2048 
2049 sub get_all_Genes_by_source {
2050  my ($self, $source, $load_transcripts) = @_;
2051  return $self->get_all_Genes(undef, undef, $load_transcripts, $source);
2052 }
2053 
2054 =head2 get_all_Transcripts
2055 
2056  Arg [1] : (optional) boolean $load_exons
2057  If set to true exons will not be lazy-loaded but will instead
2058  be loaded right away. This is faster if the exons are
2059  actually going to be used right away.
2060  Arg [2] : (optional) string $logic_name
2061  the logic name of the type of features to obtain
2062  Arg [3] : (optional) string $db_type
2063  Example : @transcripts = @{$slice->get_all_Transcripts)_};
2064  Description: Gets all transcripts which overlap this slice. If you want to
2065  specify a particular analysis or type, then you are better off
2066  using get_all_Genes or get_all_Genes_by_type and iterating
2067  through the transcripts of each gene. Strand of the Slice is
2068  ignored.
2069  Returntype : reference to a list of Bio::EnsEMBL::Transcripts
2070  Exceptions : none
2071  Caller : general
2072  Status : Stable
2073 
2074 =cut
2075 
2076 sub get_all_Transcripts {
2077  my ($self, $load_exons, $logic_name, $dbtype, $source, $biotype) = @_;
2078  if(my $adaptor = $self->_get_CoreAdaptor('Transcript', $dbtype)) {
2079  return $adaptor->fetch_all_by_Slice($self, $load_exons, $logic_name, undef, $source, $biotype);
2080  }
2081  return [];
2082 }
2083 
2084 =head2 get_all_Transcripts_by_type
2085 
2086  Arg [1] : string $type
2087  The biotype of transcripts wanted.
2088  Arg [2] : (optional) string $logic_name
2089  Arg [3] : (optional) boolean $load_exons
2090  If set to true exons will not be lazy-loaded but will instead
2091  be loaded right away. This is faster if the exons are
2092  actually going to be used right away.
2093 
2094  Example : @transcripts = @{$slice->get_all_Transcripts_by_type('protein_coding',
2095  'ensembl')};
2096  Description: Retrieves transcripts that overlap this slice of biotype $type.
2097  This is primarily used by the genebuilding code when several
2098  biotypes of transcripts are used.
2099 
2100  The logic name is the analysis of the transcripts that are retrieved.
2101  If not provided all transcripts will be retrieved instead. Both
2102  positive and negative strand transcripts will be returned.
2103 
2104  Returntype : listref of Bio::EnsEMBL::Transcripts
2105  Exceptions : none
2106  Caller : genebuilder, general
2107  Status : Stable
2108 
2109 =cut
2110 
2111 sub get_all_Transcripts_by_type {
2112  my ($self, $biotype, $logic_name, $load_exons) = @_;
2113  return $self->get_all_Transcripts($load_exons, $logic_name, undef, undef, $biotype);
2114 }
2115 
2116 
2117 =head2 get_all_Transcripts_by_source
2118 
2119  Arg [1] : string source
2120  Arg [2] : (optional) boolean $load_exons
2121  If set to true exons will not be lazy-loaded but will instead
2122  be loaded right away. This is faster if the exons are
2123  actually going to be used right away.
2124  Example : @transcripts = @{$slice->get_all_Transcripts_by_source('ensembl')};
2125  Description: Retrieves transcripts that overlap this slice of source $source.
2126  Strand of the Slice does not affect the result.
2127  Returntype : listref of Bio::EnsEMBL::Transcripts
2128  Exceptions : none
2129  Caller : general
2130  Status : Stable
2131 
2132 =cut
2133 
2134 sub get_all_Transcripts_by_source {
2135  my ($self, $source, $load_exons) = @_;
2136  return $self->get_all_Transcripts($load_exons, undef, undef, $source);
2137 
2138 }
2139 
2140 
2141 =head2 get_all_Exons
2142 
2143  Arg [1] : none
2144  Example : @exons = @{$slice->get_all_Exons};
2145  Description: Gets all exons which overlap this slice. Note that these exons
2146  will not be associated with any transcripts, so this may not
2147  be terribly useful.
2148  Returntype : reference to a list of Bio::EnsEMBL::Exons
2149  Exceptions : none
2150  Caller : general
2151  Status : Stable
2152 
2153 =cut
2154 
2155 sub get_all_Exons {
2156  my $self = shift;
2157  if(!$self->adaptor()) {
2158  warning('Cannot get Exons without attached adaptor');
2159  return [];
2160  }
2161  return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
2162 }
2163 
2164 =head2 get_all_KaryotypeBands
2165 
2166  Arg [1] : none
2167  Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
2168  Description: Retrieves the karyotype bands which this slice overlaps.
2169  Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
2170  Exceptions : none
2171  Caller : general, contigview
2172  Status : Stable
2173 
2174 =cut
2175 
2176 sub get_all_KaryotypeBands {
2177  my ($self) = @_;
2178  if (my $adaptor = $self->_get_CoreAdaptor('KaryotypeBand')) {
2179  return $adaptor->fetch_all_by_Slice($self);
2180  }
2181  return [];
2182 }
2183 
2184 =head2 get_repeatmasked_seq
2185 
2186  Arg [1] : listref of strings $logic_names (optional)
2187  Arg [2] : int $soft_masking_enable (optional)
2188  Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
2189  The values are 0 or 1 for hard and soft masking respectively
2190  The keys of the hash should be of 2 forms
2191  "repeat_class_" . $repeat_consensus->repeat_class,
2192  e.g. "repeat_class_SINE/MIR"
2193  "repeat_name_" . $repeat_consensus->name
2194  e.g. "repeat_name_MIR"
2195  depending on which base you want to apply the not default
2196  masking either the repeat_class or repeat_name. Both can be
2197  specified in the same hash at the same time, but in that case,
2198  repeat_name setting has priority over repeat_class. For example,
2199  you may have hard masking as default, and you may want soft
2200  masking of all repeat_class SINE/MIR, but repeat_name AluSp
2201  (which are also from repeat_class SINE/MIR).
2202  Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2203  "repeat_name_AluSp" => 0}
2204  Example : $rm_slice = $slice->get_repeatmasked_seq();
2205  $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
2206  Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
2207  masked sequence instead of the regular sequence.
2208  Sequence returned by this new slice will have repeat regions
2209  hardmasked by default (sequence replaced by N) or
2210  or soft-masked when arg[2] = 1 (sequence in lowercase)
2211  Will only work with database connection to get repeat features.
2212  Returntype : Bio::EnsEMBL::RepeatMaskedSlice
2213  Exceptions : none
2214  Caller : general
2215  Status : Stable
2216 
2217 =cut
2218 
2219 sub get_repeatmasked_seq {
2220  my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
2221 
2223  (-START => $self->{'start'},
2224  -END => $self->{'end'},
2225  -STRAND => $self->{'strand'},
2226  -ADAPTOR => $self->adaptor(),
2227  -SEQ => $self->{'seq'},
2228  -SEQ_REGION_NAME => $self->{'seq_region_name'},
2229  -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2230  -COORD_SYSTEM => $self->{'coord_system'},
2231  -REPEAT_MASK => $logic_names,
2232  -SOFT_MASK => $soft_mask,
2233  -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
2234 }
2235 
2236 
2237 
2238 =head2 _mask_features
2239 
2240  Arg [1] : reference to a string $dnaref
2241  Arg [2] : array_ref $repeats
2242  reference to a list Bio::EnsEMBL::RepeatFeature
2243  give the list of coordinates to replace with N or with
2244  lower case
2245  Arg [3] : int $soft_masking_enable (optional)
2246  Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
2247  The values are 0 or 1 for hard and soft masking respectively
2248  The keys of the hash should be of 2 forms
2249  "repeat_class_" . $repeat_consensus->repeat_class,
2250  e.g. "repeat_class_SINE/MIR"
2251  "repeat_name_" . $repeat_consensus->name
2252  e.g. "repeat_name_MIR"
2253  depending on which base you want to apply the not default masking either
2254  the repeat_class or repeat_name. Both can be specified in the same hash
2255  at the same time, but in that case, repeat_name setting has priority over
2256  repeat_class. For example, you may have hard masking as default, and
2257  you may want soft masking of all repeat_class SINE/MIR,
2258  but repeat_name AluSp (which are also from repeat_class SINE/MIR).
2259  Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2260  "repeat_name_AluSp" => 0}
2261  Example : none
2262  Description: replaces string positions described in the RepeatFeatures
2263  with Ns (default setting), or with the lower case equivalent
2264  (soft masking). The reference to a dna string which is passed
2265  is changed in place.
2266  Returntype : none
2267  Exceptions : none
2268  Caller : seq
2269  Status : Stable
2270 
2271 =cut
2272 
2273 sub _mask_features {
2274  my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
2275 
2276  $soft_mask = 0 unless (defined $soft_mask);
2277  $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
2278 
2279  # explicit CORE::length call, to avoid any confusion with the Slice
2280  # length method
2281  my $dnalen = CORE::length($$dnaref);
2282 
2283  REP:foreach my $old_f (@{$repeats}) {
2284  my $f = $old_f->transfer( $self );
2285  my $start = $f->start;
2286  my $end = $f->end;
2287  my $length = ($end - $start) + 1;
2288 
2289  # check if we get repeat completely outside of expected slice range
2290  if ($end < 1 || $start > $dnalen) {
2291  # warning("Unexpected: Repeat completely outside slice coordinates.");
2292  next REP;
2293  }
2294 
2295  # repeat partly outside slice range, so correct
2296  # the repeat start and length to the slice size if needed
2297  if ($start < 1) {
2298  $start = 1;
2299  $length = ($end - $start) + 1;
2300  }
2301 
2302  # repeat partly outside slice range, so correct
2303  # the repeat end and length to the slice size if needed
2304  if ($end > $dnalen) {
2305  $end = $dnalen;
2306  $length = ($end - $start) + 1;
2307  }
2308 
2309  $start--;
2310 
2311  my $padstr;
2312  # if we decide to define masking on the base of the repeat_type, we'll need
2313  # to add the following, and the other commented line few lines below.
2314  my $rc_class;
2315  my $rc_name;
2316 
2317  if ($f->isa('Bio::EnsEMBL::RepeatFeature')) {
2318  $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
2319  $rc_name = "repeat_name_" . $f->repeat_consensus->name;
2320  }
2321 
2322  my $masking_type;
2323  $masking_type = $not_default_masking_cases->{$rc_class} if (defined $not_default_masking_cases->{$rc_class});
2324  $masking_type = $not_default_masking_cases->{$rc_name} if (defined $not_default_masking_cases->{$rc_name});
2325 
2326  $masking_type = $soft_mask unless (defined $masking_type);
2327 
2328  if ($masking_type) {
2329  $padstr = lc substr ($$dnaref,$start,$length);
2330  } else {
2331  $padstr = 'N' x $length;
2332  }
2333  substr ($$dnaref,$start,$length) = $padstr;
2334  }
2335 }
2336 
2337 
2338 =head2 get_all_SearchFeatures
2339 
2340  Arg [1] : scalar $ticket_ids
2341  Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
2342  Description: Retrieves all search features for stored blast
2343  results for the ticket that overlap this slice
2344  Returntype : listref of Bio::EnsEMBL::SeqFeatures
2345  Exceptions : none
2346  Caller : general (webby!)
2347  Status : Stable
2348 
2349 =cut
2350 
2351 sub get_all_SearchFeatures {
2352  my $self = shift;
2353  my $ticket = shift;
2354  local $_;
2355  unless($ticket) {
2356  throw("ticket argument is required");
2357  }
2358 
2359  if(!$self->adaptor()) {
2360  warning("Cannot get SearchFeatures without an attached adaptor");
2361  return [];
2362  }
2363 
2364  my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
2365 
2366  my $offset = $self->start-1;
2367 
2368  my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
2369 
2370  foreach( @$features ) {
2371  $_->start( $_->start - $offset );
2372  $_->end( $_->end - $offset );
2373  };
2374  return $features;
2375 
2376 }
2377 
2378 =head2 get_all_AssemblyExceptionFeatures
2379 
2380  Example : $slice->get_all_AssemblyExceptionFeatures();
2381  Description: Retrieves all misc features which overlap this slice. If
2382  a set code is provided only features which are members of
2383  the requested set are returned.
2384  Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
2385  Exceptions : none
2386  Caller : general
2387  Status : Stable
2388 
2389 =cut
2390 
2391 sub get_all_AssemblyExceptionFeatures {
2392  my ($self) = @_;
2393  if(my $adaptor = $self->_get_CoreAdaptor('AssemblyExceptionFeature')) {
2394  return $adaptor->fetch_all_by_Slice($self);
2395  }
2396  return [];
2397 }
2398 
2399 
2400 
2401 =head2 get_all_MiscFeatures
2402 
2403  Arg [1] : string $set (optional)
2404  Arg [2] : string $database (optional)
2405  Example : $slice->get_all_MiscFeatures('cloneset');
2406  Description: Retrieves all misc features which overlap this slice. If
2407  a set code is provided only features which are members of
2408  the requested set are returned.
2409  Returntype : listref of Bio::EnsEMBL::MiscFeatures
2410  Exceptions : none
2411  Caller : general
2412  Status : Stable
2413 
2414 =cut
2415 
2416 sub get_all_MiscFeatures {
2417  my ($self, $misc_set, $dbtype) = @_;
2418  if(my $adaptor = $self->_get_CoreAdaptor('MiscFeature', $dbtype)) {
2419  if($misc_set) {
2420  return $adaptor->fetch_all_by_Slice_and_set_code($self,$misc_set);
2421  }
2422  return $adaptor->fetch_all_by_Slice($self);
2423  }
2424  return [];
2425 }
2426 
2427 =head2 get_all_MarkerFeatures
2428 
2429  Arg [1] : (optional) string logic_name
2430  The logic name of the marker features to retrieve
2431  Arg [2] : (optional) int $priority
2432  Lower (exclusive) priority bound of the markers to retrieve
2433  Arg [3] : (optional) int $map_weight
2434  Upper (exclusive) priority bound of the markers to retrieve
2435  Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
2436  Description: Retrieves all markers which lie on this slice fulfilling the
2437  specified map_weight and priority parameters (if supplied).
2438  Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2439  Exceptions : none
2440  Caller : contigview, general
2441  Status : Stable
2442 
2443 =cut
2444 
2445 sub get_all_MarkerFeatures {
2446  my ($self, $logic_name, $priority, $map_weight) = @_;
2447  if(my $adaptor = $self->_get_CoreAdaptor('MarkerFeature')) {
2448  return $adaptor->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name);
2449  }
2450  return [];
2451 }
2452 
2453 
2454 =head2 get_MarkerFeatures_by_Name
2455 
2456  Arg [1] : string marker Name
2457  The name (synonym) of the marker feature(s) to retrieve
2458  Example : my @markers = @{$slice->get_MarkerFeatures_by_Name('z1705')};
2459  Description: Retrieves all markers with this ID
2460  Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2461  Exceptions : none
2462  Caller : contigview, general
2463  Status : Stable
2464 
2465 =cut
2466 
2467 sub get_MarkerFeatures_by_Name {
2468  my ($self, $name) = @_;
2469  if(my $adaptor = $self->_get_CoreAdaptor('MarkerFeature')) {
2470  return $adaptor->fetch_all_by_Slice_and_MarkerName($self, $name);
2471  }
2472  return [];
2473 }
2474 
2475 
2476 =head2 get_all_compara_DnaAlignFeatures
2477 
2478  Arg [1] : string $qy_species
2479  The name of the species to retrieve similarity features from
2480  Arg [2] : string $qy_assembly
2481  The name of the assembly to retrieve similarity features from
2482  Arg [3] : string $type
2483  The type of the alignment to retrieve similarity features from
2484  Arg [4] : <optional> compara dbadptor to use.
2485  Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
2486  'MGSC3',
2487  'WGA');
2488  Description: Retrieves a list of DNA-DNA Alignments to the species specified
2489  by the $qy_species argument.
2490  The compara database must be attached to the core database
2491  for this call to work correctly. As well the compara database
2492  must have the core dbadaptors for both this species, and the
2493  query species added to function correctly.
2494  Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
2495  Exceptions : warning if compara database is not available
2496  Caller : contigview
2497  Status : Stable
2498 
2499 =cut
2500 
2501 sub get_all_compara_DnaAlignFeatures {
2502  my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
2503 
2504  if(!$self->adaptor()) {
2505  warning("Cannot retrieve DnaAlignFeatures without attached adaptor");
2506  return [];
2507  }
2508 
2509  unless($qy_species && $alignment_type # && $qy_assembly
2510  ) {
2511  throw("Query species and assembly and alignmemt type arguments are required");
2512  }
2513 
2514  if(!defined($compara_db)){
2515  $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
2516  }
2517  unless($compara_db) {
2518  warning("Compara database must be attached to core database or passed ".
2519  "as an argument to " .
2520  "retrieve compara information");
2521  return [];
2522  }
2523 
2524  my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
2525  return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
2526 }
2527 
2528 =head2 get_all_compara_Syntenies
2529 
2530  Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus"
2531  Arg [2] : string $method_link_type, default is "SYNTENY"
2532  Arg [3] : <optional> compara dbadaptor to use.
2533  Description: gets all the compara syntenyies for a specfic species
2534  Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
2535  Status : Stable
2536 
2537 =cut
2538 
2539 sub get_all_compara_Syntenies {
2540  my ($self, $qy_species, $method_link_type, $compara_db) = @_;
2541 
2542  if(!$self->adaptor()) {
2543  warning("Cannot retrieve features without attached adaptor");
2544  return [];
2545  }
2546 
2547  unless($qy_species) {
2548  throw("Query species and assembly arguments are required");
2549  }
2550 
2551  unless (defined $method_link_type) {
2552  $method_link_type = "SYNTENY";
2553  }
2554 
2555  if(!defined($compara_db)){
2556  $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
2557  }
2558  unless($compara_db) {
2559  warning("Compara database must be attached to core database or passed ".
2560  "as an argument to " .
2561  "retrieve compara information");
2562  return [];
2563  }
2564  my $gdba = $compara_db->get_GenomeDBAdaptor();
2565  my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
2566  my $sra = $compara_db->get_SyntenyRegionAdaptor();
2567 
2568  my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
2569  my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
2570  my $mlss;
2571  if($this_gdb eq $query_gdb) {
2572  $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb]);
2573  } else {
2574  $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
2575  }
2576 
2577  return $sra->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $self);
2578 }
2579 
2580 =head2 get_all_Haplotypes
2581 
2582  Arg [1] : (optional) boolean $lite_flag
2583  if true lightweight haplotype objects are used
2584  Example : @haplotypes = $slice->get_all_Haplotypes;
2585  Description: Retrieves all of the haplotypes on this slice. Only works
2586  if the haplotype adaptor has been attached to the core adaptor
2587  via $dba->add_db_adaptor('haplotype', $hdba);
2588  Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
2589  Exceptions : warning is Haplotype database is not available
2590  Caller : contigview, general
2591  Status : Stable
2592 
2593 =cut
2594 
2595 sub get_all_Haplotypes {
2596  my($self, $lite_flag) = @_;
2597 
2598  if(!$self->adaptor()) {
2599  warning("Cannot retrieve features without attached adaptor");
2600  return [];
2601  }
2602 
2603  my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
2604 
2605  unless($haplo_db) {
2606  warning("Haplotype database must be attached to core database to " .
2607  "retrieve haplotype information" );
2608  return [];
2609  }
2610 
2611  my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
2612 
2613  my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
2614 
2615  return $haplotypes;
2616 }
2617 
2618 
2619 sub get_all_DASFactories {
2620  my $self = shift;
2621  return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
2622 }
2623 
2624 sub get_all_DASFeatures_dsn {
2625  my ($self, $source_type, $dsn) = @_;
2626 
2627  if(!$self->adaptor()) {
2628  warning("Cannot retrieve features without attached adaptor");
2629  return [];
2630  }
2631  my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
2632 
2633  return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
2634 }
2635 
2636 =head2 get_all_DAS_Features
2637 
2638  Arg [1] : none
2639  Example : $features = $slice->get_all_DASFeatures;
2640  Description: Retrieves a hash reference to a hash of DAS feature
2641  sets, keyed by the DNS, NOTE the values of this hash
2642  are an anonymous array containing:
2643  (1) a pointer to an array of features;
2644  (2) a pointer to the DAS stylesheet
2645  Returntype : hashref of Bio::SeqFeatures
2646  Exceptions : ?
2647  Caller : webcode
2648  Status : Stable
2649 
2650 =cut
2651 sub get_all_DAS_Features{
2652  my ($self) = @_;
2653 
2654  $self->{_das_features} ||= {}; # Cache
2655  $self->{_das_styles} ||= {}; # Cache
2656  $self->{_das_segments} ||= {}; # Cache
2657  my %das_features;
2658  my %das_styles;
2659  my %das_segments;
2660  my $slice = $self;
2661 
2662  foreach my $dasfact( @{$self->get_all_DASFactories} ){
2663  my $dsn = $dasfact->adaptor->dsn;
2664  my $name = $dasfact->adaptor->name;
2665 # my $type = $dasfact->adaptor->type;
2666  my $url = $dasfact->adaptor->url;
2667 
2668  my ($type) = $dasfact->adaptor->mapping;
2669  if (ref $type eq 'ARRAY') {
2670  $type = shift @$type;
2671  }
2672  $type ||= $dasfact->adaptor->type;
2673  # Construct a cache key : SOURCE_URL/TYPE
2674  # Need the type to handle sources that serve multiple types of features
2675 
2676  my $key = join('/', $name, $type);
2677  if( $self->{_das_features}->{$key} ){ # Use cached
2678  $das_features{$name} = $self->{_das_features}->{$key};
2679  $das_styles{$name} = $self->{_das_styles}->{$key};
2680  $das_segments{$name} = $self->{_das_segments}->{$key};
2681  } else { # Get fresh data
2682  my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
2683  $self->{_das_features}->{$key} = $featref;
2684  $self->{_das_styles}->{$key} = $styleref;
2685  $self->{_das_segments}->{$key} = $segref;
2686  $das_features{$name} = $featref;
2687  $das_styles{$name} = $styleref;
2688  $das_segments{$name} = $segref;
2689  }
2690  }
2691 
2692  return (\%das_features, \%das_styles, \%das_segments);
2693 }
2694 
2695 sub get_all_DASFeatures{
2696  my ($self, $source_type) = @_;
2697 
2698 
2699  if(!$self->adaptor()) {
2700  warning("Cannot retrieve features without attached adaptor");
2701  return [];
2702  }
2703 
2704  my %genomic_features = map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory;
2705  return \%genomic_features;
2706 
2707 }
2708 
2709 sub old_get_all_DASFeatures{
2710  my ($self,@args) = @_;
2711 
2712  if(!$self->adaptor()) {
2713  warning("Cannot retrieve features without attached adaptor");
2714  return [];
2715  }
2716 
2717  my %genomic_features =
2718  map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
2719  $self->adaptor()->db()->_each_DASFeatureFactory;
2720  return \%genomic_features;
2721 
2722 }
2723 
2724 
2725 =head2 get_all_ExternalFeatures
2726 
2727  Arg [1] : (optional) string $track_name
2728  If specified only features from ExternalFeatureAdaptors with
2729  the track name $track_name are retrieved.
2730  If not set, all features from every ExternalFeatureAdaptor are
2731  retrieved.
2732  Example : @x_features = @{$slice->get_all_ExternalFeatures}
2733  Description: Retrieves features on this slice from external feature adaptors
2734  Returntype : listref of Bio::SeqFeatureI implementing objects in slice
2735  coordinates
2736  Exceptions : none
2737  Caller : general
2738  Status : Stable
2739 
2740 =cut
2741 
2742 sub get_all_ExternalFeatures {
2743  my ($self, $track_name) = @_;
2744 
2745  if(!$self->adaptor()) {
2746  warning("Cannot retrieve features without attached adaptor");
2747  return [];
2748  }
2749 
2750  my $features = [];
2751 
2752  my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
2753  my @xf_adaptors = ();
2754 
2755  if($track_name) {
2756  #use a specific adaptor
2757  if(exists $xfa_hash->{$track_name}) {
2758  push @xf_adaptors, $xfa_hash->{$track_name};
2759  }
2760  } else {
2761  #use all of the adaptors
2762  push @xf_adaptors, values %$xfa_hash;
2763  }
2764 
2765 
2766  foreach my $xfa (@xf_adaptors) {
2767  push @$features, @{$xfa->fetch_all_by_Slice($self)};
2768  }
2769 
2770  return $features;
2771 }
2772 
2773 
2774 =head2 get_all_DitagFeatures
2775 
2776  Arg [1] : (optional) string ditag type
2777  Arg [1] : (optional) string logic_name
2778  Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
2779  Description: Retrieves the DitagFeatures of a specific type which overlap
2780  this slice. If type is not defined, all features are retrieved.
2781  Strandedness of the Slice is ignored.
2782  Returntype : listref of Bio::EnsEMBL::DitagFeatures
2783  Exceptions : warning if slice does not have attached adaptor
2784  Caller : general
2785  Status : Stable
2786 
2787 =cut
2788 
2789 sub get_all_DitagFeatures {
2790  my ($self, $type, $logic_name) = @_;
2791  if(my $adaptor = $self->_get_CoreAdaptor('DitagFeature')) {
2792  return $adaptor->fetch_all_by_Slice($self, $type, $logic_name);
2793  }
2794  return [];
2795 }
2796 
2797 # GENERIC FEATURES (See DBAdaptor.pm)
2798 
2799 =head2 get_generic_features
2800 
2801  Arg [1] : (optional) List of names of generic feature types to return.
2802  If no feature names are given, all generic features are
2803  returned.
2804  Example : my %features = %{$slice->get_generic_features()};
2805  Description: Gets generic features via the generic feature adaptors that
2806  have been added via DBAdaptor->add_GenricFeatureAdaptor (if
2807  any)
2808  Returntype : Hash of named features.
2809  Exceptions : none
2810  Caller : none
2811  Status : Stable
2812 
2813 =cut
2814 
2815 sub get_generic_features {
2816 
2817  my ($self, @names) = @_;
2818 
2819  if(!$self->adaptor()) {
2820  warning('Cannot retrieve features without attached adaptor');
2821  return [];
2822  }
2823 
2824  my $db = $self->adaptor()->db();
2825 
2826  my %features = (); # this will hold the results
2827 
2828  # get the adaptors for each feature
2829  my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
2830 
2831  foreach my $adaptor_name (keys(%adaptors)) {
2832 
2833  my $adaptor_obj = $adaptors{$adaptor_name};
2834  # get the features and add them to the hash
2835  my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
2836  # add each feature to the hash to be returned
2837  foreach my $feature (@$features_ref) {
2838  $features{$adaptor_name} = $feature;
2839  }
2840  }
2841 
2842  return \%features;
2843 
2844 }
2845 
2846 =head2 project_to_slice
2847 
2848  Arg [1] : Slice to project to.
2849  Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
2850  foreach my $segment ( @$chr_projection ){
2851  $chr_slice = $segment->to_Slice();
2852  print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
2853  $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
2854  '-'.$chr_slice->end().
2855  $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
2856  }
2857  Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
2858  and we want to state which one to project to.
2859  Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
2860  can also be used as [$start,$end,$slice] triplets.
2861  Exceptions : none
2862  Caller : none
2863  Status : At Risk
2864 
2865 =cut
2866 
2867 sub project_to_slice {
2868  my $self = shift;
2869  my $to_slice = shift;
2870 
2871  throw('Slice argument is required') if(!$to_slice);
2872 
2873  my $slice_adaptor = $self->adaptor();
2874 
2875  if(!$slice_adaptor) {
2876  warning("Cannot project without attached adaptor.");
2877  return [];
2878  }
2879 
2880 
2881  my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
2882 
2883  my $cs = $to_slice->coord_system();
2884  my $slice_cs = $self->coord_system();
2885  my $to_slice_id = $to_slice->get_seq_region_id;
2886 
2887  my @projection;
2888 
2889  # decompose this slice into its symlinked components.
2890  # this allows us to handle haplotypes and PARs
2891  my $normal_slice_proj =
2892  $slice_adaptor->fetch_normalized_slice_projection($self);
2893  foreach my $segment (@$normal_slice_proj) {
2894  my $normal_slice = $segment->[2];
2895 
2896  $slice_cs = $normal_slice->coord_system();
2897 
2898  my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
2899  my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
2900  next unless defined $asm_mapper;
2901 
2902  # perform the mapping between this slice and the requested system
2903  my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
2904  $normal_slice->start(),
2905  $normal_slice->end(),
2906  $normal_slice->strand(),
2907  $slice_cs, undef, $to_slice, 1);
2908 
2909  #construct a projection from the mapping results and return it
2910  foreach my $coord (@coords) {
2911  my $original = $coord->{original};
2912  my $mapped = $coord->{mapped};
2913 
2914  # skip gaps
2915  next unless $mapped->isa('Bio::EnsEMBL::Mapper::Coordinate');
2916 
2917  my $mapped_start = $mapped->start();
2918  my $mapped_end = $mapped->end();
2919  my ($current_start, $current_end);
2920  if ($self->strand == 1) {
2921  $current_start = $original->start() - $self->start + 1;
2922  $current_end = $original->end() - $self->start + 1;
2923  } else {
2924  $current_start = $self->end() - $original->end + 1;
2925  $current_end = $self->end - $original->start + 1;
2926  }
2927 
2928  # for multiple mappings only get the correct one
2929  next unless $mapped->id == $to_slice_id;
2930 
2931  my $coord_cs = $mapped->coord_system();
2932 
2933  # If the normalised projection just ended up mapping to the
2934  # same coordinate system we were already in then we should just
2935  # return the original region. This can happen for example, if we
2936  # were on a PAR region on Y which refered to X and a projection to
2937  # 'toplevel' was requested.
2938  # if($coord_cs->equals($slice_cs)) {
2939  # # trim off regions which are not defined
2940  # return $self->_constrain_to_region();
2941  # }
2942 
2943  # create slices for the mapped-to coord system
2944  my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
2945  $mapped_start,
2946  $mapped_end,
2947  $mapped->strand());
2948 
2949  push @projection, bless([$current_start, $current_end, $slice],
2950  "Bio::EnsEMBL::ProjectionSegment");
2951  }
2952  }
2953 
2954 
2955  # delete the cache as we may want to map to different set next time and old
2956  # results will be cached.
2957  $mapper_aptr->delete_cache;
2958 
2959  return \@projection;
2960 }
2961 
2962 
2963 =head2 get_all_synonyms
2964 
2965  Args [1] : String external_db_name The name of the database to retrieve
2966  the synonym for
2967  Args [2] : (optional) Integer external_db_version Optionally restrict
2968  results from external_db_name to a specific version of
2969  the the specified external database
2970  Example : my @alternative_names = @{$slice->get_all_synonyms()};
2971  @alternative_names = @{$slice->get_all_synonyms('EMBL')};
2972  Description: get a list of alternative names for this slice
2973  Returntype : reference to list of SeqRegionSynonym objects.
2974  Exception : none
2975  Caller : general
2976  Status : At Risk
2977 
2978 =cut
2979 
2980 sub get_all_synonyms {
2981  my ($self, $external_db_name, $external_db_version) = @_;
2982 
2983  if ( !defined( $self->{'synonym'} ) ) {
2984  my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
2985  $self->{'synonym'} =
2986  $adap->get_synonyms( $self->get_seq_region_id($self) );
2987  }
2988 
2989  if(! $external_db_name) {
2990  return $self->{'synonym'};
2991  }
2992  my @args = ($external_db_version) ?
2993  ($external_db_name, $external_db_version) :
2994  ($external_db_name, undef, 1);
2995  my $external_db_id = $self->adaptor->db()->get_DBEntryAdaptor()->get_external_db_id(@args);
2996  if(!$external_db_id) {
2997  my $extra = ($external_db_version) ? "and version $external_db_version " : q{};
2998  throw "The external database $external_db_name ${extra}did not result in a valid identifier";
2999  }
3000 
3001  return [ grep { $_->external_db_id() == $external_db_id } @{$self->{synonym}} ];
3002 }
3003 
3004 =head2 add_synonym
3005 
3006  Args[0] : synonym.
3007  Example : $slice->add_synonym("alt_name");
3008  Description: add an alternative name for this slice
3009  Returntype : none
3010  Exception : none
3011  Caller : general
3012  Status : At Risk
3013 
3014 =cut
3015 
3016 sub add_synonym{
3017  my $self = shift;
3018  my $syn = shift;
3019  my $external_db_id = shift;
3020 
3021  my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
3022  if ( !defined( $self->{'synonym'} ) ) {
3023  $self->{'synonym'} = $self->get_all_synonyms();
3024  }
3025  my $new_syn = Bio::EnsEMBL::SeqRegionSynonym->new( #-adaptor => $adap,
3026  -synonym => $syn,
3027  -external_db_id => $external_db_id,
3028  -seq_region_id => $self->get_seq_region_id($self));
3029 
3030  push (@{$self->{'synonym'}}, $new_syn);
3031 
3032  return;
3033 }
3034 
3035 
3036 # package variable to minimize duplication
3037 my %region_so_mapping = (
3038  'chromosome' => 'SO:0000340',
3039  'supercontig' => 'SO:0000148',
3040  'scaffold' => 'SO:0000148',
3041  'contig' => 'SO:0000149'
3042 );
3043 
3044 =head2 feature_so_acc
3045 
3046  Example : $slice_so_acc = $slice->feature_so_acc;
3047  Description : This method returns a string containing the SO accession number of the slice, based on the coordinate system name.
3048  Returns : string (Sequence Ontology accession number)
3049 
3050 =cut
3051 
3052 sub feature_so_acc {
3053  my $self = shift;
3054 
3055  # return the region SO acc, or Slice acc
3056  return $region_so_mapping{$self->coord_system_name} // 'SO:0000001';
3057 }
3058 
3059 =head2 feature_so_term
3060 
3061  Description: This method returns a string containing the SO term of the slice, based on the coordinate system name
3062  Define constant SEQUENCE_ONTOLOGY in classes that require it, or override it for multiple possible values for a class.
3063  Returntype : String (Sequence Ontology term)
3064  Exceptions : Thrown if caller SEQUENCE_ONTOLOGY is undefined and is not a Bio::EnsEMBL::Slice
3065 
3066 =cut
3067 
3068 sub feature_so_term {
3069  my ($self) = shift;
3070 
3071  return defined($region_so_mapping{$self->coord_system_name}) ?
3072  $self->coord_system_name :
3073  'region';
3074 }
3075 
3076 =head2 summary_as_hash
3077 
3078  Example : $slice_summary = $slice->summary_as_hash();
3079  Description : Retrieves a textual summary of this slice.
3080  Returns : hashref of descriptive strings
3081 =cut
3082 
3083 sub summary_as_hash {
3084  my $self = shift;
3085  my %summary;
3086  my @aliases = map { $_->name } @{$self->slice->get_all_synonyms()};
3087 
3088  $summary{'seq_region_name'} = $self->seq_region_name;
3089  $summary{'id'} = $self->seq_region_name;
3090  $summary{'start'} = $self->start;
3091  $summary{'end'} = $self->end;
3092  $summary{'strand'} = 0;
3093  $summary{'source'} = $self->source || $self->coord_system->version;
3094  $summary{'Alias'} = \@aliases if scalar(@aliases);
3095  $summary{'Is_circular'} = $self->is_circular ? "true" : undef;
3096  return \%summary;
3097 }
3098 
3099 
3100 sub slice {
3101  my $self = shift;
3102  return $self;
3103 }
3104 
3105 #
3106 # Bioperl Bio::PrimarySeqI methods:
3107 #
3108 
3109 =head2 id
3110 
3111  Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3112 
3113 =cut
3114 
3115 sub id { name(@_); }
3116 
3117 
3118 =head2 display_id
3119 
3120  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3121 
3122 =cut
3123 
3124 sub display_id { name(@_); }
3125 
3126 
3127 =head2 primary_id
3128 
3129  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3130 
3131 =cut
3132 
3133 sub primary_id { name(@_); }
3134 
3135 
3136 =head2 desc
3137 
3138 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3139 
3140 =cut
3141 
3142 sub desc{ return $_[0]->coord_system->name().' '.$_[0]->seq_region_name(); }
3143 
3144 
3145 =head2 moltype
3146 
3147 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3148 
3149 =cut
3150 
3151 sub moltype { return 'dna'; }
3152 
3153 =head2 alphabet
3154 
3155  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3156 
3157 =cut
3158 
3159 sub alphabet { return 'dna'; }
3160 
3161 
3162 =head2 accession_number
3163 
3164  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3165 
3166 =cut
3167 
3168 sub accession_number { name(@_); }
3169 
3170 1;
usage
public usage()
Bio::EnsEMBL::Slice::coord_system_name
public String coord_system_name()
Bio::EnsEMBL::Mapper::RangeRegistry
Definition: RangeRegistry.pm:51
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::ProjectionSegment
Definition: ProjectionSegment.pm:27
map
public map()
Bio::EnsEMBL::CoordSystem::name
public String name()
Bio::EnsEMBL::DBSQL::MergedAdaptor
Definition: MergedAdaptor.pm:36
Bio::EnsEMBL::Utils::Sequence
Definition: Sequence.pm:22
accession
public accession()
Bio::EnsEMBL::CoordSystem
Definition: CoordSystem.pm:40
Bio::EnsEMBL::DBSQL::SliceAdaptor
Definition: SliceAdaptor.pm:78
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::Slice::seq
public String seq()
Bio::EnsEMBL::SeqRegionSynonym
Definition: SeqRegionSynonym.pm:16
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::Registry::get_DBAdaptor
public DBAdaptor get_DBAdaptor()
Bio::EnsEMBL::Slice::new
public Bio::EnsEMBL::Slice new()
Bio::EnsEMBL::Slice::seq_region_name
public String seq_region_name()
has_karyotype
public has_karyotype()
Bio::EnsEMBL::Slice::start
public Int start()
Bio::EnsEMBL::RepeatMaskedSlice::new
public Bio::EnsEMBL::RepeatMaskedSlice new()
Bio::EnsEMBL::Utils::Iterator
Definition: Iterator.pm:44
Bio::EnsEMBL::DBSQL::BaseAdaptor
Definition: BaseAdaptor.pm:71
get_seq_region_id
public get_seq_region_id()
Bio::EnsEMBL::RepeatFeature
Definition: RepeatFeature.pm:45
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::Registry::get_db
public Adaptor get_db()
Bio::EnsEMBL::RepeatMaskedSlice
Definition: RepeatMaskedSlice.pm:28
Bio::EnsEMBL::Utils::Iterator::new
public Bio::EnsEMBL::Utils::Iterator new()
Bio::EnsEMBL::SeqRegionSynonym::new
public Bio::EnsEMBL::SeqRegionSynonym new()
Bio::EnsEMBL::PredictionTranscript
Definition: PredictionTranscript.pm:39
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68