ensembl-hive  2.8.1
Slice.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::Slice - Arbitary Slice of a genome
34 
35 =head1 SYNOPSIS
36 
37  $sa = $db->get_SliceAdaptor;
38 
39  $slice =
40  $sa->fetch_by_region( 'chromosome', 'X', 1_000_000, 2_000_000 );
41 
42  # get some attributes of the slice
43  my $seqname = $slice->seq_region_name();
44  my $start = $slice->start();
45  my $end = $slice->end();
46 
47  # get the sequence from the slice
48  my $seq = $slice->seq();
49 
50  # get some features from the slice
51  foreach $gene ( @{ $slice->get_all_Genes } ) {
52  # do something with a gene
53  }
54 
55  foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56  # do something with dna-dna alignments
57  }
58 
59 =head1 DESCRIPTION
60 
61 A Slice object represents a region of a genome. It can be used to retrieve
62 sequence or features from an area of interest.
63 
64 NOTE: The Slice is defined by its Strand, but normal behaviour for get_all_*
65 methods is to return Features on both Strands.
66 
67 =head1 METHODS
68 
69 =cut
70 
71 package Bio::EnsEMBL::Slice;
72 
73 use vars qw(@ISA);
74 use strict;
75 
76 use Bio::PrimarySeqI;
77 
78 
79 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
80 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
82 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
87 
90 use Scalar::Util qw(weaken isweak);
91 
92 # use Data::Dumper;
93 
94 my $registry = "Bio::EnsEMBL::Registry";
95 
96 @ISA = qw(Bio::PrimarySeqI);
97 
98 
99 =head2 new
100 
101  Arg [...] : List of named arguments
102  Bio::EnsEMBL::CoordSystem COORD_SYSTEM
103  string SEQ_REGION_NAME,
104  int START,
105  int END,
106  int SEQ_REGION_LENGTH, (optional)
107  string SEQ (optional)
108  int STRAND, (optional, defaults to 1)
109  Bio::EnsEMBL::DBSQL::SliceAdaptor ADAPTOR (optional)
110  Example : $slice = Bio::EnsEMBL::Slice->new(-coord_system => $cs,
111  -start => 1,
112  -end => 10000,
113  -strand => 1,
114  -seq_region_name => 'X',
115  -seq_region_length => 12e6,
116  -adaptor => $slice_adaptor);
117  Description: Creates a new slice object. A slice represents a region
118  of sequence in a particular coordinate system. Slices can be
119  used to retrieve sequence and features from an area of
120  interest in a genome.
121 
122  Coordinates start at 1 and are inclusive. Negative
123  coordinates or coordinates exceeding the length of the
124  seq_region are permitted. Start must be less than or equal.
125  to end regardless of the strand.
126 
127  Slice objects are immutable. Once instantiated their attributes
128  (with the exception of the adaptor) may not be altered. To
129  change the attributes a new slice must be created.
130  Returntype : Bio::EnsEMBL::Slice
131  Exceptions : throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
132  Caller : general, Bio::EnsEMBL::SliceAdaptor
133  Status : Stable
134 
135 =cut
136 
137 sub new {
138  my $caller = shift;
139 
140  #new can be called as a class or object method
141  my $class = ref($caller) || $caller;
142 
143  my ($seq, $coord_system, $seq_region_name, $seq_region_length,
144  $start, $end, $strand, $adaptor, $empty) =
145  rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
146  START END STRAND ADAPTOR EMPTY)], @_);
147 
148  if ( !defined($seq_region_name) ) {
149  throw('SEQ_REGION_NAME argument is required');
150  }
151  if ( !defined($start) ) { throw('START argument is required') }
152  if ( !defined($end) ) { throw('END argument is required') }
153 
154  ## if ( $start > $end + 1 ) {
155  ## throw('start must be less than or equal to end+1');
156  ## }
157 
158  if ( !defined($seq_region_length) ) { $seq_region_length = $end }
159 
160  if ( $seq_region_length <= 0 ) {
161  throw('SEQ_REGION_LENGTH must be > 0');
162  }
163 
164  if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
165  throw('SEQ must be the same length as the defined LENGTH not '
166  . CORE::length($seq)
167  . ' compared to '
168  . ( $end - $start + 1 ) );
169  }
170 
171  if(defined($coord_system)) {
172  if(!ref($coord_system) || !$coord_system->isa('Bio::EnsEMBL::CoordSystem')){
173  throw('COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
174  }
175  if($coord_system->is_top_level()) {
176  throw('Cannot create slice on toplevel CoordSystem.');
177  }
178  } else {
179  warning("Slice without coordinate system");
180  }
181 
182  $strand ||= 1;
183 
184  if($strand != 1 && $strand != -1) {
185  throw('STRAND argument must be -1 or 1');
186  }
187 
188  if(defined($adaptor)) {
189  if(!ref($adaptor) || !$adaptor->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
190  throw('ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
191  }
192  }
193 
194  my $self = bless {'coord_system' => $coord_system,
195  'seq' => $seq,
196  'seq_region_name' => $seq_region_name,
197  'seq_region_length' => $seq_region_length,
198  'start' => int($start),
199  'end' => int($end),
200  'strand' => $strand}, $class;
201 
202  $self->adaptor($adaptor);
203 
204  return $self;
205 
206 }
207 
208 =head2 new_fast
209 
210  Arg [1] : hashref to be blessed
211  Description: Construct a new Bio::EnsEMBL::Slice using the hashref.
212  Exceptions : none
213  Returntype : Bio::EnsEMBL::Slice
214  Caller : general
215  Status : Stable
216 
217 =cut
218 
219 
220 sub new_fast {
221  my $class = shift;
222  my $hashref = shift;
223  my $self = bless $hashref, $class;
224  weaken($self->{adaptor}) if ( ! isweak($self->{adaptor}) );
225  return $self;
226 }
227 
228 =head2 adaptor
229 
230  Arg [1] : (optional) Bio::EnsEMBL::DBSQL::SliceAdaptor $adaptor
231  Example : $adaptor = $slice->adaptor();
232  Description: Getter/Setter for the slice object adaptor used
233  by this slice for database interaction.
235  Exceptions : thorws if argument passed is not a SliceAdaptor
236  Caller : general
237  Status : Stable
238 
239 =cut
240 
241 sub adaptor{
242  my $self = shift;
243 
244  if(@_) {
245  my $ad = shift;
246  if(defined($ad)) {
247  if(!ref($ad) || !$ad->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
248  throw('Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
249  }
250  }
251  weaken($self->{'adaptor'} = $ad);
252  }
253 
254  return $self->{'adaptor'};
255 }
256 
257 
258 
259 =head2 seq_region_name
260 
261  Arg [1] : none
262  Example : $seq_region = $slice->seq_region_name();
263  Description: Returns the name of the seq_region that this slice is on. For
264  example if this slice is in chromosomal coordinates the
265  seq_region_name might be 'X' or '10'.
266 
267  This function was formerly named chr_name, but since slices can
268  now be on coordinate systems other than chromosomal it has been
269  changed.
270  Returntype : string
271  Exceptions : none
272  Caller : general
273  Status : Stable
274 
275 =cut
276 
277 sub seq_region_name {
278  my $self = shift;
279  return $self->{'seq_region_name'};
280 }
281 
282 =head2 seq_region_start
283 
284  Example : $seq_region_start = $slice->seq_region_start();
285  Description: Returns the start position of this slice relative to the
286  start of the sequence region that it was created on.
287  Since slices are always in genomic coordinates this is
288  an alias to start()
289  Returntype : int
290  Exceptions : none
291  Caller : general
292  Status : Stable
293 
294 =cut
295 
296 sub seq_region_start {
297  my $self = shift;
298  return $self->start();
299 }
300 
301 =head2 seq_region_end
302 
303  Example : $seq_region_end = $slice->seq_region_end();
304  Description: Returns the end position of this slice relative to the
305  start of the sequence region that it was created on.
306  Since slices are always in genomic coordinates this is
307  an alias to end()
308  Returntype : int
309  Exceptions : none
310  Caller : general
311  Status : Stable
312 
313 =cut
314 
315 sub seq_region_end {
316  my $self = shift;
317  return $self->end();
318 }
319 
320 =head2 seq_region_length
321 
322  Arg [1] : none
323  Example : $seq_region_length = $slice->seq_region_length();
324  Description: Returns the length of the entire seq_region that this slice is
325  on. For example if this slice is on a chromosome this will be
326  the length (in basepairs) of the entire chromosome.
327  Returntype : int
328  Exceptions : none
329  Caller : general
330  Status : Stable
331 
332 =cut
333 
334 sub seq_region_length {
335  my $self = shift;
336  return $self->{'seq_region_length'};
337 }
338 
339 
340 =head2 coord_system
341 
342  Arg [1] : none
343  Example : print $slice->coord_system->name();
344  Description: Returns the coordinate system that this slice is on.
345  Returntype : Bio::EnsEMBL::CoordSystem
346  Exceptions : none
347  Caller : general
348  Status : Stable
349 
350 =cut
351 
352 sub coord_system {
353  my $self = shift;
354  return $self->{'coord_system'};
355 }
356 
357 =head2 source
358 
359  Arg [1] : (optional) String $value
360  Example : print $slice->source();
361  Description: Returns the source this slice is coming from
362  Returntype : string
363  Exceptions : none
364  Caller : general
365  Status : Stable
366 
367 =cut
368 
369 sub source {
370  my $self = shift;
371  $self->{'source'} = shift if (@_);
372  return $self->{'source'};
373 }
374 
375 =head2 coord_system_name
376 
377  Arg [1] : none
378  Example : print $slice->coord_system_name()
379  Description: Convenience method. Gets the name of the coord_system which
380  this slice is on.
381  Returns undef if this Slice does not have an attached
382  CoordSystem.
383  Returntype: string or undef
384  Exceptions: none
385  Caller : general
386  Status : Stable
387 
388 =cut
389 
390 sub coord_system_name {
391  my $self = shift;
392  my $csystem = $self->{'coord_system'};
393  return ($csystem) ? $csystem->name() : undef;
394 }
395 
396 
397 =head2 centrepoint
398 
399  Arg [1] : none
400  Example : $cp = $slice->centrepoint();
401  Description: Returns the mid position of this slice relative to the
402  start of the sequence region that it was created on.
403  Coordinates are inclusive and start at 1.
404  Returntype : int
405  Exceptions : none
406  Caller : general
407  Status : Stable
408 
409 =cut
410 
411 sub centrepoint {
412  my $self = shift;
413  return ($self->{'start'}+$self->{'end'})/2;
414 }
415 
416 =head2 start
417 
418  Arg [1] : none
419  Example : $start = $slice->start();
420  Description: Returns the start position of this slice relative to the
421  start of the sequence region that it was created on.
422  Coordinates are inclusive and start at 1. Negative coordinates
423  or coordinates exceeding the length of the sequence region are
424  permitted. Start is always less than or equal to end
425  regardless of the orientation of the slice.
426  Returntype : int
427  Exceptions : none
428  Caller : general
429  Status : Stable
430 
431 =cut
432 
433 sub start {
434  my $self = shift;
435  return $self->{'start'};
436 }
437 
438 
439 
440 =head2 end
441 
442  Arg [1] : none
443  Example : $end = $slice->end();
444  Description: Returns the end position of this slice relative to the
445  start of the sequence region that it was created on.
446  Coordinates are inclusive and start at 1. Negative coordinates
447  or coordinates exceeding the length of the sequence region are
448  permitted. End is always greater than or equal to start
449  regardless of the orientation of the slice.
450  Returntype : int
451  Exceptions : none
452  Caller : general
453  Status : Stable
454 
455 =cut
456 
457 sub end {
458  my $self = shift;
459  return $self->{'end'};
460 }
461 
462 
463 
464 =head2 strand
465 
466  Arg [1] : none
467  Example : $strand = $slice->strand();
468  Description: Returns the orientation of this slice on the seq_region it has
469  been created on
470  Returntype : int (either 1 or -1)
471  Exceptions : none
472  Caller : general, invert
473  Status : Stable
474 
475 =cut
476 
477 sub strand{
478  my $self = shift;
479  return $self->{'strand'};
480 }
481 
482 
483 
484 
485 
486 =head2 name
487 
488  Arg [1] : none
489  Example : my $results = $cache{$slice->name()};
490  Description: Returns the name of this slice. The name is formatted as a colon
491  delimited string with the following attributes:
492  coord_system:version:seq_region_name:start:end:strand
493 
494  Slices with the same name are equivalent and thus the name can
495  act as a hash key.
496  Returntype : string
497  Exceptions : none
498  Caller : general
499  Status : Stable
500 
501 =cut
502 
503 sub name {
504  my $self = shift;
505 
506  my $cs = $self->{'coord_system'};
507 
508  return join(':',
509  ($cs) ? $cs->name() : '',
510  ($cs) ? $cs->version() : '',
511  $self->{'seq_region_name'},
512  $self->{'start'},
513  $self->{'end'},
514  $self->{'strand'});
515 }
516 
517 
518 
519 =head2 length
520 
521  Arg [1] : none
522  Example : $length = $slice->length();
523  Description: Returns the length of this slice in basepairs
524  Returntype : int
525  Exceptions : none
526  Caller : general
527  Status : Stable
528 
529 =cut
530 
531 sub length {
532  my ($self) = @_;
533 
534  my $length = $self->{'end'} - $self->{'start'} + 1;
535 
536  if ( $self->{'start'} > $self->{'end'} && $self->is_circular() ) {
537  $length += $self->{'seq_region_length'};
538  }
539 
540  return $length;
541 }
542 
543 =head2 is_reference
544  Arg : none
545  Example : my $reference = $slice->is_reference()
546  Description: Returns 1 if slice is a reference slice else 0
547  Returntype : int
548  Caller : general
549  Status : At Risk
550 
551 =cut
552 
553 sub is_reference {
554  my ($self) = @_;
555 
556  if ( !defined( $self->{'is_reference'} ) ) {
557  $self->{'is_reference'} =
558  $self->adaptor()->is_reference( $self->get_seq_region_id() );
559  }
560 
561  return $self->{'is_reference'};
562 }
563 
564 =head2 is_toplevel
565  Arg : none
566  Example : my $top = $slice->is_toplevel()
567  Description: Returns 1 if slice is a toplevel slice else 0
568  Returntype : int
569  Caller : general
570  Status : At Risk
571 
572 =cut
573 
574 sub is_toplevel {
575  my ($self) = @_;
576 
577  if ( !defined( $self->{'toplevel'} ) ) {
578  $self->{'toplevel'} =
579  $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
580  }
581 
582  return $self->{'toplevel'};
583 }
584 
585 =head2 has_karyotype
586  Arg : none
587  Example : my $top = $slice->has_karyotype()
588  Description: Returns 1 if slice is part of the karyotype else 0
589  Returntype : int
590  Caller : general
591  Status : At Risk
592 
593 =cut
594 
595 sub has_karyotype {
596  my ($self) = @_;
597 
598  if ( !defined( $self->{'karyotype'} ) ) {
599  $self->{'karyotype'} =
600  $self->adaptor()->has_karyotype( $self->get_seq_region_id() );
601  }
602 
603  return $self->{'karyotype'};
604 }
605 
606 =head2 karyotype_rank
607  Arg : none
608  Example : my $rank = $slice->karyotype_rank()
609  Description: Returns the numeric ranking in the karyotype. Otherwise 0 is returned
610  Returntype : int
611  Caller : general
612  Status : At Risk
613 
614 =cut
615 
616 sub karyotype_rank {
617  my ($self) = @_;
618  if(! defined( $self->{karyotype_rank})) {
619  my $rank = $self->adaptor()->get_karyotype_rank($self->get_seq_region_id());
620  $self->{karyotype_rank} = $rank if $rank;
621  }
622  return $self->{karyotype_rank} || 0;
623 }
624 
625 =head2 is_circular
626  Arg : none
627  Example : my $circ = $slice->is_circular()
628  Description: Returns 1 if slice is a circular slice else 0
629  Returntype : int
630  Caller : general
631  Status : Stable
632 
633 =cut
634 
635 sub is_circular {
636  my ($self) = @_;
637  my $adaptor = $self->adaptor();
638  return 0 if ! defined $adaptor;
639  if (! exists $self->{'circular'}) {
640  my $id = $adaptor->get_seq_region_id($self);
641  $self->{circular} = $adaptor->is_circular($id);
642  }
643  return $self->{circular};
644 }
645 
646 =head2 invert
647 
648  Arg [1] : none
649  Example : $inverted_slice = $slice->invert;
650  Description: Creates a copy of this slice on the opposite strand and
651  returns it.
652  Returntype : Bio::EnsEMBL::Slice
653  Exceptions : none
654  Caller : general
655  Status : Stable
656 
657 =cut
658 
659 sub invert {
660  my $self = shift;
661 
662  # make a shallow copy of the slice via a hash copy and flip the strand
663  my %s = %$self;
664  $s{'strand'} = $self->{'strand'} * -1;
665 
666  # reverse compliment any attached sequence
667  reverse_comp(\$s{'seq'}) if($s{'seq'});
668 
669  # bless and return the copy
670  return bless \%s, ref $self;
671 }
672 
673 
674 
675 =head2 seq
676 
677  Arg [1] : none
678  Example : print "SEQUENCE = ", $slice->seq();
679  Description: Returns the sequence of the region represented by this
680  slice formatted as a string.
681  Only available for the default coord_system
682  Returntype : string
683  Exceptions : none
684  Caller : general
685  Status : Stable
686 
687 =cut
688 
689 sub seq {
690  my $self = shift;
691 
692  # special case for in-between (insert) coordinates
693  return '' if($self->start() == $self->end() + 1);
694 
695  return $self->{'seq'} if($self->{'seq'});
696 
697  if($self->adaptor()) {
698  my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
699  return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
700  }
701 
702  # no attached sequence, and no db, so just return Ns
703  return 'N' x $self->length();
704 }
705 
706 
707 
708 =head2 subseq
709 
710  Arg [1] : int $startBasePair
711  relative to start of slice, which is 1.
712  Arg [2] : int $endBasePair
713  relative to start of slice.
714  Arg [3] : (optional) int $strand
715  The strand of the slice to obtain sequence from. Default
716  value is 1.
717  Description: returns string of dna sequence
718  Returntype : txt
719  Exceptions : end should be at least as big as start
720  strand must be set
721  Caller : general
722  Status : Stable
723 
724 =cut
725 
726 sub subseq {
727  my ( $self, $start, $end, $strand ) = @_;
728 
729  if ( $end+1 < $start ) {
730  throw("End coord + 1 is less than start coord");
731  }
732 
733  # handle 'between' case for insertions
734  return '' if( $start == $end + 1);
735 
736  $strand = 1 unless(defined $strand);
737 
738  if ( $strand != -1 && $strand != 1 ) {
739  throw("Invalid strand [$strand] in call to Slice::subseq.");
740  }
741  my $subseq;
742  if($self->adaptor){
743  my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
744  $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
745  ( $self, $start,
746  $end, $strand )};
747  } else {
748  ## check for gap at the beginning and pad it with Ns
749  if ($start < 1) {
750  $subseq = "N" x (1 - $start);
751  $start = 1;
752  }
753  $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
754  ## check for gap at the end and pad it with Ns
755  if ($end > $self->length()) {
756  $subseq .= "N" x ($end - $self->length());
757  }
758  reverse_comp(\$subseq) if($strand == -1);
759  }
760  return $subseq;
761 }
762 
763 =head2 sub_Slice_Iterator
764 
765  Arg[1] : int The chunk size to request
766  Example : my $i = $slice->sub_Slice_Iterator(60000);
767  while($i->has_next()) { warn $i->next()->name(); }
768  Description : Returns an iterator which batches subslices of this Slice
769  in the requested chunk size
770  Returntype : Bio::EnsEMBL::Utils::Iterator next() will return the next
771  chunk of Slice
772  Exceptions : None
773 
774 =cut
775 
776 sub sub_Slice_Iterator {
777  my ($self, $chunk_size) = @_;
778  throw "Need a chunk size to divide the slice by" if ! $chunk_size;
779  my $here = 1;
780  my $end = $self->length();
781  my $iterator_sub = sub {
782  while($here <= $end) {
783  my $there = $here + $chunk_size - 1;
784  $there = $end if($there > $end);
785  my $slice = $self->sub_Slice($here, $there);
786  $here = $there + 1;
787  return $slice;
788  }
789  return;
790  };
791  return Bio::EnsEMBL::Utils::Iterator->new($iterator_sub);
792 }
793 
794 =head2 assembly_exception_type
795 
796  Example : $self->assembly_exception_type();
797  Description : Returns the type of slice this is. If it is reference then you
798  will get 'REF' back. Otherwise you will get the first
799  element from C<get_all_AssemblyExceptionFeatures()>. If no
800  assembly exception exists you will get an empty string back.
801  Returntype : String
802  Exceptions : None
803  Caller : Public
804  Status : Beta
805 
806 =cut
807 
808 sub assembly_exception_type {
809  my ($self) = @_;
810  my $type = q{};
811  if($self->is_reference()) {
812  $type = 'REF';
813  }
814  else {
815  my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
816  if(@{$assembly_exceptions}) {
817  $type = $assembly_exceptions->[0]->type();
818  }
819  }
820  return $type;
821 }
822 
823 =head2 is_chromosome
824 
825  Example : print ($slice->is_chromosome()) ? 'I am a chromosome' : 'Not one';
826  Description : A chromosome is a slice with a karyotype rank assigned to it.
827  Returntype : Boolean indicates if the current object is a chromosome
828  Exceptions : None
829 
830 =cut
831 
832 sub is_chromosome {
833  my ($self) = @_;
834 
835  return $self->has_karyotype();
836 }
837 
838 
839 =head2 get_base_count
840 
841  Arg [1] : none
842  Example : $c_count = $slice->get_base_count->{'c'};
843  Description: Retrieves a hashref containing the counts of each bases in the
844  sequence spanned by this slice. The format of the hash is :
845  { 'a' => num,
846  'c' => num,
847  't' => num,
848  'g' => num,
849  'n' => num,
850  '%gc' => num }
851 
852  All bases which are not in the set [A,a,C,c,T,t,G,g] are
853  included in the 'n' count. The 'n' count could therefore be
854  inclusive of ambiguity codes such as 'y'.
855  The %gc is the ratio of GC to AT content as in:
856  total(GC)/total(ACTG) * 100
857  This function is conservative in its memory usage and scales to
858  work for entire chromosomes.
859  Returntype : hashref
860  Exceptions : none
861  Caller : general
862  Status : Stable
863 
864 =cut
865 
866 sub get_base_count {
867  my $self = shift;
868 
869  my $a = 0;
870  my $c = 0;
871  my $t = 0;
872  my $g = 0;
873 
874  my $start = 1;
875  my $end;
876 
877  my $RANGE = 100_000;
878  my $len = $self->length();
879 
880  my $seq;
881 
882  while ( $start <= $len ) {
883  $end = $start + $RANGE - 1;
884  $end = $len if ( $end > $len );
885 
886  $seq = $self->subseq( $start, $end );
887 
888  $a += $seq =~ tr/Aa//;
889  $c += $seq =~ tr/Cc//;
890  $t += $seq =~ tr/Tt//;
891  $g += $seq =~ tr/Gg//;
892 
893  $start = $end + 1;
894  }
895 
896  my $actg = $a + $c + $t + $g;
897 
898  my $gc_content = 0;
899  if ( $actg > 0 ) { # Avoid dividing by 0
900  $gc_content = sprintf( "%1.2f", ( ( $g + $c )/$actg )*100 );
901  }
902 
903  return { 'a' => $a,
904  'c' => $c,
905  't' => $t,
906  'g' => $g,
907  'n' => $len - $actg,
908  '%gc' => $gc_content };
909 }
910 
911 
912 
913 =head2 project
914 
915  Arg [1] : string $name
916  The name of the coordinate system to project this slice onto
917  Arg [2] : string $version
918  The version of the coordinate system (such as 'NCBI34') to
919  project this slice onto
920  Example :
921  my $clone_projection = $slice->project('clone');
922 
923  foreach my $segment (@$clone_projection) {
924  my $clone = $segment->to_Slice();
925  print $slice->seq_region_name(), ':', $segment->from_start(), '-',
926  $segment->from_end(), ' -> ',
927  $clone->seq_region_name(), ':', $clone->start(), '-',$clone->end(),
928  ':', $clone->strand(), "\n";
929  }
930  Description: Returns the results of 'projecting' this slice onto another
931  coordinate system. Projecting to a coordinate system that
932  the slice is assembled from is analagous to retrieving a tiling
933  path. This method may also be used to 'project up' to a higher
934  level coordinate system, however.
935 
936  This method returns a listref of triplets [start,end,slice]
937  which represents the projection. The start and end defined the
938  region of this slice which is made up of the third value of
939  the triplet: a slice in the requested coordinate system.
940  Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
941  can also be used as [$start,$end,$slice] triplets
942  Exceptions : none
943  Caller : general
944  Status : Stable
945 
946 =cut
947 
948 sub project {
949  my $self = shift;
950  my $cs_name = shift;
951  my $cs_version = shift;
952 
953  throw('Coord_system name argument is required') if(!$cs_name);
954 
955  my $slice_adaptor = $self->adaptor();
956 
957  if(!$slice_adaptor) {
958  warning("Cannot project without attached adaptor.");
959  return [];
960  }
961 
962  if(!$self->coord_system()) {
963  warning("Cannot project without attached coord system.");
964  return [];
965  }
966 
967 
968  my $db = $slice_adaptor->db();
969  my $csa = $db->get_CoordSystemAdaptor();
970  my $cs = $csa->fetch_by_name($cs_name, $cs_version);
971  my $slice_cs = $self->coord_system();
972 
973  if(!$cs) {
974  throw("Cannot project to unknown coordinate system " .
975  "[$cs_name $cs_version]");
976  }
977 
978  # no mapping is needed if the requested coord system is the one we are in
979  # but we do need to check if some of the slice is outside of defined regions
980  if($slice_cs->equals($cs)) {
981  return $self->_constrain_to_region();
982  }
983 
984  my @projection;
985 
986  # decompose this slice into its symlinked components.
987  # this allows us to handle haplotypes and PARs
988  my $normal_slice_proj =
989  $slice_adaptor->fetch_normalized_slice_projection($self);
990  foreach my $segment (@$normal_slice_proj) {
991  my $normal_slice = $segment->[2];
992 
993  $slice_cs = $normal_slice->coord_system();
994 
995  my $asma = $db->get_AssemblyMapperAdaptor();
996  my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
997  next unless defined $asm_mapper;
998 
999  # perform the mapping between this slice and the requested system
1000  my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
1001  $normal_slice->start(),
1002  $normal_slice->end(),
1003  $normal_slice->strand(),
1004  $slice_cs, undef, undef, 1);
1005 
1006  # construct a projection from the mapping results and return it
1007  foreach my $coord (@coords) {
1008  my $original = $coord->{original};
1009  my $mapped = $coord->{mapped};
1010 
1011  # skip gaps
1012  next unless $mapped->isa('Bio::EnsEMBL::Mapper::Coordinate');
1013 
1014  my $mapped_start = $mapped->start();
1015  my $mapped_end = $mapped->end();
1016  my ($current_start, $current_end);
1017  if ($self->strand == 1) {
1018  $current_start = $original->start() - $self->start + 1;
1019  $current_end = $original->end() - $self->start + 1;
1020  } else {
1021  $current_start = $self->end() - $original->end + 1;
1022  $current_end = $self->end - $original->start + 1;
1023  }
1024 
1025  my $coord_cs = $mapped->coord_system();
1026 
1027  # If the normalised projection just ended up mapping to the
1028  # same coordinate system we were already in then we should just
1029  # return the original region. This can happen for example, if we
1030  # were on a PAR region on Y which refered to X and a projection to
1031  # 'toplevel' was requested.
1032  if($coord_cs->equals($slice_cs)) {
1033  # trim off regions which are not defined
1034  return $self->_constrain_to_region();
1035  }
1036  #create slices for the mapped-to coord system
1037  my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
1038  $mapped_start,
1039  $mapped_end,
1040  $mapped->strand());
1041 
1042  if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
1043  $current_end -= $slice->seq_region_length();
1044  }
1045 
1046  push @projection, bless([$current_start, $current_end, $slice],
1047  "Bio::EnsEMBL::ProjectionSegment");
1048 
1049  }
1050  }
1051 
1052  return \@projection;
1053 }
1054 
1055 
1056 sub _constrain_to_region {
1057  my $self = shift;
1058 
1059  my $entire_len = $self->seq_region_length();
1060 
1061  #if the slice has negative coordinates or coordinates exceeding the
1062  #exceeding length of the sequence region we want to shrink the slice to
1063  #the defined region
1064 
1065  if($self->{'start'} > $entire_len || $self->{'end'} < 1) {
1066  #none of this slice is in a defined region
1067  return [];
1068  }
1069 
1070  my $right_contract = 0;
1071  my $left_contract = 0;
1072  if($self->{'end'} > $entire_len) {
1073  $right_contract = $entire_len - $self->{'end'};
1074  }
1075  if($self->{'start'} < 1) {
1076  $left_contract = $self->{'start'} - 1;
1077  }
1078 
1079  my $new_slice;
1080  if($left_contract || $right_contract) {
1081  #if slice in negative strand, need to swap contracts
1082  if ($self->strand == 1) {
1083  $new_slice = $self->expand($left_contract, $right_contract);
1084  }
1085  elsif ($self->strand == -1) {
1086  $new_slice = $self->expand($right_contract, $left_contract);
1087  }
1088  } else {
1089  $new_slice = $self;
1090  }
1091 
1092  return [bless [1-$left_contract, $self->length()+$right_contract,
1093  $new_slice], "Bio::EnsEMBL::ProjectionSegment" ];
1094 }
1095 
1096 
1097 =head2 expand
1098 
1099  Arg [1] : (optional) int $five_prime_expand
1100  The number of basepairs to shift this slices five_prime
1101  coordinate by. Positive values make the slice larger,
1102  negative make the slice smaller.
1103  coordinate left.
1104  Default = 0.
1105  Arg [2] : (optional) int $three_prime_expand
1106  The number of basepairs to shift this slices three_prime
1107  coordinate by. Positive values make the slice larger,
1108  negative make the slice smaller.
1109  Default = 0.
1110  Arg [3] : (optional) bool $force_expand
1111  if set to 1, then the slice will be contracted even in the case
1112  when shifts $five_prime_expand and $three_prime_expand overlap.
1113  In that case $five_prime_expand and $three_prime_expand will be set
1114  to a maximum possible number and that will result in the slice
1115  which would have only 2pbs.
1116  Default = 0.
1117  Arg [4] : (optional) int* $fpref
1118  The reference to a number of basepairs to shift this slices five_prime
1119  coordinate by. Normally it would be set to $five_prime_expand.
1120  But in case when $five_prime_expand shift can not be applied and
1121  $force_expand is set to 1, then $$fpref will contain the maximum possible
1122  shift
1123  Arg [5] : (optional) int* $tpref
1124  The reference to a number of basepairs to shift this slices three_prime
1125  coordinate by. Normally it would be set to $three_prime_expand.
1126  But in case when $five_prime_expand shift can not be applied and
1127  $force_expand is set to 1, then $$tpref will contain the maximum possible
1128  shift
1129  Example : my $expanded_slice = $slice->expand( 1000, 1000);
1130  my $contracted_slice = $slice->expand(-1000,-1000);
1131  my $shifted_right_slice = $slice->expand(-1000, 1000);
1132  my $shifted_left_slice = $slice->expand( 1000,-1000);
1133  my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
1134 
1135  Description: Returns a slice which is a resized copy of this slice. The
1136  start and end are moved outwards from the center of the slice
1137  if positive values are provided and moved inwards if negative
1138  values are provided. This slice remains unchanged. A slice
1139  may not be contracted below 1bp but may grow to be arbitrarily
1140  large.
1141  Returntype : Bio::EnsEMBL::Slice
1142  Exceptions : warning if an attempt is made to contract the slice below 1bp
1143  Caller : general
1144  Status : Stable
1145 
1146 =cut
1147 
1148 sub expand {
1149  my $self = shift;
1150  my $five_prime_shift = shift || 0;
1151  my $three_prime_shift = shift || 0;
1152  my $force_expand = shift || 0;
1153  my $fpref = shift;
1154  my $tpref = shift;
1155 
1156  if ( $self->{'seq'} ) {
1157  warning(
1158  "Cannot expand a slice which has a manually attached sequence ");
1159  return undef;
1160  }
1161 
1162  my $sshift = $five_prime_shift;
1163  my $eshift = $three_prime_shift;
1164 
1165  if ($sshift == 0 && $eshift == 0) { return $self; }
1166 
1167  if ( $self->{'strand'} != 1 ) {
1168  $eshift = $five_prime_shift;
1169  $sshift = $three_prime_shift;
1170  }
1171 
1172  my $new_start = $self->{'start'} - $sshift;
1173  my $new_end = $self->{'end'} + $eshift;
1174 
1175  # Wrap around on circular slices
1176  if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0
1177  || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
1178 
1179  if ( $new_start <= 0 ) {
1180  $new_start = $self->seq_region_length() + $new_start;
1181  }
1182  if ( $new_start > $self->seq_region_length() ) {
1183  $new_start -= $self->seq_region_length();
1184  }
1185 
1186  if ( $new_end <= 0 ) {
1187  $new_end = $self->seq_region_length() + $new_end;
1188  }
1189  if ( $new_end > $self->seq_region_length() ) {
1190  $new_end -= $self->seq_region_length();
1191  }
1192 
1193  }
1194 
1195  if ( $new_start > $new_end && (not $self->is_circular() ) ) {
1196 
1197  if ($force_expand) {
1198  # Apply max possible shift, if force_expand is set
1199  if ( $sshift < 0 ) {
1200  # if we are contracting the slice from the start - move the
1201  # start just before the end
1202  $new_start = $new_end - 1;
1203  $sshift = $self->{start} - $new_start;
1204  }
1205 
1206  if ( $new_start > $new_end ) {
1207  # if the slice still has a negative length - try to move the
1208  # end
1209  if ( $eshift < 0 ) {
1210  $new_end = $new_start + 1;
1211  $eshift = $new_end - $self->{end};
1212  }
1213  }
1214  # return the values by which the primes were actually shifted
1215  $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
1216  $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
1217  }
1218  if ( $new_start > $new_end ) {
1219  throw('Slice start cannot be greater than slice end');
1220  }
1221  }
1222 
1223  #fastest way to copy a slice is to do a shallow hash copy
1224  my %new_slice = %$self;
1225  $new_slice{'start'} = int($new_start);
1226  $new_slice{'end'} = int($new_end);
1227 
1228  return bless \%new_slice, ref($self);
1229 } ## end sub expand
1230 
1231 =head2 constrain_to_seq_region
1232  Example : $new_slice = $slice->expand(1000,10000);
1233  $new_slice = $new_slice->constrain_to_seq_region();
1234  Description: Used to prevent overly zealous expand calls going off the end of
1235  the sequence region. It contracts the start and end where needed
1236  and produces a slice copy with the tweaked coordinates.
1237  Returntype : Bio::EnsEMBL::Slice
1238 =cut
1239 
1240 sub constrain_to_seq_region {
1241  my ($self) = @_;
1242  # circular calculations should already be taken care of
1243  if ($self->is_circular) {return $self}
1244  my $new_start = $self->start;
1245  my $new_end = $self->end;
1246 
1247  my $seq_region = $self->seq_region_Slice;
1248 
1249  if ($new_start < $seq_region->start) {$new_start = $seq_region->start}
1250  if ($new_end > $seq_region->end) {$new_end = $seq_region->end}
1251 
1252  my %new_slice = %$self;
1253  $new_slice{'start'} = $new_start;
1254  $new_slice{'end'} = $new_end;
1255 
1256  return bless \%new_slice, ref($self);
1257 }
1258 
1259 
1260 =head2 sub_Slice
1261 
1262  Arg 1 : int $start, refers to the start of the subslice relative to the input slice
1263  Arg 2 : int $end, refers to the end of the subslice relative to the input slice
1264  Arge [3] : int $strand
1265  Example : none
1266  Description: Makes another Slice that covers only part of this Slice
1267  If a Slice is requested which lies outside of the boundaries
1268  of this function will return undef. This means that
1269  behaviour will be consistant whether or not the slice is
1270  attached to the database (i.e. if there is attached sequence
1271  to the slice). Alternatively the expand() method or the
1272  SliceAdaptor::fetch_by_region method can be used instead.
1273  Returntype : Bio::EnsEMBL::Slice or undef if arguments are wrong
1274  Exceptions : none
1275  Caller : general
1276  Status : Stable
1277 
1278 =cut
1279 
1280 sub sub_Slice {
1281  my ( $self, $start, $end, $strand ) = @_;
1282 
1283  my $length = $self->length();
1284 
1285  if( $start < 1 || $start > $length ) {
1286  # throw( "start argument not valid" );
1287  return undef;
1288  }
1289 
1290  if( $end < $start || $end > $length ) {
1291  # throw( "end argument not valid" )
1292  return undef;
1293  }
1294 
1295  my ( $new_start, $new_end, $new_strand, $new_seq );
1296  if( ! defined $strand ) {
1297  $strand = 1;
1298  }
1299 
1300  if( $self->{'strand'} == 1 ) {
1301  $new_start = $self->{'start'} + $start - 1;
1302  $new_end = $self->{'start'} + $end - 1;
1303  $new_strand = $strand;
1304  } else {
1305  $new_start = $self->{'end'} - $end + 1;;
1306  $new_end = $self->{'end'} - $start + 1;
1307  $new_strand = -$strand;
1308  }
1309 
1310  if( defined $self->{'seq'} ) {
1311  $new_seq = $self->subseq( $start, $end, $strand );
1312  }
1313 
1314  #fastest way to copy a slice is to do a shallow hash copy
1315  my $new_slice = {%{$self}};
1316  $new_slice->{'start'} = int($new_start);
1317  $new_slice->{'end'} = int($new_end);
1318  $new_slice->{'strand'} = $new_strand;
1319  if( $new_seq ) {
1320  $new_slice->{'seq'} = $new_seq;
1321  }
1322  weaken($new_slice->{adaptor});
1323 
1324  return bless $new_slice, ref($self);
1325 }
1326 
1327 
1328 
1329 =head2 seq_region_Slice
1330 
1331  Arg [1] : none
1332  Example : $slice = $slice->seq_region_Slice();
1333  Description: Returns a slice which spans the whole seq_region which this slice
1334  is on. For example if this is a slice which spans a small region
1335  of chromosome X, this method will return a slice which covers the
1336  entire chromosome X. The returned slice will always have strand
1337  of 1 and start of 1. This method cannot be used if the sequence
1338  of the slice has been set manually.
1339  Returntype : Bio::EnsEMBL::Slice
1340  Exceptions : warning if called when sequence of Slice has been set manually.
1341  Caller : general
1342  Status : Stable
1343 
1344 =cut
1345 
1346 sub seq_region_Slice {
1347  my $self = shift;
1348 
1349  if($self->{'seq'}){
1350  warning("Cannot get a seq_region_Slice of a slice which has manually ".
1351  "attached sequence ");
1352  return undef;
1353  }
1354 
1355  # quick shallow copy
1356  my $slice;
1357  %{$slice} = %{$self};
1358  bless $slice, ref($self);
1359  weaken($slice->{adaptor});
1360 
1361  $slice->{'start'} = 1;
1362  $slice->{'end'} = $slice->{'seq_region_length'};
1363  $slice->{'strand'} = 1;
1364 
1365  return $slice;
1366 }
1367 
1368 
1369 =head2 get_seq_region_id
1370 
1371  Arg [1] : none
1372  Example : my $seq_region_id = $slice->get_seq_region_id();
1373  Description: Gets the internal identifier of the seq_region that this slice
1374  is on. Note that this function will not work correctly if this
1375  slice does not have an attached adaptor. Also note that it may
1376  be better to go through the SliceAdaptor::get_seq_region_id
1377  method if you are working with multiple databases since is
1378  possible to work with slices from databases with different
1379  internal seq_region identifiers.
1380  Returntype : int or undef if slices does not have attached adaptor
1381  Exceptions : warning if slice is not associated with a SliceAdaptor
1382  Caller : assembly loading scripts, general
1383  Status : Stable
1384 
1385 =cut
1386 
1387 sub get_seq_region_id {
1388  my ($self) = @_;
1389  if($self->adaptor) {
1390  return $self->adaptor->get_seq_region_id($self);
1391  }
1392  else {
1393  warning('Cannot retrieve seq_region_id without attached adaptor.');
1394  return undef;
1395  }
1396 }
1397 
1398 =head2 get_genome_component
1399 
1400  Arg [] : none
1401  Example : my $genome_component = $slice->get_genome_component();
1402  Description: Returns the genome component of the slice
1403  Returntype : Scalar; the identifier of the genome component of the slice
1404  Exceptions : none
1405  Caller : general
1406  Status : Stable
1407 
1408 =cut
1409 
1410 sub get_genome_component {
1411  my $self = shift;
1412  return $self->adaptor->get_genome_component_for_slice($self);
1413 }
1414 
1415 =head2 get_all_Attributes
1416 
1417  Arg [1] : optional string $attrib_code
1418  The code of the attribute type to retrieve values for.
1419  Example : ($htg_phase) = @{$slice->get_all_Attributes('htg_phase')};
1420  @slice_attributes = @{$slice->get_all_Attributes()};
1421  Description: Gets a list of Attributes of this slice''s seq_region.
1422  Optionally just get Attrubutes for given code.
1423  Returntype : listref Bio::EnsEMBL::Attribute
1424  Exceptions : warning if slice does not have attached adaptor
1425  Caller : general
1426  Status : Stable
1427 
1428 =cut
1429 
1430 sub get_all_Attributes {
1431  my ($self, $attrib_code) = @_;
1432  if(my $adaptor = $self->_get_CoreAdaptor('Attribute')) {
1433  my $attribs = $adaptor->fetch_all_by_Slice($self);
1434  if(defined $attrib_code) {
1435  my $uc_attrib = uc($attrib_code);
1436  return [ grep { uc($_->code()) eq $uc_attrib } @{$attribs}];
1437  }
1438  return $attribs;
1439  }
1440  return [];
1441 }
1442 
1443 =head2 get_all_PredictionTranscripts
1444 
1445  Arg [1] : (optional) string $logic_name
1446  The name of the analysis used to generate the prediction
1447  transcripts obtained.
1448  Arg [2] : (optional) boolean $load_exons
1449  If set to true will force loading of all PredictionExons
1450  immediately rather than loading them on demand later. This
1451  is faster if there are a large number of PredictionTranscripts
1452  and the exons will be used.
1453  Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
1454  Description: Retrieves the list of prediction transcripts which overlap
1455  this slice with logic_name $logic_name. If logic_name is
1456  not defined then all prediction transcripts are retrieved.
1457  Returntype : listref of Bio::EnsEMBL::PredictionTranscript
1458  Exceptions : warning if slice does not have attached adaptor
1459  Caller : none
1460  Status : Stable
1461 
1462 =cut
1463 
1464 sub get_all_PredictionTranscripts {
1465  my ($self,$logic_name, $load_exons) = @_;
1466 
1467  if(!$self->adaptor()) {
1468  warning('Cannot get PredictionTranscripts without attached adaptor');
1469  return [];
1470  }
1471  my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
1472  return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
1473 }
1474 
1475 =head2 get_all_DnaAlignFeatures
1476 
1477  Arg [1] : (optional) string $logic_name
1478  The name of the analysis performed on the dna align features
1479  to obtain.
1480  Arg [2] : (optional) float $score
1481  The mimimum score of the features to retrieve
1482  Arg [3] : (optional) string $dbtype
1483  The name of an attached database to retrieve the features from
1484  instead, e.g. 'otherfeatures'.
1485  Arg [4] : (optional) float hcoverage
1486  The minimum hcoverage od the featurs to retrieve
1487  Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
1488  Description: Retrieves the DnaDnaAlignFeatures which overlap this slice with
1489  logic name $logic_name and with score above $score. If
1490  $logic_name is not defined features of all logic names are
1491  retrieved. If $score is not defined features of all scores are
1492  retrieved. Strand of the Slice is not considered.
1493  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
1494  Exceptions : warning if slice does not have attached adaptor
1495  Caller : general
1496  Status : Stable
1497 
1498 =cut
1499 
1500 sub get_all_DnaAlignFeatures {
1501  my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1502  return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage, 'DnaAlignFeature');
1503 }
1504 
1505 =head2 get_all_ProteinAlignFeatures
1506 
1507  Arg [1] : (optional) string $logic_name
1508  The name of the analysis performed on the protein align features
1509  to obtain.
1510  Arg [2] : (optional) float $score
1511  The mimimum score of the features to retrieve
1512  Arg [3] : (optional) string $dbtype
1513  The name of an attached database to retrieve features from
1514  instead.
1515  Arg [4] : (optional) float hcoverage
1516  The minimum hcoverage od the featurs to retrieve
1517  Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
1518  Description: Retrieves the DnaPepAlignFeatures which overlap this slice with
1519  logic name $logic_name and with score above $score. If
1520  $logic_name is not defined features of all logic names are
1521  retrieved. If $score is not defined features of all scores are
1522  retrieved.
1523  Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
1524  Exceptions : warning if slice does not have attached adaptor
1525  Caller : general
1526  Status : Stable
1527 
1528 =cut
1529 
1530 sub get_all_ProteinAlignFeatures {
1531  my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1532  return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage, 'ProteinAlignFeature');
1533 }
1534 
1535 =head2 _get_AlignFeatures
1536 
1537  Arg [1] : (optional) string $logic_name
1538  The name of the analysis performed on the protein align features
1539  to obtain.
1540  Arg [2] : (optional) float $score
1541  The mimimum score of the features to retrieve
1542  Arg [3] : (optional) string $dbtype
1543  The name of an attached database to retrieve features from
1544  instead.
1545  Arg [4] : (optional) float hcoverage
1546  The minimum hcoverage od the featurs to retrieve
1547  Arg [5] : string $align_type
1548  The type of adaptor to retrieve alignments from. Must be an BaseAlignFeature derived
1549  class
1550  Description: Generic method which deals with the retrieval of either AlignFeature adaptor and allows
1551  you to switch the adaptor values are retrieved from.
1552 
1553 =cut
1554 
1555 sub _get_AlignFeatures {
1556  my ($self, $logic_name, $score, $dbtype, $hcoverage, $align_type) = @_;
1557  throw "No align_type given" unless $align_type;
1558  if(my $adaptor = $self->_get_CoreAdaptor($align_type, $dbtype)) {
1559  if(defined($score) and defined ($hcoverage)){
1560  warning "Cannot specify score and hcoverage when querying for $align_type. Using score only";
1561  }
1562  if(defined($score)){
1563  return $adaptor->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1564  }
1565  return $adaptor->fetch_all_by_Slice_and_hcoverage($self, $hcoverage, $logic_name);
1566  }
1567  return [];
1568 }
1569 
1570 =head2 get_all_SimilarityFeatures
1571 
1572  Arg [1] : (optional) string $logic_name
1573  the name of the analysis performed on the features to retrieve
1574  Arg [2] : (optional) float $score
1575  the lower bound of the score of the features to be retrieved
1576  Example : @feats = @{$slice->get_all_SimilarityFeatures};
1577  Description: Retrieves all dna_align_features and protein_align_features
1578  with analysis named $logic_name and with score above $score.
1579  It is probably faster to use get_all_ProteinAlignFeatures or
1580  get_all_DnaAlignFeatures if a sepcific feature type is desired.
1581  If $logic_name is not defined features of all logic names are
1582  retrieved. If $score is not defined features of all scores are
1583  retrieved.
1584  Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
1585  Exceptions : warning if slice does not have attached adaptor
1586  Caller : general
1587  Status : Stable
1588 
1589 =cut
1590 
1591 sub get_all_SimilarityFeatures {
1592  my ($self, $logic_name, $score) = @_;
1593 
1594  my @out = ();
1595 
1596  push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
1597  push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
1598 
1599  return \@out;
1600 }
1601 
1602 =head2 get_all_SimpleFeatures
1603 
1604  Arg [1] : (optional) string $logic_name
1605  The name of the analysis performed on the simple features
1606  to obtain.
1607  Arg [2] : (optional) float $score
1608  The mimimum score of the features to retrieve
1609  Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
1610  Description: Retrieves the SimpleFeatures which overlap this slice with
1611  logic name $logic_name and with score above $score. If
1612  $logic_name is not defined features of all logic names are
1613  retrieved. If $score is not defined features of all scores are
1614  retrieved.
1615  Returntype : listref of Bio::EnsEMBL::SimpleFeatures
1616  Exceptions : warning if slice does not have attached adaptor
1617  Caller : general
1618  Status : Stable
1619 
1620 =cut
1621 
1622 sub get_all_SimpleFeatures {
1623  my ($self, $logic_name, $score, $dbtype) = @_;
1624  if(my $adaptor = $self->_get_CoreAdaptor('SimpleFeature', $dbtype)) {
1625  return $adaptor->fetch_all_by_Slice_and_score($self, $score, $logic_name);
1626  }
1627  return [];
1628 }
1629 
1630 =head2 get_all_RepeatFeatures
1631 
1632  Arg [1] : (optional) string $logic_name
1633  The name of the analysis performed on the repeat features
1634  to obtain.
1635  Arg [2] : (optional) string/array $repeat_type
1636  Limits features returned to those of the specified
1637  repeat_type. Can specify a single value or an array reference
1638  to limit by more than one
1639  Arg [3] : (optional) string $db
1640  Key for database e.g. core/vega/cdna/....
1641  Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,'Type II Transposons')};
1642  Description: Retrieves the RepeatFeatures which overlap with
1643  logic name $logic_name and with score above $score. If
1644  $logic_name is not defined features of all logic names are
1645  retrieved.
1646  Returntype : listref of Bio::EnsEMBL::RepeatFeatures
1647  Exceptions : warning if slice does not have attached adaptor
1648  Caller : general
1649  Status : Stable
1650 
1651 =cut
1652 
1653 sub get_all_RepeatFeatures {
1654  my ($self, $logic_name, $repeat_type, $dbtype) = @_;
1655  if(my $adaptor = $self->_get_CoreAdaptor('RepeatFeature', $dbtype)) {
1656  return $adaptor->fetch_all_by_Slice($self, $logic_name, $repeat_type);
1657  }
1658  return [];
1659 }
1660 
1661 =head2 get_all_LD_values
1662 
1663  Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
1664  Description : returns all LD values on this slice. This function will only work correctly if the variation
1665  database has been attached to the core database. If the argument is passed, will return the LD information
1666  in that population
1667  ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1668  Exceptions : none
1669  Caller : contigview, snpview
1670  Status : Stable
1671 
1672 =cut
1673 
1674 sub get_all_LD_values {
1675  my $self = shift;
1676  my $population = shift;
1677 
1678  my $ld_adaptor = $self->_get_VariationAdaptor('LDFeatureContainer');
1679  if($ld_adaptor) {
1680  return $ld_adaptor->fetch_by_Slice($self,$population);
1681  }
1682  return [];
1683 }
1684 
1685 =head2 _get_VariationFeatureAdaptor
1686 
1687 Shortcut method here because VariationFeature is an often requested
1688 adaptor type.
1689 
1690 =cut
1691 
1692 sub _get_VariationFeatureAdaptor {
1693  my ($self, $dbtype) = @_;
1694  return $self->_get_VariationAdaptor('VariationFeature', $dbtype);
1695 }
1696 
1697 =head2 _get_StructuralVariationFeatureAdaptor
1698 
1699 Shortcut method here because StructuralVariationFeature is an often requested
1700 adaptor type.
1701 
1702 =cut
1703 
1704 sub _get_StructuralVariationFeatureAdaptor {
1705  my ($self, $dbtype) = @_;
1706  return $self->_get_VariationAdaptor('StructuralVariationFeature', $dbtype);
1707 }
1708 
1709 =head2 _get_VariationAdaptor
1710 
1711  Arg [1] : String object_type to retrieve an adaptor for
1712  Arg [2] : String dbtype to search for the given adaptor in. Defaults to variation
1713  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
1714  it will return nothing if the adaptor was not found
1715  ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derrived instance (specific to variation)
1716  Exceptions : none
1717 
1718 =cut
1719 
1720 sub _get_VariationAdaptor {
1721  my ($self, $object_type, $dbtype) = @_;
1722  # very important to do this defaulting since we *have* to assume the variation
1723  # database is called 'variation'. Using the current group will not work because
1724  # that will be something like 'core' (most likely), 'ensembl' or 'vega'.
1725  $dbtype ||= 'variation';
1726 
1727  # Also we do not care about Registry->get_db() for variation DBs
1728  my $do_not_check_db = 1;
1729 
1730  return $self->_get_Adaptor($object_type, $dbtype, $do_not_check_db);
1731 }
1732 
1733 =head2 _get_CoreAdaptor
1734 
1735  Arg [1] : String object_type to retrieve an adaptor for
1736  Arg [2] : String dbtype to search for the given adaptor in. Defaults to core
1737  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
1738  it will return nothing if the adaptor was not found
1739  ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derrived instance (specific to core-like dbs)
1740  Exceptions : none
1741 
1742 =cut
1743 
1744 sub _get_CoreAdaptor {
1745  my ($self, $object_type, $dbtype) = @_;
1746  #Simple pass through
1747  return $self->_get_Adaptor($object_type, $dbtype);
1748 }
1749 
1750 =head2 _get_Adaptor
1751 
1752  Arg [1] : String object_type to retrieve an adaptor for
1753  Arg [2] : String dbtype to search for the given adaptor in
1754  Arg [3] : Boolean Turn off the checking of Registry->get_db() for your
1755  adaptor.
1756  Description : Searches for the specified adaptor in the Registry and returns it. Otherwise
1757  it will return nothing if the adaptor was not found. We consult the
1758  "special" adaptors held by Bio::EnsEMBL::Registry::get_db() method and then
1759  fall back to the normal methods of finding an adaptor.
1760 
1761  This method will warn when adaptors are missing but will never through an
1762  exception. It is up to the calling code to decide how to handle the unavailablity
1763  of an adaptor.
1764  ReturnType : Bio::EnsEMBL::DBSQL::BaseAdaptor derrived instance. Otherwise it returns nothing
1765  Exceptions : none
1766 
1767 =cut
1768 
1769 sub _get_Adaptor {
1770  my ($self, $object_type, $dbtype, $do_not_check_db) = @_;
1771 
1772  if(!$object_type) {
1773  warning('Object type is a required parameter');
1774  return;
1775  }
1776 
1777  my $adaptor = $self->adaptor();
1778 
1779  if(!$adaptor) {
1780  warning("Cannot get a ${object_type} adaptor without a SliceAdaptor attached to this instance of ".ref($self));
1781  return;
1782  }
1783 
1784  my $local_db = $adaptor->db();
1785  my $species = $local_db->species();
1786 
1787  #First we query for the DBAdaptor using get_db(). This is a deprecated method
1788  #call but "special" adaptors can be registered via this method. We must
1789  #consult here 1st to find the possible special adaptor
1790  if(!$do_not_check_db && $dbtype) {
1791  my $db = $registry->get_db($local_db, $dbtype);
1792  if($db) {
1793  # If we got a return then use this DBAdaptor's species name, group and the given object type.
1794  # Special adaptors can have different species names
1795  $adaptor = $registry->get_adaptor($db->species(), $db->group(), $object_type);
1796  }
1797  else {
1798  #Otherwise just use the current species, dbtype and object type
1799  $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1800  }
1801  }
1802  # Otherwise our query group is the one attached to the current adaptor
1803  else {
1804  #If not set use the group attached to the local adaptor
1805  $dbtype ||= $local_db->group();
1806  $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1807  }
1808  return $adaptor if $adaptor;
1809 
1810  warning("No adaptor could be found for the species ${species}, database type ${dbtype} and object type ${object_type}");
1811  return;
1812 }
1813 
1814 =head2 get_all_VariationFeatures
1815 
1816  Args [1] : (optional) ArrayRef $so_terms
1817  SequenceOntology terms to limit the fetch to
1818  Args [2] : (optional) boolean $without_children
1819  Do not query using the children of the given SO terms
1820  i.e. query using the given terms directly
1821  Args [3] : (optional) ArrayRef $included_so
1822  ArrayRef of SequenceOntology which should be queried for
1823  without children. This argument allows you to combine SO terms with children
1824  from argument 1 with extra non-child SO terms. e.g. you wish to query for
1825  all protein_altering_variant (specified in argument 1) variations which
1826  would be defined by child SO terms but also wanted stop_retained_variant linked variations
1827  defined by this argument
1828  Args [4] : (optional) string $dbtype
1829  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1830  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1831  DBConnection::add_db_adaptor method).
1832  Description : Returns all germline variation features on this slice. This function will
1833  only work correctly if the variation database has been attached to the core
1834  database.
1835  If $so_terms is specified, only variation features with a consequence type
1836  that matches or is an ontological child of any of the supplied terms will
1837  be returned
1838  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1839  Exceptions : none
1840  Caller : contigview, snpview
1841  Status : Stable
1842 
1843 =cut
1844 
1845 sub get_all_VariationFeatures{
1846  my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1847  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1848  return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1849  }
1850  return [];
1851 }
1852 
1853 =head2 get_all_somatic_VariationFeatures
1854  Args [1] : (optional) ArrayRef $so_terms
1855  SequenceOntology terms to limit the fetch to
1856  Args [2] : (optional) boolean $without_children
1857  Do not query using the children of the given SO terms
1858  i.e. query using the given terms directly
1859  Args [3] : (optional) ArrayRef $included_so
1860  ArrayRef of SequenceOntology which should be queried for
1861  without children. This argument allows you to combine SO terms with children
1862  from argument 1 with extra non-child SO terms. e.g. you wish to query for
1863  all protein_altering_variant (specified in argument 1) variations which
1864  would be defined by child SO terms but also wanted stop_retained_variant linked variations
1865  defined by this argument
1866  Args [4] : (optional) string $dbtype
1867  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1868  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1869  DBConnection::add_db_adaptor method).
1870  Description : Returns all somatic variation features on this slice. This function will only
1871  work correctly if the variation database has been attached to the core database.
1872  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1873  Exceptions : none
1874  Status : Stable
1875 
1876 =cut
1877 
1878 sub get_all_somatic_VariationFeatures {
1879  my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1880  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1881  return $vf_adaptor->fetch_all_somatic_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1882  }
1883  return [];
1884 }
1885 
1886 =head2 get_all_somatic_VariationFeatures_by_source
1887  Arg [1] : string $source [optional]
1888  The name of the source to query for
1889  Arg [2] : string $dbtype [optional]
1890  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1891  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1892  DBConnection::add_db_adaptor method).
1893  Description : Returns all somatic variation features, from a defined source name (e.g.'COSMIC'),
1894  on this slice. This function will only work correctly if the variation database
1895  has been attached to the core database.
1896  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1897  Exceptions : none
1898  Status : Stable
1899 
1900 =cut
1901 
1902 sub get_all_somatic_VariationFeatures_by_source {
1903  my ($self, $source, $dbtype) = @_;
1904  my $constraint = (defined($source)) ? " s.name='$source' " : undef;
1905  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1906  return $vf_adaptor->fetch_all_somatic_by_Slice_constraint($self, $constraint);
1907  }
1908  return [];
1909 }
1910 
1911 =head2 get_all_somatic_VariationFeatures_with_phenotype
1912  Arg [1] : $variation_feature_source [optional]
1913  Arg [2] : $phenotype_source [optional]
1914  Arg [3] : $phenotype_name [optional]
1915  Arg [4] : string $dbtype [optional]
1916  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1917  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1918  DBConnection::add_db_adaptor method).
1919  Description : returns all somatic variation features on this slice associated with a phenotype.
1920  (see get_all_VariationFeatures_with_phenotype for further documentation)
1921  ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1922  Exceptions : none
1923  Status : Stable
1924 
1925 =cut
1926 
1927 sub get_all_somatic_VariationFeatures_with_phenotype {
1928  my ($self, $source, $p_source, $phenotype, $dbtype) = @_;
1929  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1930  return $vf_adaptor->fetch_all_somatic_with_phenotype_by_Slice($self, $source, $p_source, $phenotype);
1931  }
1932  return [];
1933 }
1934 
1935 
1936 =head2 get_all_VariationFeatures_by_Population
1937  Arg [1] : Bio::EnsEMBL::Variation::Population
1938  Arg [2] : $minimum_frequency [optional]
1939  Arg [3] : string $dbtype [optional]
1940  The dbtype of variation to obtain (i.e. can be different from the "variation" type).
1941  This assumes that the extra db has been added to the DBAdaptor under this name (using the
1942  DBConnection::add_db_adaptor method).
1943  Example : $pop = $pop_adaptor->fetch_by_dbID(659);
1944  @vfs = @{$slice->get_all_VariationFeatures_by_Population($pop,$slice)};
1945  Description : Retrieves all variation features in a slice which are stored for
1946  a specified population. If $minimum_frequency is supplied, only
1947  variations with a minor allele frequency (MAF) greater than
1948  $minimum_frequency will be returned.
1949  Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
1950  Exceptions : throw on incorrect argument
1951  Caller : general
1952  Status : At Risk
1953 
1954 =cut
1955 
1956 sub get_all_VariationFeatures_by_Population {
1957  my ($self, $minimum_frequency, $dbtype) = @_;
1958  if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1959  return $vf_adaptor->fetch_all_by_Slice_Population($self, $minimum_frequency);
1960  }
1961  return [];
1962 }
1963 
1964 =head2 get_all_Genes
1965 
1966  Arg [1] : (optional) string $logic_name
1967  The name of the analysis used to generate the genes to retrieve
1968  Arg [2] : (optional) string $dbtype
1969  The dbtype of genes to obtain. This assumes that the db has
1970  been added to the DBAdaptor under this name (using the
1971  DBConnection::add_db_adaptor method).
1972  Arg [3] : (optional) boolean $load_transcripts
1973  If set to true, transcripts will be loaded immediately rather
1974  than being lazy-loaded on request. This will result in a
1975  significant speed up if the Transcripts and Exons are going to
1976  be used (but a slow down if they are not).
1977  Arg [4] : (optional) string $source
1978  The source of the genes to retrieve.
1979  Arg [5] : (optional) string $biotype
1980  The biotype of the genes to retrieve.
1981  Example : @genes = @{$slice->get_all_Genes};
1982  Description: Retrieves all genes that overlap this slice, including those on
1983  the reverse strand.
1984  Returntype : listref of Bio::EnsEMBL::Genes
1985  Exceptions : none
1986  Caller : none
1987  Status : Stable
1988 
1989 =cut
1990 
1991 sub get_all_Genes{
1992  my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
1993  if(my $adaptor = $self->_get_CoreAdaptor('Gene', $dbtype)) {
1994  return $adaptor->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
1995  }
1996  return [];
1997 }
1998 
1999 =head2 get_all_Genes_by_type
2000 
2001  Arg [1] : string $type
2002  The biotype of genes wanted.
2003  Arg [2] : (optional) string $logic_name
2004  Arg [3] : (optional) boolean $load_transcripts
2005  If set to true, transcripts will be loaded immediately rather
2006  than being lazy-loaded on request. This will result in a
2007  significant speed up if the Transcripts and Exons are going to
2008  be used (but a slow down if they are not).
2009  Example : @genes = @{$slice->get_all_Genes_by_type('protein_coding',
2010  'ensembl')};
2011  Description: Retrieves genes that overlap this slice of biotype $type.
2012  This is primarily used by the genebuilding code when several
2013  biotypes of genes are used.
2014 
2015  The logic name is the analysis of the genes that are retrieved.
2016  If not provided all genes will be retrieved instead. Both
2017  positive and negative strand Genes will be returned.
2018 
2019  Returntype : listref of Bio::EnsEMBL::Genes
2020  Exceptions : none
2021  Caller : genebuilder, general
2022  Status : Stable
2023 
2024 =cut
2025 
2026 sub get_all_Genes_by_type {
2027  my ($self, $type, $logic_name, $load_transcripts) = @_;
2028  return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
2029 }
2030 
2031 
2032 =head2 get_all_Genes_by_source
2033 
2034  Arg [1] : string source
2035  Arg [2] : (optional) boolean $load_transcripts
2036  If set to true, transcripts will be loaded immediately rather
2037  than being lazy-loaded on request. This will result in a
2038  significant speed up if the Transcripts and Exons are going to
2039  be used (but a slow down if they are not).
2040  Example : @genes = @{$slice->get_all_Genes_by_source('ensembl')};
2041  Description: Retrieves genes that overlap this slice of source $source.
2042  Strand of the Slice does not affect the result.
2043  Returntype : listref of Bio::EnsEMBL::Genes
2044  Exceptions : none
2045  Caller : general
2046  Status : Stable
2047 
2048 =cut
2049 
2050 sub get_all_Genes_by_source {
2051  my ($self, $source, $load_transcripts) = @_;
2052  return $self->get_all_Genes(undef, undef, $load_transcripts, $source);
2053 }
2054 
2055 =head2 get_all_Transcripts
2056 
2057  Arg [1] : (optional) boolean $load_exons
2058  If set to true exons will not be lazy-loaded but will instead
2059  be loaded right away. This is faster if the exons are
2060  actually going to be used right away.
2061  Arg [2] : (optional) string $logic_name
2062  the logic name of the type of features to obtain
2063  Arg [3] : (optional) string $db_type
2064  Example : @transcripts = @{$slice->get_all_Transcripts)_};
2065  Description: Gets all transcripts which overlap this slice. If you want to
2066  specify a particular analysis or type, then you are better off
2067  using get_all_Genes or get_all_Genes_by_type and iterating
2068  through the transcripts of each gene. Strand of the Slice is
2069  ignored.
2070  Returntype : reference to a list of Bio::EnsEMBL::Transcripts
2071  Exceptions : none
2072  Caller : general
2073  Status : Stable
2074 
2075 =cut
2076 
2077 sub get_all_Transcripts {
2078  my ($self, $load_exons, $logic_name, $dbtype, $source, $biotype) = @_;
2079  if(my $adaptor = $self->_get_CoreAdaptor('Transcript', $dbtype)) {
2080  return $adaptor->fetch_all_by_Slice($self, $load_exons, $logic_name, undef, $source, $biotype);
2081  }
2082  return [];
2083 }
2084 
2085 =head2 get_all_Transcripts_by_type
2086 
2087  Arg [1] : string $type
2088  The biotype of transcripts wanted.
2089  Arg [2] : (optional) string $logic_name
2090  Arg [3] : (optional) boolean $load_exons
2091  If set to true exons will not be lazy-loaded but will instead
2092  be loaded right away. This is faster if the exons are
2093  actually going to be used right away.
2094 
2095  Example : @transcripts = @{$slice->get_all_Transcripts_by_type('protein_coding',
2096  'ensembl')};
2097  Description: Retrieves transcripts that overlap this slice of biotype $type.
2098  This is primarily used by the genebuilding code when several
2099  biotypes of transcripts are used.
2100 
2101  The logic name is the analysis of the transcripts that are retrieved.
2102  If not provided all transcripts will be retrieved instead. Both
2103  positive and negative strand transcripts will be returned.
2104 
2105  Returntype : listref of Bio::EnsEMBL::Transcripts
2106  Exceptions : none
2107  Caller : genebuilder, general
2108  Status : Stable
2109 
2110 =cut
2111 
2112 sub get_all_Transcripts_by_type {
2113  my ($self, $biotype, $logic_name, $load_exons) = @_;
2114  return $self->get_all_Transcripts($load_exons, $logic_name, undef, undef, $biotype);
2115 }
2116 
2117 
2118 =head2 get_all_Transcripts_by_source
2119 
2120  Arg [1] : string source
2121  Arg [2] : (optional) boolean $load_exons
2122  If set to true exons will not be lazy-loaded but will instead
2123  be loaded right away. This is faster if the exons are
2124  actually going to be used right away.
2125  Example : @transcripts = @{$slice->get_all_Transcripts_by_source('ensembl')};
2126  Description: Retrieves transcripts that overlap this slice of source $source.
2127  Strand of the Slice does not affect the result.
2128  Returntype : listref of Bio::EnsEMBL::Transcripts
2129  Exceptions : none
2130  Caller : general
2131  Status : Stable
2132 
2133 =cut
2134 
2135 sub get_all_Transcripts_by_source {
2136  my ($self, $source, $load_exons) = @_;
2137  return $self->get_all_Transcripts($load_exons, undef, undef, $source);
2138 
2139 }
2140 
2141 
2142 =head2 get_all_Exons
2143 
2144  Arg [1] : none
2145  Example : @exons = @{$slice->get_all_Exons};
2146  Description: Gets all exons which overlap this slice. Note that these exons
2147  will not be associated with any transcripts, so this may not
2148  be terribly useful.
2149  Returntype : reference to a list of Bio::EnsEMBL::Exons
2150  Exceptions : none
2151  Caller : general
2152  Status : Stable
2153 
2154 =cut
2155 
2156 sub get_all_Exons {
2157  my $self = shift;
2158  if(!$self->adaptor()) {
2159  warning('Cannot get Exons without attached adaptor');
2160  return [];
2161  }
2162  return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
2163 }
2164 
2165 =head2 get_all_KaryotypeBands
2166 
2167  Arg [1] : none
2168  Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
2169  Description: Retrieves the karyotype bands which this slice overlaps.
2170  Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
2171  Exceptions : none
2172  Caller : general, contigview
2173  Status : Stable
2174 
2175 =cut
2176 
2177 sub get_all_KaryotypeBands {
2178  my ($self) = @_;
2179  if (my $adaptor = $self->_get_CoreAdaptor('KaryotypeBand')) {
2180  return $adaptor->fetch_all_by_Slice($self);
2181  }
2182  return [];
2183 }
2184 
2185 =head2 get_repeatmasked_seq
2186 
2187  Arg [1] : listref of strings $logic_names (optional)
2188  Arg [2] : int $soft_masking_enable (optional)
2189  Arg [3] : hash reference $not_default_masking_cases (optional, default is {})
2190  The values are 0 or 1 for hard and soft masking respectively
2191  The keys of the hash should be of 2 forms
2192  "repeat_class_" . $repeat_consensus->repeat_class,
2193  e.g. "repeat_class_SINE/MIR"
2194  "repeat_name_" . $repeat_consensus->name
2195  e.g. "repeat_name_MIR"
2196  depending on which base you want to apply the not default
2197  masking either the repeat_class or repeat_name. Both can be
2198  specified in the same hash at the same time, but in that case,
2199  repeat_name setting has priority over repeat_class. For example,
2200  you may have hard masking as default, and you may want soft
2201  masking of all repeat_class SINE/MIR, but repeat_name AluSp
2202  (which are also from repeat_class SINE/MIR).
2203  Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2204  "repeat_name_AluSp" => 0}
2205  Example : $rm_slice = $slice->get_repeatmasked_seq();
2206  $softrm_slice = $slice->get_repeatmasked_seq(['RepeatMask'],1);
2207  Description: Returns Bio::EnsEMBL::Slice that can be used to create repeat
2208  masked sequence instead of the regular sequence.
2209  Sequence returned by this new slice will have repeat regions
2210  hardmasked by default (sequence replaced by N) or
2211  or soft-masked when arg[2] = 1 (sequence in lowercase)
2212  Will only work with database connection to get repeat features.
2213  Returntype : Bio::EnsEMBL::RepeatMaskedSlice
2214  Exceptions : none
2215  Caller : general
2216  Status : Stable
2217 
2218 =cut
2219 
2220 sub get_repeatmasked_seq {
2221  my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
2222 
2224  (-START => $self->{'start'},
2225  -END => $self->{'end'},
2226  -STRAND => $self->{'strand'},
2227  -ADAPTOR => $self->adaptor(),
2228  -SEQ => $self->{'seq'},
2229  -SEQ_REGION_NAME => $self->{'seq_region_name'},
2230  -SEQ_REGION_LENGTH => $self->{'seq_region_length'},
2231  -COORD_SYSTEM => $self->{'coord_system'},
2232  -REPEAT_MASK => $logic_names,
2233  -SOFT_MASK => $soft_mask,
2234  -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
2235 }
2236 
2237 
2238 
2239 =head2 _mask_features
2240 
2241  Arg [1] : reference to a string $dnaref
2242  Arg [2] : array_ref $repeats
2243  reference to a list Bio::EnsEMBL::RepeatFeature
2244  give the list of coordinates to replace with N or with
2245  lower case
2246  Arg [3] : int $soft_masking_enable (optional)
2247  Arg [4] : hash reference $not_default_masking_cases (optional, default is {})
2248  The values are 0 or 1 for hard and soft masking respectively
2249  The keys of the hash should be of 2 forms
2250  "repeat_class_" . $repeat_consensus->repeat_class,
2251  e.g. "repeat_class_SINE/MIR"
2252  "repeat_name_" . $repeat_consensus->name
2253  e.g. "repeat_name_MIR"
2254  depending on which base you want to apply the not default masking either
2255  the repeat_class or repeat_name. Both can be specified in the same hash
2256  at the same time, but in that case, repeat_name setting has priority over
2257  repeat_class. For example, you may have hard masking as default, and
2258  you may want soft masking of all repeat_class SINE/MIR,
2259  but repeat_name AluSp (which are also from repeat_class SINE/MIR).
2260  Your hash will be something like {"repeat_class_SINE/MIR" => 1,
2261  "repeat_name_AluSp" => 0}
2262  Example : none
2263  Description: replaces string positions described in the RepeatFeatures
2264  with Ns (default setting), or with the lower case equivalent
2265  (soft masking). The reference to a dna string which is passed
2266  is changed in place.
2267  Returntype : none
2268  Exceptions : none
2269  Caller : seq
2270  Status : Stable
2271 
2272 =cut
2273 
2274 sub _mask_features {
2275  my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
2276 
2277  $soft_mask = 0 unless (defined $soft_mask);
2278  $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
2279 
2280  # explicit CORE::length call, to avoid any confusion with the Slice
2281  # length method
2282  my $dnalen = CORE::length($$dnaref);
2283 
2284  REP:foreach my $old_f (@{$repeats}) {
2285  my $f = $old_f->transfer( $self );
2286  my $start = $f->start;
2287  my $end = $f->end;
2288  my $length = ($end - $start) + 1;
2289 
2290  # check if we get repeat completely outside of expected slice range
2291  if ($end < 1 || $start > $dnalen) {
2292  # warning("Unexpected: Repeat completely outside slice coordinates.");
2293  next REP;
2294  }
2295 
2296  # repeat partly outside slice range, so correct
2297  # the repeat start and length to the slice size if needed
2298  if ($start < 1) {
2299  $start = 1;
2300  $length = ($end - $start) + 1;
2301  }
2302 
2303  # repeat partly outside slice range, so correct
2304  # the repeat end and length to the slice size if needed
2305  if ($end > $dnalen) {
2306  $end = $dnalen;
2307  $length = ($end - $start) + 1;
2308  }
2309 
2310  $start--;
2311 
2312  my $padstr;
2313  # if we decide to define masking on the base of the repeat_type, we'll need
2314  # to add the following, and the other commented line few lines below.
2315  my $rc_class;
2316  my $rc_name;
2317 
2318  if ($f->isa('Bio::EnsEMBL::RepeatFeature')) {
2319  $rc_class = "repeat_class_" . $f->repeat_consensus->repeat_class;
2320  $rc_name = "repeat_name_" . $f->repeat_consensus->name;
2321  }
2322 
2323  my $masking_type;
2324  $masking_type = $not_default_masking_cases->{$rc_class} if (defined $not_default_masking_cases->{$rc_class});
2325  $masking_type = $not_default_masking_cases->{$rc_name} if (defined $not_default_masking_cases->{$rc_name});
2326 
2327  $masking_type = $soft_mask unless (defined $masking_type);
2328 
2329  if ($masking_type) {
2330  $padstr = lc substr ($$dnaref,$start,$length);
2331  } else {
2332  $padstr = 'N' x $length;
2333  }
2334  substr ($$dnaref,$start,$length) = $padstr;
2335  }
2336 }
2337 
2338 
2339 =head2 get_all_SearchFeatures
2340 
2341  Arg [1] : scalar $ticket_ids
2342  Example : $slice->get_all_SearchFeatures('BLA_KpUwwWi5gY');
2343  Description: Retrieves all search features for stored blast
2344  results for the ticket that overlap this slice
2345  Returntype : listref of Bio::EnsEMBL::SeqFeatures
2346  Exceptions : none
2347  Caller : general (webby!)
2348  Status : Stable
2349 
2350 =cut
2351 
2352 sub get_all_SearchFeatures {
2353  my $self = shift;
2354  my $ticket = shift;
2355  local $_;
2356  unless($ticket) {
2357  throw("ticket argument is required");
2358  }
2359 
2360  if(!$self->adaptor()) {
2361  warning("Cannot get SearchFeatures without an attached adaptor");
2362  return [];
2363  }
2364 
2365  my $sfa = $self->adaptor()->db()->get_db_adaptor('blast');
2366 
2367  my $offset = $self->start-1;
2368 
2369  my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
2370 
2371  foreach( @$features ) {
2372  $_->start( $_->start - $offset );
2373  $_->end( $_->end - $offset );
2374  };
2375  return $features;
2376 
2377 }
2378 
2379 =head2 get_all_AssemblyExceptionFeatures
2380 
2381  Example : $slice->get_all_AssemblyExceptionFeatures();
2382  Description: Retrieves all misc features which overlap this slice. If
2383  a set code is provided only features which are members of
2384  the requested set are returned.
2385  Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
2386  Exceptions : none
2387  Caller : general
2388  Status : Stable
2389 
2390 =cut
2391 
2392 sub get_all_AssemblyExceptionFeatures {
2393  my ($self) = @_;
2394  if(my $adaptor = $self->_get_CoreAdaptor('AssemblyExceptionFeature')) {
2395  return $adaptor->fetch_all_by_Slice($self);
2396  }
2397  return [];
2398 }
2399 
2400 
2401 
2402 =head2 get_all_MiscFeatures
2403 
2404  Arg [1] : string $set (optional)
2405  Arg [2] : string $database (optional)
2406  Example : $slice->get_all_MiscFeatures('cloneset');
2407  Description: Retrieves all misc features which overlap this slice. If
2408  a set code is provided only features which are members of
2409  the requested set are returned.
2410  Returntype : listref of Bio::EnsEMBL::MiscFeatures
2411  Exceptions : none
2412  Caller : general
2413  Status : Stable
2414 
2415 =cut
2416 
2417 sub get_all_MiscFeatures {
2418  my ($self, $misc_set, $dbtype) = @_;
2419  if(my $adaptor = $self->_get_CoreAdaptor('MiscFeature', $dbtype)) {
2420  if($misc_set) {
2421  return $adaptor->fetch_all_by_Slice_and_set_code($self,$misc_set);
2422  }
2423  return $adaptor->fetch_all_by_Slice($self);
2424  }
2425  return [];
2426 }
2427 
2428 =head2 get_all_MarkerFeatures
2429 
2430  Arg [1] : (optional) string logic_name
2431  The logic name of the marker features to retrieve
2432  Arg [2] : (optional) int $priority
2433  Lower (exclusive) priority bound of the markers to retrieve
2434  Arg [3] : (optional) int $map_weight
2435  Upper (exclusive) priority bound of the markers to retrieve
2436  Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
2437  Description: Retrieves all markers which lie on this slice fulfilling the
2438  specified map_weight and priority parameters (if supplied).
2439  Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2440  Exceptions : none
2441  Caller : contigview, general
2442  Status : Stable
2443 
2444 =cut
2445 
2446 sub get_all_MarkerFeatures {
2447  my ($self, $logic_name, $priority, $map_weight) = @_;
2448  if(my $adaptor = $self->_get_CoreAdaptor('MarkerFeature')) {
2449  return $adaptor->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name);
2450  }
2451  return [];
2452 }
2453 
2454 
2455 =head2 get_MarkerFeatures_by_Name
2456 
2457  Arg [1] : string marker Name
2458  The name (synonym) of the marker feature(s) to retrieve
2459  Example : my @markers = @{$slice->get_MarkerFeatures_by_Name('z1705')};
2460  Description: Retrieves all markers with this ID
2461  Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2462  Exceptions : none
2463  Caller : contigview, general
2464  Status : Stable
2465 
2466 =cut
2467 
2468 sub get_MarkerFeatures_by_Name {
2469  my ($self, $name) = @_;
2470  if(my $adaptor = $self->_get_CoreAdaptor('MarkerFeature')) {
2471  return $adaptor->fetch_all_by_Slice_and_MarkerName($self, $name);
2472  }
2473  return [];
2474 }
2475 
2476 
2477 =head2 get_all_compara_DnaAlignFeatures
2478 
2479  Arg [1] : string $qy_species
2480  The name of the species to retrieve similarity features from
2481  Arg [2] : string $qy_assembly
2482  The name of the assembly to retrieve similarity features from
2483  Arg [3] : string $type
2484  The type of the alignment to retrieve similarity features from
2485  Arg [4] : <optional> compara dbadptor to use.
2486  Example : $fs = $slc->get_all_compara_DnaAlignFeatures('Mus musculus',
2487  'MGSC3',
2488  'WGA');
2489  Description: Retrieves a list of DNA-DNA Alignments to the species specified
2490  by the $qy_species argument.
2491  The compara database must be attached to the core database
2492  for this call to work correctly. As well the compara database
2493  must have the core dbadaptors for both this species, and the
2494  query species added to function correctly.
2495  Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
2496  Exceptions : warning if compara database is not available
2497  Caller : contigview
2498  Status : Stable
2499 
2500 =cut
2501 
2502 sub get_all_compara_DnaAlignFeatures {
2503  my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
2504 
2505  if(!$self->adaptor()) {
2506  warning("Cannot retrieve DnaAlignFeatures without attached adaptor");
2507  return [];
2508  }
2509 
2510  unless($qy_species && $alignment_type # && $qy_assembly
2511  ) {
2512  throw("Query species and assembly and alignmemt type arguments are required");
2513  }
2514 
2515  if(!defined($compara_db)){
2516  $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
2517  }
2518  unless($compara_db) {
2519  warning("Compara database must be attached to core database or passed ".
2520  "as an argument to " .
2521  "retrieve compara information");
2522  return [];
2523  }
2524 
2525  my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
2526  return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
2527 }
2528 
2529 =head2 get_all_compara_Syntenies
2530 
2531  Arg [1] : string $query_species e.g. "Mus_musculus" or "Mus musculus"
2532  Arg [2] : string $method_link_type, default is "SYNTENY"
2533  Arg [3] : <optional> compara dbadaptor to use.
2534  Description: gets all the compara syntenyies for a specfic species
2535  Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
2536  Status : Stable
2537 
2538 =cut
2539 
2540 sub get_all_compara_Syntenies {
2541  my ($self, $qy_species, $method_link_type, $compara_db) = @_;
2542 
2543  if(!$self->adaptor()) {
2544  warning("Cannot retrieve features without attached adaptor");
2545  return [];
2546  }
2547 
2548  unless($qy_species) {
2549  throw("Query species and assembly arguments are required");
2550  }
2551 
2552  unless (defined $method_link_type) {
2553  $method_link_type = "SYNTENY";
2554  }
2555 
2556  if(!defined($compara_db)){
2557  $compara_db = Bio::EnsEMBL::Registry->get_DBAdaptor("compara", "compara");
2558  }
2559  unless($compara_db) {
2560  warning("Compara database must be attached to core database or passed ".
2561  "as an argument to " .
2562  "retrieve compara information");
2563  return [];
2564  }
2565  my $gdba = $compara_db->get_GenomeDBAdaptor();
2566  my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
2567  my $sra = $compara_db->get_SyntenyRegionAdaptor();
2568 
2569  my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
2570  my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
2571  my $mlss;
2572  if($this_gdb eq $query_gdb) {
2573  $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb]);
2574  } else {
2575  $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
2576  }
2577 
2578  return $sra->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $self);
2579 }
2580 
2581 =head2 get_all_Haplotypes
2582 
2583  Arg [1] : (optional) boolean $lite_flag
2584  if true lightweight haplotype objects are used
2585  Example : @haplotypes = $slice->get_all_Haplotypes;
2586  Description: Retrieves all of the haplotypes on this slice. Only works
2587  if the haplotype adaptor has been attached to the core adaptor
2588  via $dba->add_db_adaptor('haplotype', $hdba);
2589  Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
2590  Exceptions : warning is Haplotype database is not available
2591  Caller : contigview, general
2592  Status : Stable
2593 
2594 =cut
2595 
2596 sub get_all_Haplotypes {
2597  my($self, $lite_flag) = @_;
2598 
2599  if(!$self->adaptor()) {
2600  warning("Cannot retrieve features without attached adaptor");
2601  return [];
2602  }
2603 
2604  my $haplo_db = $self->adaptor->db->get_db_adaptor('haplotype');
2605 
2606  unless($haplo_db) {
2607  warning("Haplotype database must be attached to core database to " .
2608  "retrieve haplotype information" );
2609  return [];
2610  }
2611 
2612  my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
2613 
2614  my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
2615 
2616  return $haplotypes;
2617 }
2618 
2619 
2620 sub get_all_DASFactories {
2621  my $self = shift;
2622  return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
2623 }
2624 
2625 sub get_all_DASFeatures_dsn {
2626  my ($self, $source_type, $dsn) = @_;
2627 
2628  if(!$self->adaptor()) {
2629  warning("Cannot retrieve features without attached adaptor");
2630  return [];
2631  }
2632  my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
2633 
2634  return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
2635 }
2636 
2637 =head2 get_all_DAS_Features
2638 
2639  Arg [1] : none
2640  Example : $features = $slice->get_all_DASFeatures;
2641  Description: Retrieves a hash reference to a hash of DAS feature
2642  sets, keyed by the DNS, NOTE the values of this hash
2643  are an anonymous array containing:
2644  (1) a pointer to an array of features;
2645  (2) a pointer to the DAS stylesheet
2646  Returntype : hashref of Bio::SeqFeatures
2647  Exceptions : ?
2648  Caller : webcode
2649  Status : Stable
2650 
2651 =cut
2652 sub get_all_DAS_Features{
2653  my ($self) = @_;
2654 
2655  $self->{_das_features} ||= {}; # Cache
2656  $self->{_das_styles} ||= {}; # Cache
2657  $self->{_das_segments} ||= {}; # Cache
2658  my %das_features;
2659  my %das_styles;
2660  my %das_segments;
2661  my $slice = $self;
2662 
2663  foreach my $dasfact( @{$self->get_all_DASFactories} ){
2664  my $dsn = $dasfact->adaptor->dsn;
2665  my $name = $dasfact->adaptor->name;
2666 # my $type = $dasfact->adaptor->type;
2667  my $url = $dasfact->adaptor->url;
2668 
2669  my ($type) = $dasfact->adaptor->mapping;
2670  if (ref $type eq 'ARRAY') {
2671  $type = shift @$type;
2672  }
2673  $type ||= $dasfact->adaptor->type;
2674  # Construct a cache key : SOURCE_URL/TYPE
2675  # Need the type to handle sources that serve multiple types of features
2676 
2677  my $key = join('/', $name, $type);
2678  if( $self->{_das_features}->{$key} ){ # Use cached
2679  $das_features{$name} = $self->{_das_features}->{$key};
2680  $das_styles{$name} = $self->{_das_styles}->{$key};
2681  $das_segments{$name} = $self->{_das_segments}->{$key};
2682  } else { # Get fresh data
2683  my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
2684  $self->{_das_features}->{$key} = $featref;
2685  $self->{_das_styles}->{$key} = $styleref;
2686  $self->{_das_segments}->{$key} = $segref;
2687  $das_features{$name} = $featref;
2688  $das_styles{$name} = $styleref;
2689  $das_segments{$name} = $segref;
2690  }
2691  }
2692 
2693  return (\%das_features, \%das_styles, \%das_segments);
2694 }
2695 
2696 sub get_all_DASFeatures{
2697  my ($self, $source_type) = @_;
2698 
2699 
2700  if(!$self->adaptor()) {
2701  warning("Cannot retrieve features without attached adaptor");
2702  return [];
2703  }
2704 
2705  my %genomic_features = map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory;
2706  return \%genomic_features;
2707 
2708 }
2709 
2710 sub old_get_all_DASFeatures{
2711  my ($self,@args) = @_;
2712 
2713  if(!$self->adaptor()) {
2714  warning("Cannot retrieve features without attached adaptor");
2715  return [];
2716  }
2717 
2718  my %genomic_features =
2719  map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
2720  $self->adaptor()->db()->_each_DASFeatureFactory;
2721  return \%genomic_features;
2722 
2723 }
2724 
2725 
2726 =head2 get_all_ExternalFeatures
2727 
2728  Arg [1] : (optional) string $track_name
2729  If specified only features from ExternalFeatureAdaptors with
2730  the track name $track_name are retrieved.
2731  If not set, all features from every ExternalFeatureAdaptor are
2732  retrieved.
2733  Example : @x_features = @{$slice->get_all_ExternalFeatures}
2734  Description: Retrieves features on this slice from external feature adaptors
2735  Returntype : listref of Bio::SeqFeatureI implementing objects in slice
2736  coordinates
2737  Exceptions : none
2738  Caller : general
2739  Status : Stable
2740 
2741 =cut
2742 
2743 sub get_all_ExternalFeatures {
2744  my ($self, $track_name) = @_;
2745 
2746  if(!$self->adaptor()) {
2747  warning("Cannot retrieve features without attached adaptor");
2748  return [];
2749  }
2750 
2751  my $features = [];
2752 
2753  my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
2754  my @xf_adaptors = ();
2755 
2756  if($track_name) {
2757  #use a specific adaptor
2758  if(exists $xfa_hash->{$track_name}) {
2759  push @xf_adaptors, $xfa_hash->{$track_name};
2760  }
2761  } else {
2762  #use all of the adaptors
2763  push @xf_adaptors, values %$xfa_hash;
2764  }
2765 
2766 
2767  foreach my $xfa (@xf_adaptors) {
2768  push @$features, @{$xfa->fetch_all_by_Slice($self)};
2769  }
2770 
2771  return $features;
2772 }
2773 
2774 
2775 =head2 get_all_DitagFeatures
2776 
2777  Arg [1] : (optional) string ditag type
2778  Arg [1] : (optional) string logic_name
2779  Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
2780  Description: Retrieves the DitagFeatures of a specific type which overlap
2781  this slice. If type is not defined, all features are retrieved.
2782  Strandedness of the Slice is ignored.
2783  Returntype : listref of Bio::EnsEMBL::DitagFeatures
2784  Exceptions : warning if slice does not have attached adaptor
2785  Caller : general
2786  Status : Stable
2787 
2788 =cut
2789 
2790 sub get_all_DitagFeatures {
2791  my ($self, $type, $logic_name) = @_;
2792  if(my $adaptor = $self->_get_CoreAdaptor('DitagFeature')) {
2793  return $adaptor->fetch_all_by_Slice($self, $type, $logic_name);
2794  }
2795  return [];
2796 }
2797 
2798 # GENERIC FEATURES (See DBAdaptor.pm)
2799 
2800 =head2 get_generic_features
2801 
2802  Arg [1] : (optional) List of names of generic feature types to return.
2803  If no feature names are given, all generic features are
2804  returned.
2805  Example : my %features = %{$slice->get_generic_features()};
2806  Description: Gets generic features via the generic feature adaptors that
2807  have been added via DBAdaptor->add_GenricFeatureAdaptor (if
2808  any)
2809  Returntype : Hash of named features.
2810  Exceptions : none
2811  Caller : none
2812  Status : Stable
2813 
2814 =cut
2815 
2816 sub get_generic_features {
2817 
2818  my ($self, @names) = @_;
2819 
2820  if(!$self->adaptor()) {
2821  warning('Cannot retrieve features without attached adaptor');
2822  return [];
2823  }
2824 
2825  my $db = $self->adaptor()->db();
2826 
2827  my %features = (); # this will hold the results
2828 
2829  # get the adaptors for each feature
2830  my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
2831 
2832  foreach my $adaptor_name (keys(%adaptors)) {
2833 
2834  my $adaptor_obj = $adaptors{$adaptor_name};
2835  # get the features and add them to the hash
2836  my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
2837  # add each feature to the hash to be returned
2838  foreach my $feature (@$features_ref) {
2839  $features{$adaptor_name} = $feature;
2840  }
2841  }
2842 
2843  return \%features;
2844 
2845 }
2846 
2847 =head2 project_to_slice
2848 
2849  Arg [1] : Slice to project to.
2850  Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
2851  foreach my $segment ( @$chr_projection ){
2852  $chr_slice = $segment->to_Slice();
2853  print $clone_slice->seq_region_name(). ':'. $segment->from_start(). '-'.
2854  $segment->from_end(). ' -> '.$chr_slice->seq_region_name(). ':'. $chr_slice->start().
2855  '-'.$chr_slice->end().
2856  $chr_slice->strand(). " length: ".($chr_slice->end()-$chr_slice->start()+1). "\n";
2857  }
2858  Description: Projection of slice to another specific slice. Needed for where we have multiple mappings
2859  and we want to state which one to project to.
2860  Returntype : list reference of Bio::EnsEMBL::ProjectionSegment objects which
2861  can also be used as [$start,$end,$slice] triplets.
2862  Exceptions : none
2863  Caller : none
2864  Status : At Risk
2865 
2866 =cut
2867 
2868 sub project_to_slice {
2869  my $self = shift;
2870  my $to_slice = shift;
2871 
2872  throw('Slice argument is required') if(!$to_slice);
2873 
2874  my $slice_adaptor = $self->adaptor();
2875 
2876  if(!$slice_adaptor) {
2877  warning("Cannot project without attached adaptor.");
2878  return [];
2879  }
2880 
2881 
2882  my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
2883 
2884  my $cs = $to_slice->coord_system();
2885  my $slice_cs = $self->coord_system();
2886  my $to_slice_id = $to_slice->get_seq_region_id;
2887 
2888  my @projection;
2889 
2890  # decompose this slice into its symlinked components.
2891  # this allows us to handle haplotypes and PARs
2892  my $normal_slice_proj =
2893  $slice_adaptor->fetch_normalized_slice_projection($self);
2894  foreach my $segment (@$normal_slice_proj) {
2895  my $normal_slice = $segment->[2];
2896 
2897  $slice_cs = $normal_slice->coord_system();
2898 
2899  my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
2900  my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
2901  next unless defined $asm_mapper;
2902 
2903  # perform the mapping between this slice and the requested system
2904  my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
2905  $normal_slice->start(),
2906  $normal_slice->end(),
2907  $normal_slice->strand(),
2908  $slice_cs, undef, $to_slice, 1);
2909 
2910  #construct a projection from the mapping results and return it
2911  foreach my $coord (@coords) {
2912  my $original = $coord->{original};
2913  my $mapped = $coord->{mapped};
2914 
2915  # skip gaps
2916  next unless $mapped->isa('Bio::EnsEMBL::Mapper::Coordinate');
2917 
2918  my $mapped_start = $mapped->start();
2919  my $mapped_end = $mapped->end();
2920  my ($current_start, $current_end);
2921  if ($self->strand == 1) {
2922  $current_start = $original->start() - $self->start + 1;
2923  $current_end = $original->end() - $self->start + 1;
2924  } else {
2925  $current_start = $self->end() - $original->end + 1;
2926  $current_end = $self->end - $original->start + 1;
2927  }
2928 
2929  # for multiple mappings only get the correct one
2930  next unless $mapped->id == $to_slice_id;
2931 
2932  my $coord_cs = $mapped->coord_system();
2933 
2934  # If the normalised projection just ended up mapping to the
2935  # same coordinate system we were already in then we should just
2936  # return the original region. This can happen for example, if we
2937  # were on a PAR region on Y which refered to X and a projection to
2938  # 'toplevel' was requested.
2939  # if($coord_cs->equals($slice_cs)) {
2940  # # trim off regions which are not defined
2941  # return $self->_constrain_to_region();
2942  # }
2943 
2944  # create slices for the mapped-to coord system
2945  my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
2946  $mapped_start,
2947  $mapped_end,
2948  $mapped->strand());
2949 
2950  push @projection, bless([$current_start, $current_end, $slice],
2951  "Bio::EnsEMBL::ProjectionSegment");
2952  }
2953  }
2954 
2955 
2956  # delete the cache as we may want to map to different set next time and old
2957  # results will be cached.
2958  $mapper_aptr->delete_cache;
2959 
2960  return \@projection;
2961 }
2962 
2963 
2964 =head2 get_all_synonyms
2965 
2966  Args [1] : String external_db_name The name of the database to retrieve
2967  the synonym for
2968  Args [2] : (optional) Integer external_db_version Optionally restrict
2969  results from external_db_name to a specific version of
2970  the the specified external database
2971  Example : my @alternative_names = @{$slice->get_all_synonyms()};
2972  @alternative_names = @{$slice->get_all_synonyms('EMBL')};
2973  Description: get a list of alternative names for this slice
2974  Returntype : reference to list of SeqRegionSynonym objects.
2975  Exception : none
2976  Caller : general
2977  Status : At Risk
2978 
2979 =cut
2980 
2981 sub get_all_synonyms {
2982  my ($self, $external_db_name, $external_db_version) = @_;
2983 
2984  if ( !defined( $self->{'synonym'} ) ) {
2985  my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
2986  $self->{'synonym'} =
2987  $adap->get_synonyms( $self->get_seq_region_id($self) );
2988  }
2989 
2990  if(! $external_db_name) {
2991  return $self->{'synonym'};
2992  }
2993  my @args = ($external_db_version) ?
2994  ($external_db_name, $external_db_version) :
2995  ($external_db_name, undef, 1);
2996  my $external_db_id = $self->adaptor->db()->get_DBEntryAdaptor()->get_external_db_id(@args);
2997  if(!$external_db_id) {
2998  my $extra = ($external_db_version) ? "and version $external_db_version " : q{};
2999  throw "The external database $external_db_name ${extra}did not result in a valid identifier";
3000  }
3001 
3002  return [ grep { $_->external_db_id() == $external_db_id } @{$self->{synonym}} ];
3003 }
3004 
3005 =head2 add_synonym
3006 
3007  Args[0] : synonym.
3008  Example : $slice->add_synonym("alt_name");
3009  Description: add an alternative name for this slice
3010  Returntype : none
3011  Exception : none
3012  Caller : general
3013  Status : At Risk
3014 
3015 =cut
3016 
3017 sub add_synonym{
3018  my $self = shift;
3019  my $syn = shift;
3020  my $external_db_id = shift;
3021 
3022  my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
3023  if ( !defined( $self->{'synonym'} ) ) {
3024  $self->{'synonym'} = $self->get_all_synonyms();
3025  }
3026  my $new_syn = Bio::EnsEMBL::SeqRegionSynonym->new( #-adaptor => $adap,
3027  -synonym => $syn,
3028  -external_db_id => $external_db_id,
3029  -seq_region_id => $self->get_seq_region_id($self));
3030 
3031  push (@{$self->{'synonym'}}, $new_syn);
3032 
3033  return;
3034 }
3035 
3036 
3037 # package variable to minimize duplication
3038 my %region_so_mapping = (
3039  'chromosome' => 'SO:0000340',
3040  'supercontig' => 'SO:0000148',
3041  'scaffold' => 'SO:0000148',
3042  'contig' => 'SO:0000149'
3043 );
3044 
3045 =head2 feature_so_acc
3046 
3047  Example : $slice_so_acc = $slice->feature_so_acc;
3048  Description : This method returns a string containing the SO accession number of the slice, based on the coordinate system name.
3049  Returns : string (Sequence Ontology accession number)
3050 
3051 =cut
3052 
3053 sub feature_so_acc {
3054  my $self = shift;
3055 
3056  # return the region SO acc, or Slice acc
3057  return $region_so_mapping{$self->coord_system_name} // 'SO:0000001';
3058 }
3059 
3060 =head2 feature_so_term
3061 
3062  Description: This method returns a string containing the SO term of the slice, based on the coordinate system name
3063  Define constant SEQUENCE_ONTOLOGY in classes that require it, or override it for multiple possible values for a class.
3064  Returntype : String (Sequence Ontology term)
3065  Exceptions : Thrown if caller SEQUENCE_ONTOLOGY is undefined and is not a Bio::EnsEMBL::Slice
3066 
3067 =cut
3068 
3069 sub feature_so_term {
3070  my ($self) = shift;
3071 
3072  return defined($region_so_mapping{$self->coord_system_name}) ?
3073  $self->coord_system_name :
3074  'region';
3075 }
3076 
3077 =head2 summary_as_hash
3078 
3079  Example : $slice_summary = $slice->summary_as_hash();
3080  Description : Retrieves a textual summary of this slice.
3081  Returns : hashref of descriptive strings
3082 =cut
3083 
3084 sub summary_as_hash {
3085  my $self = shift;
3086  my %summary;
3087  my @aliases = map { $_->name } @{$self->slice->get_all_synonyms()};
3088 
3089  $summary{'seq_region_name'} = $self->seq_region_name;
3090  $summary{'id'} = $self->seq_region_name;
3091  $summary{'start'} = $self->start;
3092  $summary{'end'} = $self->end;
3093  $summary{'strand'} = 0;
3094  $summary{'source'} = $self->source || $self->coord_system->version;
3095  $summary{'Alias'} = \@aliases if scalar(@aliases);
3096  $summary{'Is_circular'} = $self->is_circular ? "true" : undef;
3097  return \%summary;
3098 }
3099 
3100 
3101 sub slice {
3102  my $self = shift;
3103  return $self;
3104 }
3105 
3106 #
3107 # Bioperl Bio::PrimarySeqI methods:
3108 #
3109 
3110 =head2 id
3111 
3112  Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3113 
3114 =cut
3115 
3116 sub id { name(@_); }
3117 
3118 
3119 =head2 display_id
3120 
3121  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3122 
3123 =cut
3124 
3125 sub display_id { name(@_); }
3126 
3127 
3128 =head2 primary_id
3129 
3130  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3131 
3132 =cut
3133 
3134 sub primary_id { name(@_); }
3135 
3136 
3137 =head2 desc
3138 
3139 Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3140 
3141 =cut
3142 
3143 sub desc{ return $_[0]->coord_system->name().' '.$_[0]->seq_region_name(); }
3144 
3145 
3146 =head2 moltype
3147 
3148 Description: Included for Bio::PrimarySeqI interface compliance (0.7)
3149 
3150 =cut
3151 
3152 sub moltype { return 'dna'; }
3153 
3154 =head2 alphabet
3155 
3156  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3157 
3158 =cut
3159 
3160 sub alphabet { return 'dna'; }
3161 
3162 
3163 =head2 accession_number
3164 
3165  Description: Included for Bio::PrimarySeqI interface compliance (1.2)
3166 
3167 =cut
3168 
3169 sub accession_number { name(@_); }
3170 
3171 1;
usage
public usage()
Bio::EnsEMBL::Slice::coord_system_name
public String coord_system_name()
Bio::EnsEMBL::Mapper::RangeRegistry
Definition: RangeRegistry.pm:51
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::ProjectionSegment
Definition: ProjectionSegment.pm:27
map
public map()
Bio::EnsEMBL::CoordSystem::name
public String name()
Bio::EnsEMBL::DBSQL::MergedAdaptor
Definition: MergedAdaptor.pm:36
Bio::EnsEMBL::Utils::Sequence
Definition: Sequence.pm:22
accession
public accession()
Bio::EnsEMBL::CoordSystem
Definition: CoordSystem.pm:40
Bio::EnsEMBL::DBSQL::SliceAdaptor
Definition: SliceAdaptor.pm:78
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::Slice::seq
public String seq()
Bio::EnsEMBL::SeqRegionSynonym
Definition: SeqRegionSynonym.pm:16
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::Registry::get_DBAdaptor
public DBAdaptor get_DBAdaptor()
Bio::EnsEMBL::Slice::new
public Bio::EnsEMBL::Slice new()
Bio::EnsEMBL::Slice::seq_region_name
public String seq_region_name()
has_karyotype
public has_karyotype()
Bio::EnsEMBL::Slice::start
public Int start()
Bio::EnsEMBL::RepeatMaskedSlice::new
public Bio::EnsEMBL::RepeatMaskedSlice new()
Bio::EnsEMBL::Utils::Iterator
Definition: Iterator.pm:44
Bio::EnsEMBL::DBSQL::BaseAdaptor
Definition: BaseAdaptor.pm:71
get_seq_region_id
public get_seq_region_id()
Bio::EnsEMBL::RepeatFeature
Definition: RepeatFeature.pm:45
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::Registry::get_db
public Adaptor get_db()
Bio::EnsEMBL::RepeatMaskedSlice
Definition: RepeatMaskedSlice.pm:28
Bio::EnsEMBL::Utils::Iterator::new
public Bio::EnsEMBL::Utils::Iterator new()
Bio::EnsEMBL::SeqRegionSynonym::new
public Bio::EnsEMBL::SeqRegionSynonym new()
Bio::EnsEMBL::PredictionTranscript
Definition: PredictionTranscript.pm:39
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68