3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 $sa = $db->get_SliceAdaptor;
40 $sa->fetch_by_region(
'chromosome',
'X', 1_000_000, 2_000_000 );
42 # get some attributes of the slice
44 my $start = $slice->start();
45 my $end = $slice->end();
47 # get the sequence from the slice
48 my $seq = $slice->seq();
50 # get some features from the slice
51 foreach $gene ( @{ $slice->get_all_Genes } ) {
52 # do something with a gene
55 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56 # do something with dna-dna alignments
61 A Slice
object represents a region of a genome. It can be used to retrieve
62 sequence or features from an area of interest.
64 NOTE: The Slice is defined by its Strand, but normal behaviour
for get_all_*
65 methods is to
return Features on both Strands.
71 package Bio::EnsEMBL::Slice;
90 use Scalar::Util qw(weaken isweak);
94 my $registry =
"Bio::EnsEMBL::Registry";
96 @ISA = qw(Bio::PrimarySeqI);
101 Arg [...] : List of named arguments
103 string SEQ_REGION_NAME,
106 int SEQ_REGION_LENGTH, (optional)
107 string SEQ (optional)
108 int STRAND, (optional, defaults to 1)
114 -seq_region_name =>
'X',
115 -seq_region_length => 12e6,
116 -adaptor => $slice_adaptor);
117 Description: Creates a
new slice
object. A slice represents a region
118 of sequence in a particular coordinate system. Slices can be
119 used to retrieve sequence and features from an area of
120 interest in a genome.
122 Coordinates start at 1 and are inclusive. Negative
123 coordinates or coordinates exceeding the length of the
124 seq_region are permitted. Start must be less than or equal.
125 to end regardless of the strand.
127 Slice objects are immutable. Once instantiated their attributes
128 (with the exception of the adaptor) may not be altered. To
129 change the attributes a
new slice must be created.
131 Exceptions :
throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
132 Caller : general, Bio::EnsEMBL::SliceAdaptor
140 #new can be called as a class or object method
141 my $class = ref($caller) || $caller;
143 my ($seq, $coord_system, $seq_region_name, $seq_region_length,
144 $start, $end, $strand, $adaptor, $empty) =
145 rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
146 START END STRAND ADAPTOR EMPTY)], @_);
148 if ( !defined($seq_region_name) ) {
149 throw(
'SEQ_REGION_NAME argument is required');
151 if ( !defined($start) ) {
throw(
'START argument is required') }
152 if ( !defined($end) ) {
throw(
'END argument is required') }
154 ## if ( $start > $end + 1 ) {
155 ## throw('start must be less than or equal to end+1');
158 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
160 if ( $seq_region_length <= 0 ) {
161 throw(
'SEQ_REGION_LENGTH must be > 0');
164 if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
165 throw(
'SEQ must be the same length as the defined LENGTH not '
168 . ( $end - $start + 1 ) );
171 if(defined($coord_system)) {
172 if(!ref($coord_system) || !$coord_system->isa(
'Bio::EnsEMBL::CoordSystem')){
173 throw(
'COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
175 if($coord_system->is_top_level()) {
176 throw(
'Cannot create slice on toplevel CoordSystem.');
179 warning(
"Slice without coordinate system");
184 if($strand != 1 && $strand != -1) {
185 throw(
'STRAND argument must be -1 or 1');
188 if(defined($adaptor)) {
189 if(!ref($adaptor) || !$adaptor->isa(
'Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
190 throw(
'ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
194 my $self = bless {
'coord_system' => $coord_system,
196 'seq_region_name' => $seq_region_name,
197 'seq_region_length' => $seq_region_length,
198 'start' => int($start),
200 'strand' => $strand}, $class;
202 $self->adaptor($adaptor);
210 Arg [1] : hashref to be blessed
223 my $self = bless $hashref, $class;
224 weaken($self->{adaptor})
if ( ! isweak($self->{adaptor}) );
231 Example : $adaptor = $slice->adaptor();
232 Description: Getter/Setter
for the slice
object adaptor used
233 by
this slice
for database interaction.
235 Exceptions : thorws
if argument passed is not a SliceAdaptor
247 if(!ref($ad) || !$ad->isa(
'Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
248 throw(
'Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
251 weaken($self->{
'adaptor'} = $ad);
254 return $self->{
'adaptor'};
259 =head2 seq_region_name
262 Example : $seq_region = $slice->seq_region_name();
263 Description: Returns the name of the seq_region that
this slice is on. For
264 example
if this slice is in chromosomal coordinates the
265 seq_region_name might be
'X' or
'10'.
267 This
function was formerly named chr_name, but since slices can
268 now be on coordinate systems other than chromosomal it has been
277 sub seq_region_name {
279 return $self->{
'seq_region_name'};
282 =head2 seq_region_start
284 Example : $seq_region_start = $slice->seq_region_start();
285 Description: Returns the start position of
this slice relative to the
286 start of the sequence region that it was created on.
287 Since slices are always in genomic coordinates
this is
296 sub seq_region_start {
298 return $self->start();
301 =head2 seq_region_end
303 Example : $seq_region_end = $slice->seq_region_end();
304 Description: Returns the end position of
this slice relative to the
305 start of the sequence region that it was created on.
306 Since slices are always in genomic coordinates
this is
320 =head2 seq_region_length
323 Example : $seq_region_length = $slice->seq_region_length();
324 Description: Returns the length of the entire seq_region that
this slice is
325 on. For example
if this slice is on a chromosome
this will be
326 the length (in basepairs) of the entire chromosome.
334 sub seq_region_length {
336 return $self->{
'seq_region_length'};
343 Example : print $slice->coord_system->name();
344 Description: Returns the coordinate system that
this slice is on.
354 return $self->{
'coord_system'};
359 Arg [1] : (optional) String $value
360 Example : print $slice->source();
361 Description: Returns the source
this slice is coming from
371 $self->{
'source'} = shift
if (@_);
372 return $self->{
'source'};
375 =head2 coord_system_name
378 Example : print $slice->coord_system_name()
379 Description: Convenience method. Gets the name of the coord_system which
381 Returns undef
if this Slice does not have an attached
383 Returntype:
string or undef
390 sub coord_system_name {
392 my $csystem = $self->{
'coord_system'};
393 return ($csystem) ? $csystem->
name() : undef;
400 Example : $cp = $slice->centrepoint();
401 Description: Returns the mid position of
this slice relative to the
402 start of the sequence region that it was created on.
403 Coordinates are inclusive and start at 1.
413 return ($self->{
'start'}+$self->{
'end'})/2;
419 Example : $start = $slice->start();
420 Description: Returns the start position of
this slice relative to the
421 start of the sequence region that it was created on.
422 Coordinates are inclusive and start at 1. Negative coordinates
423 or coordinates exceeding the length of the sequence region are
424 permitted. Start is always less than or equal to end
425 regardless of the orientation of the slice.
435 return $self->{
'start'};
443 Example : $end = $slice->end();
444 Description: Returns the end position of
this slice relative to the
445 start of the sequence region that it was created on.
446 Coordinates are inclusive and start at 1. Negative coordinates
447 or coordinates exceeding the length of the sequence region are
448 permitted. End is always greater than or equal to start
449 regardless of the orientation of the slice.
459 return $self->{
'end'};
467 Example : $strand = $slice->strand();
468 Description: Returns the orientation of
this slice on the seq_region it has
470 Returntype : int (either 1 or -1)
472 Caller : general, invert
479 return $self->{
'strand'};
489 Example : my $results = $cache{$slice->name()};
490 Description: Returns the name of
this slice. The name is formatted as a colon
491 delimited
string with the following attributes:
492 coord_system:version:seq_region_name:start:end:strand
494 Slices with the same name are equivalent and thus the name can
506 my $cs = $self->{
'coord_system'};
509 ($cs) ? $cs->name() :
'',
510 ($cs) ? $cs->version() :
'',
511 $self->{
'seq_region_name'},
522 Example : $length = $slice->length();
523 Description: Returns the length of
this slice in basepairs
534 my $length = $self->{
'end'} - $self->{
'start'} + 1;
536 if ( $self->{
'start'} > $self->{
'end'} && $self->is_circular() ) {
537 $length += $self->{
'seq_region_length'};
545 Example : my $reference = $slice->is_reference()
546 Description: Returns 1
if slice is a reference slice
else 0
556 if ( !defined( $self->{
'is_reference'} ) ) {
557 $self->{
'is_reference'} =
558 $self->adaptor()->is_reference( $self->get_seq_region_id() );
561 return $self->{
'is_reference'};
566 Example : my $top = $slice->is_toplevel()
567 Description: Returns 1
if slice is a toplevel slice
else 0
577 if ( !defined( $self->{
'toplevel'} ) ) {
578 $self->{
'toplevel'} =
579 $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
582 return $self->{
'toplevel'};
587 Example : my $top = $slice->has_karyotype()
588 Description: Returns 1
if slice is part of the karyotype
else 0
598 if ( !defined( $self->{
'karyotype'} ) ) {
599 $self->{
'karyotype'} =
600 $self->adaptor()->has_karyotype( $self->get_seq_region_id() );
603 return $self->{
'karyotype'};
606 =head2 karyotype_rank
608 Example : my $rank = $slice->karyotype_rank()
609 Description: Returns the numeric ranking in the karyotype. Otherwise 0 is returned
618 if(! defined( $self->{karyotype_rank})) {
619 my $rank = $self->adaptor()->get_karyotype_rank($self->get_seq_region_id());
620 $self->{karyotype_rank} = $rank
if $rank;
622 return $self->{karyotype_rank} || 0;
627 Example : my $circ = $slice->is_circular()
628 Description: Returns 1
if slice is a circular slice
else 0
637 my $adaptor = $self->adaptor();
638 return 0
if ! defined $adaptor;
639 if (! exists $self->{
'circular'}) {
640 my $id = $adaptor->get_seq_region_id($self);
641 $self->{circular} = $adaptor->is_circular($id);
643 return $self->{circular};
649 Example : $inverted_slice = $slice->invert;
650 Description: Creates a copy of
this slice on the opposite strand and
662 # make a shallow copy of the slice via a hash copy and flip the strand
664 $s{
'strand'} = $self->{
'strand'} * -1;
666 # reverse compliment any attached sequence
667 reverse_comp(\$s{
'seq'})
if($s{
'seq'});
669 # bless and return the copy
670 return bless \%s, ref $self;
678 Example : print
"SEQUENCE = ", $slice->
seq();
679 Description: Returns the sequence of the region represented by
this
680 slice formatted as a
string.
681 Only available
for the
default coord_system
692 # special case for in-between (insert) coordinates
693 return '' if($self->start() == $self->end() + 1);
695 return $self->{
'seq'}
if($self->{
'seq'});
697 if($self->adaptor()) {
698 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
699 return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
702 # no attached sequence, and no db, so just return Ns
703 return 'N' x $self->length();
710 Arg [1] :
int $startBasePair
711 relative to start of slice, which is 1.
712 Arg [2] :
int $endBasePair
713 relative to start of slice.
714 Arg [3] : (optional)
int $strand
715 The strand of the slice to obtain sequence from. Default
717 Description: returns
string of dna sequence
719 Exceptions : end should be at least as big as start
727 my ( $self, $start, $end, $strand ) = @_;
729 if ( $end+1 < $start ) {
730 throw(
"End coord + 1 is less than start coord");
733 # handle 'between' case for insertions
734 return '' if( $start == $end + 1);
736 $strand = 1 unless(defined $strand);
738 if ( $strand != -1 && $strand != 1 ) {
739 throw(
"Invalid strand [$strand] in call to Slice::subseq.");
743 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
744 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
748 ## check for gap at the beginning and pad it with Ns
750 $subseq =
"N" x (1 - $start);
753 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
754 ## check for gap at the end and pad it with Ns
755 if ($end > $self->length()) {
756 $subseq .=
"N" x ($end - $self->length());
758 reverse_comp(\$subseq)
if($strand == -1);
763 =head2 sub_Slice_Iterator
765 Arg[1] :
int The chunk size to request
766 Example : my $i = $slice->sub_Slice_Iterator(60000);
767 while($i->has_next()) { warn $i->next()->name(); }
768 Description : Returns an iterator which batches subslices of
this Slice
769 in the requested chunk size
776 sub sub_Slice_Iterator {
777 my ($self, $chunk_size) = @_;
778 throw "Need a chunk size to divide the slice by" if ! $chunk_size;
780 my $end = $self->length();
781 my $iterator_sub = sub {
782 while($here <= $end) {
783 my $there = $here + $chunk_size - 1;
784 $there = $end
if($there > $end);
785 my $slice = $self->sub_Slice($here, $there);
794 =head2 assembly_exception_type
796 Example : $self->assembly_exception_type();
797 Description : Returns the type of slice
this is. If it is reference then you
798 will get
'REF' back. Otherwise you will get the first
799 element from C<get_all_AssemblyExceptionFeatures()>. If no
800 assembly exception exists you will get an empty
string back.
808 sub assembly_exception_type {
811 if($self->is_reference()) {
815 my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
816 if(@{$assembly_exceptions}) {
817 $type = $assembly_exceptions->[0]->type();
825 Example : print ($slice->is_chromosome()) ?
'I am a chromosome' :
'Not one';
826 Description : A chromosome is a slice with a karyotype rank assigned to it.
827 Returntype : Boolean indicates
if the current
object is a chromosome
835 return $self->has_karyotype();
839 =head2 get_base_count
842 Example : $c_count = $slice->get_base_count->{
'c'};
843 Description: Retrieves a hashref containing the counts of each bases in the
844 sequence spanned by
this slice. The format of the hash is :
852 All bases which are not in the set [A,a,C,c,T,t,G,g] are
853 included in the
'n' count. The
'n' count could therefore be
854 inclusive of ambiguity codes such as
'y'.
855 The %gc is the ratio of GC to AT content as in:
856 total(GC)/total(ACTG) * 100
857 This
function is conservative in its memory
usage and scales to
858 work
for entire chromosomes.
878 my $len = $self->length();
882 while ( $start <= $len ) {
883 $end = $start + $RANGE - 1;
884 $end = $len
if ( $end > $len );
886 $seq = $self->subseq( $start, $end );
896 my $actg = $a + $c + $t + $g;
899 if ( $actg > 0 ) { # Avoid dividing by 0
900 $gc_content = sprintf(
"%1.2f", ( ( $g + $c )/$actg )*100 );
908 '%gc' => $gc_content };
915 Arg [1] :
string $name
916 The name of the coordinate system to project
this slice onto
917 Arg [2] :
string $version
918 The version of the coordinate system (such as
'NCBI34') to
919 project
this slice onto
921 my $clone_projection = $slice->project(
'clone');
923 foreach my $segment (@$clone_projection) {
924 my $clone = $segment->to_Slice();
925 print $slice->seq_region_name(),
':', $segment->from_start(),
'-',
926 $segment->from_end(),
' -> ',
927 $clone->seq_region_name(),
':', $clone->start(),
'-',$clone->end(),
928 ':', $clone->strand(),
"\n";
930 Description: Returns the results of
'projecting' this slice onto another
931 coordinate system. Projecting to a coordinate system that
932 the slice is assembled from is analagous to retrieving a tiling
933 path. This method may also be used to
'project up' to a higher
934 level coordinate system, however.
936 This method returns a listref of triplets [start,end,slice]
937 which represents the projection. The start and end defined the
938 region of
this slice which is made up of the third value of
939 the triplet: a slice in the requested coordinate system.
941 can also be used as [$start,$end,$slice] triplets
951 my $cs_version = shift;
953 throw(
'Coord_system name argument is required')
if(!$cs_name);
955 my $slice_adaptor = $self->adaptor();
957 if(!$slice_adaptor) {
958 warning(
"Cannot project without attached adaptor.");
962 if(!$self->coord_system()) {
963 warning(
"Cannot project without attached coord system.");
968 my $db = $slice_adaptor->db();
969 my $csa = $db->get_CoordSystemAdaptor();
970 my $cs = $csa->fetch_by_name($cs_name, $cs_version);
971 my $slice_cs = $self->coord_system();
974 throw(
"Cannot project to unknown coordinate system " .
975 "[$cs_name $cs_version]");
978 # no mapping is needed if the requested coord system is the one we are in
979 # but we do need to check if some of the slice is outside of defined regions
980 if($slice_cs->equals($cs)) {
981 return $self->_constrain_to_region();
986 # decompose this slice into its symlinked components.
987 # this allows us to handle haplotypes and PARs
988 my $normal_slice_proj =
989 $slice_adaptor->fetch_normalized_slice_projection($self);
990 foreach my $segment (@$normal_slice_proj) {
991 my $normal_slice = $segment->[2];
993 $slice_cs = $normal_slice->coord_system();
995 my $asma = $db->get_AssemblyMapperAdaptor();
996 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
997 next unless defined $asm_mapper;
999 # perform the mapping between this slice and the requested system
1000 my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
1001 $normal_slice->start(),
1002 $normal_slice->end(),
1003 $normal_slice->strand(),
1004 $slice_cs, undef, undef, 1);
1006 # construct a projection from the mapping results and return it
1007 foreach my $coord (@coords) {
1008 my $original = $coord->{original};
1009 my $mapped = $coord->{mapped};
1012 next unless $mapped->isa(
'Bio::EnsEMBL::Mapper::Coordinate');
1014 my $mapped_start = $mapped->start();
1015 my $mapped_end = $mapped->end();
1016 my ($current_start, $current_end);
1017 if ($self->strand == 1) {
1018 $current_start = $original->start() - $self->start + 1;
1019 $current_end = $original->end() - $self->start + 1;
1021 $current_start = $self->end() - $original->end + 1;
1022 $current_end = $self->end - $original->start + 1;
1025 my $coord_cs = $mapped->coord_system();
1027 # If the normalised projection just ended up mapping to the
1028 # same coordinate system we were already in then we should just
1029 # return the original region. This can happen for example, if we
1030 # were on a PAR region on Y which refered to X and a projection to
1031 # 'toplevel' was requested.
1032 if($coord_cs->equals($slice_cs)) {
1033 # trim off regions which are not defined
1034 return $self->_constrain_to_region();
1036 #create slices for the mapped-to coord system
1037 my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
1042 if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
1043 $current_end -= $slice->seq_region_length();
1046 push @projection, bless([$current_start, $current_end, $slice],
1047 "Bio::EnsEMBL::ProjectionSegment");
1052 return \@projection;
1056 sub _constrain_to_region {
1059 my $entire_len = $self->seq_region_length();
1061 #if the slice has negative coordinates or coordinates exceeding the
1062 #exceeding length of the sequence region we want to shrink the slice to
1065 if($self->{
'start'} > $entire_len || $self->{
'end'} < 1) {
1066 #none of this slice is in a defined region
1070 my $right_contract = 0;
1071 my $left_contract = 0;
1072 if($self->{
'end'} > $entire_len) {
1073 $right_contract = $entire_len - $self->{
'end'};
1075 if($self->{
'start'} < 1) {
1076 $left_contract = $self->{
'start'} - 1;
1080 if($left_contract || $right_contract) {
1081 #if slice in negative strand, need to swap contracts
1082 if ($self->strand == 1) {
1083 $new_slice = $self->expand($left_contract, $right_contract);
1085 elsif ($self->strand == -1) {
1086 $new_slice = $self->expand($right_contract, $left_contract);
1092 return [bless [1-$left_contract, $self->length()+$right_contract,
1093 $new_slice],
"Bio::EnsEMBL::ProjectionSegment" ];
1099 Arg [1] : (optional)
int $five_prime_expand
1100 The number of basepairs to shift
this slices five_prime
1101 coordinate by. Positive values make the slice larger,
1102 negative make the slice smaller.
1105 Arg [2] : (optional)
int $three_prime_expand
1106 The number of basepairs to shift
this slices three_prime
1107 coordinate by. Positive values make the slice larger,
1108 negative make the slice smaller.
1110 Arg [3] : (optional)
bool $force_expand
1111 if set to 1, then the slice will be contracted even in the
case
1112 when shifts $five_prime_expand and $three_prime_expand overlap.
1113 In that
case $five_prime_expand and $three_prime_expand will be set
1114 to a maximum possible number and that will result in the slice
1115 which would have only 2pbs.
1117 Arg [4] : (optional)
int* $fpref
1118 The reference to a number of basepairs to shift
this slices five_prime
1119 coordinate by. Normally it would be set to $five_prime_expand.
1120 But in
case when $five_prime_expand shift can not be applied and
1121 $force_expand is set to 1, then $$fpref will contain the maximum possible
1123 Arg [5] : (optional)
int* $tpref
1124 The reference to a number of basepairs to shift
this slices three_prime
1125 coordinate by. Normally it would be set to $three_prime_expand.
1126 But in
case when $five_prime_expand shift can not be applied and
1127 $force_expand is set to 1, then $$tpref will contain the maximum possible
1129 Example : my $expanded_slice = $slice->expand( 1000, 1000);
1130 my $contracted_slice = $slice->expand(-1000,-1000);
1131 my $shifted_right_slice = $slice->expand(-1000, 1000);
1132 my $shifted_left_slice = $slice->expand( 1000,-1000);
1133 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
1135 Description: Returns a slice which is a resized copy of
this slice. The
1136 start and end are moved outwards from the center of the slice
1137 if positive values are provided and moved inwards
if negative
1138 values are provided. This slice remains unchanged. A slice
1139 may not be contracted below 1bp but may grow to be arbitrarily
1142 Exceptions : warning
if an attempt is made to contract the slice below 1bp
1150 my $five_prime_shift = shift || 0;
1151 my $three_prime_shift = shift || 0;
1152 my $force_expand = shift || 0;
1156 if ( $self->{
'seq'} ) {
1158 "Cannot expand a slice which has a manually attached sequence ");
1162 my $sshift = $five_prime_shift;
1163 my $eshift = $three_prime_shift;
1165 if ($sshift == 0 && $eshift == 0) {
return $self; }
1167 if ( $self->{
'strand'} != 1 ) {
1168 $eshift = $five_prime_shift;
1169 $sshift = $three_prime_shift;
1172 my $new_start = $self->{
'start'} - $sshift;
1173 my $new_end = $self->{
'end'} + $eshift;
1175 # Wrap around on circular slices
1176 if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0
1177 || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
1179 if ( $new_start <= 0 ) {
1180 $new_start = $self->seq_region_length() + $new_start;
1182 if ( $new_start > $self->seq_region_length() ) {
1183 $new_start -= $self->seq_region_length();
1186 if ( $new_end <= 0 ) {
1187 $new_end = $self->seq_region_length() + $new_end;
1189 if ( $new_end > $self->seq_region_length() ) {
1190 $new_end -= $self->seq_region_length();
1195 if ( $new_start > $new_end && (not $self->is_circular() ) ) {
1197 if ($force_expand) {
1198 # Apply max possible shift, if force_expand is set
1199 if ( $sshift < 0 ) {
1200 # if we are contracting the slice from the start - move the
1201 # start just before the end
1202 $new_start = $new_end - 1;
1203 $sshift = $self->{start} - $new_start;
1206 if ( $new_start > $new_end ) {
1207 # if the slice still has a negative length - try to move the
1209 if ( $eshift < 0 ) {
1210 $new_end = $new_start + 1;
1211 $eshift = $new_end - $self->{end};
1214 # return the values by which the primes were actually shifted
1215 $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
1216 $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
1218 if ( $new_start > $new_end ) {
1219 throw(
'Slice start cannot be greater than slice end');
1223 #fastest way to copy a slice is to do a shallow hash copy
1224 my %new_slice = %$self;
1225 $new_slice{
'start'} = int($new_start);
1226 $new_slice{
'end'} = int($new_end);
1228 return bless \%new_slice, ref($self);
1231 =head2 constrain_to_seq_region
1232 Example : $new_slice = $slice->expand(1000,10000);
1233 $new_slice = $new_slice->constrain_to_seq_region();
1234 Description: Used to prevent overly zealous expand calls going off the end of
1235 the sequence region. It contracts the start and end where needed
1236 and produces a slice copy with the tweaked coordinates.
1240 sub constrain_to_seq_region {
1242 # circular calculations should already be taken care of
1243 if ($self->is_circular) {
return $self}
1244 my $new_start = $self->
start;
1245 my $new_end = $self->end;
1247 my $seq_region = $self->seq_region_Slice;
1249 if ($new_start < $seq_region->start) {$new_start = $seq_region->start}
1250 if ($new_end > $seq_region->end) {$new_end = $seq_region->end}
1252 my %new_slice = %$self;
1253 $new_slice{
'start'} = $new_start;
1254 $new_slice{
'end'} = $new_end;
1256 return bless \%new_slice, ref($self);
1262 Arg 1 :
int $start, refers to the start of the subslice relative to the input slice
1263 Arg 2 :
int $end, refers to the end of the subslice relative to the input slice
1264 Arge [3] :
int $strand
1266 Description: Makes another Slice that covers only part of
this Slice
1267 If a Slice is requested which lies outside of the boundaries
1268 of
this function will
return undef. This means that
1269 behaviour will be consistant whether or not the slice is
1270 attached to the database (i.e.
if there is attached sequence
1271 to the slice). Alternatively the expand() method or the
1272 SliceAdaptor::fetch_by_region method can be used instead.
1273 Returntype :
Bio::
EnsEMBL::Slice or undef if arguments are wrong
1281 my ( $self, $start, $end, $strand ) = @_;
1283 my $length = $self->length();
1285 if( $start < 1 || $start > $length ) {
1286 # throw( "start argument not valid" );
1290 if( $end < $start || $end > $length ) {
1291 # throw( "end argument not valid" )
1295 my ( $new_start, $new_end, $new_strand, $new_seq );
1296 if( ! defined $strand ) {
1300 if( $self->{
'strand'} == 1 ) {
1301 $new_start = $self->{
'start'} + $start - 1;
1302 $new_end = $self->{
'start'} + $end - 1;
1303 $new_strand = $strand;
1305 $new_start = $self->{
'end'} - $end + 1;;
1306 $new_end = $self->{
'end'} - $start + 1;
1307 $new_strand = -$strand;
1310 if( defined $self->{
'seq'} ) {
1311 $new_seq = $self->subseq( $start, $end, $strand );
1314 #fastest way to copy a slice is to do a shallow hash copy
1315 my $new_slice = {%{$self}};
1316 $new_slice->{
'start'} = int($new_start);
1317 $new_slice->{
'end'} = int($new_end);
1318 $new_slice->{
'strand'} = $new_strand;
1320 $new_slice->{
'seq'} = $new_seq;
1322 weaken($new_slice->{adaptor});
1324 return bless $new_slice, ref($self);
1329 =head2 seq_region_Slice
1332 Example : $slice = $slice->seq_region_Slice();
1333 Description: Returns a slice which spans the whole seq_region which
this slice
1334 is on. For example
if this is a slice which spans a small region
1335 of chromosome X,
this method will
return a slice which covers the
1336 entire chromosome X. The returned slice will always have strand
1337 of 1 and start of 1. This method cannot be used
if the sequence
1338 of the slice has been set manually.
1340 Exceptions : warning
if called when sequence of Slice has been set manually.
1346 sub seq_region_Slice {
1350 warning(
"Cannot get a seq_region_Slice of a slice which has manually ".
1351 "attached sequence ");
1355 # quick shallow copy
1357 %{$slice} = %{$self};
1358 bless $slice, ref($self);
1359 weaken($slice->{adaptor});
1361 $slice->{
'start'} = 1;
1362 $slice->{
'end'} = $slice->{
'seq_region_length'};
1363 $slice->{
'strand'} = 1;
1372 Example : my $seq_region_id = $slice->get_seq_region_id();
1373 Description: Gets the
internal identifier of the seq_region that
this slice
1374 is on. Note that
this function will not work correctly
if this
1375 slice does not have an attached adaptor. Also note that it may
1377 method
if you are working with multiple databases since is
1378 possible to work with slices from databases with different
1379 internal seq_region identifiers.
1380 Returntype :
int or undef
if slices does not have attached adaptor
1381 Exceptions : warning
if slice is not associated with a SliceAdaptor
1382 Caller : assembly loading scripts, general
1389 if($self->adaptor) {
1390 return $self->adaptor->get_seq_region_id($self);
1393 warning(
'Cannot retrieve seq_region_id without attached adaptor.');
1398 =head2 get_genome_component
1401 Example : my $genome_component = $slice->get_genome_component();
1402 Description: Returns the genome component of the slice
1403 Returntype : Scalar; the identifier of the genome component of the slice
1410 sub get_genome_component {
1412 return $self->adaptor->get_genome_component_for_slice($self);
1415 =head2 get_all_Attributes
1417 Arg [1] : optional
string $attrib_code
1418 The code of the attribute type to retrieve values
for.
1419 Example : ($htg_phase) = @{$slice->get_all_Attributes(
'htg_phase')};
1420 @slice_attributes = @{$slice->get_all_Attributes()};
1421 Description: Gets a list of Attributes of
this slice
''s seq_region.
1422 Optionally just get Attrubutes
for given code.
1424 Exceptions : warning
if slice does not have attached adaptor
1430 sub get_all_Attributes {
1431 my ($self, $attrib_code) = @_;
1432 if(my $adaptor = $self->_get_CoreAdaptor(
'Attribute')) {
1433 my $attribs = $adaptor->fetch_all_by_Slice($self);
1434 if(defined $attrib_code) {
1435 my $uc_attrib = uc($attrib_code);
1436 return [ grep { uc($_->code()) eq $uc_attrib } @{$attribs}];
1443 =head2 get_all_PredictionTranscripts
1445 Arg [1] : (optional)
string $logic_name
1446 The name of the analysis used to generate the prediction
1447 transcripts obtained.
1448 Arg [2] : (optional)
boolean $load_exons
1449 If set to
true will force loading of all PredictionExons
1450 immediately rather than loading them on demand later. This
1451 is faster
if there are a large number of PredictionTranscripts
1452 and the exons will be used.
1453 Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
1454 Description: Retrieves the list of prediction transcripts which overlap
1455 this slice with logic_name $logic_name. If logic_name is
1456 not defined then all prediction transcripts are retrieved.
1458 Exceptions : warning
if slice does not have attached adaptor
1464 sub get_all_PredictionTranscripts {
1465 my ($self,$logic_name, $load_exons) = @_;
1467 if(!$self->adaptor()) {
1468 warning(
'Cannot get PredictionTranscripts without attached adaptor');
1471 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
1472 return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
1475 =head2 get_all_DnaAlignFeatures
1477 Arg [1] : (optional)
string $logic_name
1478 The name of the analysis performed on the dna align features
1480 Arg [2] : (optional)
float $score
1481 The mimimum score of the features to retrieve
1482 Arg [3] : (optional)
string $dbtype
1483 The name of an attached database to retrieve the features from
1484 instead, e.g.
'otherfeatures'.
1485 Arg [4] : (optional)
float hcoverage
1486 The minimum hcoverage od the featurs to retrieve
1487 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
1488 Description: Retrieves the DnaDnaAlignFeatures which overlap
this slice with
1489 logic name $logic_name and with score above $score. If
1490 $logic_name is not defined features of all logic names are
1491 retrieved. If $score is not defined features of all scores are
1492 retrieved. Strand of the Slice is not considered.
1493 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
1494 Exceptions : warning
if slice does not have attached adaptor
1500 sub get_all_DnaAlignFeatures {
1501 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1502 return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage,
'DnaAlignFeature');
1505 =head2 get_all_ProteinAlignFeatures
1507 Arg [1] : (optional)
string $logic_name
1508 The name of the analysis performed on the protein align features
1510 Arg [2] : (optional)
float $score
1511 The mimimum score of the features to retrieve
1512 Arg [3] : (optional)
string $dbtype
1513 The name of an attached database to retrieve features from
1515 Arg [4] : (optional)
float hcoverage
1516 The minimum hcoverage od the featurs to retrieve
1517 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
1518 Description: Retrieves the DnaPepAlignFeatures which overlap
this slice with
1519 logic name $logic_name and with score above $score. If
1520 $logic_name is not defined features of all logic names are
1521 retrieved. If $score is not defined features of all scores are
1523 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
1524 Exceptions : warning
if slice does not have attached adaptor
1530 sub get_all_ProteinAlignFeatures {
1531 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1532 return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage,
'ProteinAlignFeature');
1535 =head2 _get_AlignFeatures
1537 Arg [1] : (optional)
string $logic_name
1538 The name of the analysis performed on the protein align features
1540 Arg [2] : (optional)
float $score
1541 The mimimum score of the features to retrieve
1542 Arg [3] : (optional)
string $dbtype
1543 The name of an attached database to retrieve features from
1545 Arg [4] : (optional)
float hcoverage
1546 The minimum hcoverage od the featurs to retrieve
1547 Arg [5] :
string $align_type
1548 The type of adaptor to retrieve alignments from. Must be an BaseAlignFeature derived
1550 Description: Generic method which deals with the retrieval of either AlignFeature adaptor and allows
1551 you to
switch the adaptor values are retrieved from.
1555 sub _get_AlignFeatures {
1556 my ($self, $logic_name, $score, $dbtype, $hcoverage, $align_type) = @_;
1557 throw "No align_type given" unless $align_type;
1558 if(my $adaptor = $self->_get_CoreAdaptor($align_type, $dbtype)) {
1559 if(defined($score) and defined ($hcoverage)){
1560 warning
"Cannot specify score and hcoverage when querying for $align_type. Using score only";
1562 if(defined($score)){
1563 return $adaptor->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1565 return $adaptor->fetch_all_by_Slice_and_hcoverage($self, $hcoverage, $logic_name);
1570 =head2 get_all_SimilarityFeatures
1572 Arg [1] : (optional)
string $logic_name
1573 the name of the analysis performed on the features to retrieve
1574 Arg [2] : (optional)
float $score
1575 the lower bound of the score of the features to be retrieved
1576 Example : @feats = @{$slice->get_all_SimilarityFeatures};
1577 Description: Retrieves all dna_align_features and protein_align_features
1578 with analysis named $logic_name and with score above $score.
1579 It is probably faster to use get_all_ProteinAlignFeatures or
1580 get_all_DnaAlignFeatures
if a sepcific feature type is desired.
1581 If $logic_name is not defined features of all logic names are
1582 retrieved. If $score is not defined features of all scores are
1584 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
1585 Exceptions : warning
if slice does not have attached adaptor
1591 sub get_all_SimilarityFeatures {
1592 my ($self, $logic_name, $score) = @_;
1596 push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
1597 push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
1602 =head2 get_all_SimpleFeatures
1604 Arg [1] : (optional)
string $logic_name
1605 The name of the analysis performed on the simple features
1607 Arg [2] : (optional)
float $score
1608 The mimimum score of the features to retrieve
1609 Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
1610 Description: Retrieves the SimpleFeatures which overlap
this slice with
1611 logic name $logic_name and with score above $score. If
1612 $logic_name is not defined features of all logic names are
1613 retrieved. If $score is not defined features of all scores are
1615 Returntype : listref of Bio::EnsEMBL::SimpleFeatures
1616 Exceptions : warning
if slice does not have attached adaptor
1622 sub get_all_SimpleFeatures {
1623 my ($self, $logic_name, $score, $dbtype) = @_;
1624 if(my $adaptor = $self->_get_CoreAdaptor(
'SimpleFeature', $dbtype)) {
1625 return $adaptor->fetch_all_by_Slice_and_score($self, $score, $logic_name);
1630 =head2 get_all_RepeatFeatures
1632 Arg [1] : (optional)
string $logic_name
1633 The name of the analysis performed on the repeat features
1635 Arg [2] : (optional)
string/array $repeat_type
1636 Limits features returned to those of the specified
1637 repeat_type. Can specify a single value or an array reference
1638 to limit by more than one
1639 Arg [3] : (optional)
string $db
1640 Key
for database e.g. core/vega/cdna/....
1641 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,
'Type II Transposons')};
1642 Description: Retrieves the RepeatFeatures which overlap with
1643 logic name $logic_name and with score above $score. If
1644 $logic_name is not defined features of all logic names are
1646 Returntype : listref of Bio::EnsEMBL::RepeatFeatures
1647 Exceptions : warning
if slice does not have attached adaptor
1653 sub get_all_RepeatFeatures {
1654 my ($self, $logic_name, $repeat_type, $dbtype) = @_;
1655 if(my $adaptor = $self->_get_CoreAdaptor(
'RepeatFeature', $dbtype)) {
1656 return $adaptor->fetch_all_by_Slice($self, $logic_name, $repeat_type);
1661 =head2 get_all_LD_values
1663 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
1664 Description : returns all LD values on
this slice. This
function will only work correctly
if the variation
1665 database has been attached to the core database. If the argument is passed, will
return the LD information
1667 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1669 Caller : contigview, snpview
1674 sub get_all_LD_values {
1676 my $population = shift;
1678 my $ld_adaptor = $self->_get_VariationAdaptor(
'LDFeatureContainer');
1680 return $ld_adaptor->fetch_by_Slice($self,$population);
1685 =head2 _get_VariationFeatureAdaptor
1687 Shortcut method here because VariationFeature is an often requested
1692 sub _get_VariationFeatureAdaptor {
1693 my ($self, $dbtype) = @_;
1694 return $self->_get_VariationAdaptor(
'VariationFeature', $dbtype);
1697 =head2 _get_StructuralVariationFeatureAdaptor
1699 Shortcut method here because StructuralVariationFeature is an often requested
1704 sub _get_StructuralVariationFeatureAdaptor {
1705 my ($self, $dbtype) = @_;
1706 return $self->_get_VariationAdaptor(
'StructuralVariationFeature', $dbtype);
1709 =head2 _get_VariationAdaptor
1711 Arg [1] : String object_type to retrieve an adaptor
for
1712 Arg [2] : String dbtype to search
for the given adaptor in. Defaults to variation
1713 Description : Searches
for the specified adaptor in the Registry and returns it. Otherwise
1714 it will
return nothing
if the adaptor was not found
1720 sub _get_VariationAdaptor {
1721 my ($self, $object_type, $dbtype) = @_;
1722 # very important to do this defaulting since we *have* to assume the variation
1723 # database is called 'variation'. Using the current group will not work because
1724 # that will be something like 'core' (most likely), 'ensembl' or 'vega'.
1725 $dbtype ||=
'variation';
1727 # Also we do not care about Registry->get_db() for variation DBs
1728 my $do_not_check_db = 1;
1730 return $self->_get_Adaptor($object_type, $dbtype, $do_not_check_db);
1733 =head2 _get_CoreAdaptor
1735 Arg [1] : String object_type to retrieve an adaptor
for
1736 Arg [2] : String dbtype to search
for the given adaptor in. Defaults to core
1737 Description : Searches
for the specified adaptor in the Registry and returns it. Otherwise
1738 it will
return nothing
if the adaptor was not found
1744 sub _get_CoreAdaptor {
1745 my ($self, $object_type, $dbtype) = @_;
1746 #Simple pass through
1747 return $self->_get_Adaptor($object_type, $dbtype);
1752 Arg [1] : String object_type to retrieve an adaptor
for
1753 Arg [2] : String dbtype to search
for the given adaptor in
1754 Arg [3] : Boolean Turn off the checking of Registry->get_db()
for your
1756 Description : Searches
for the specified adaptor in the Registry and returns it. Otherwise
1757 it will
return nothing
if the adaptor was not found. We consult the
1759 fall back to the normal methods of finding an adaptor.
1761 This method will warn when adaptors are missing but will never through an
1762 exception. It is up to the calling code to decide how to handle the unavailablity
1764 ReturnType :
Bio::
EnsEMBL::DBSQL::BaseAdaptor derrived instance. Otherwise it returns nothing
1770 my ($self, $object_type, $dbtype, $do_not_check_db) = @_;
1773 warning(
'Object type is a required parameter');
1777 my $adaptor = $self->adaptor();
1780 warning(
"Cannot get a ${object_type} adaptor without a SliceAdaptor attached to this instance of ".ref($self));
1784 my $local_db = $adaptor->db();
1785 my $species = $local_db->species();
1787 #First we query for the DBAdaptor using get_db(). This is a deprecated method
1788 #call but "special" adaptors can be registered via this method. We must
1789 #consult here 1st to find the possible special adaptor
1790 if(!$do_not_check_db && $dbtype) {
1791 my $db = $registry->get_db($local_db, $dbtype);
1793 # If we got a return then use this DBAdaptor's species name, group and the given object type.
1794 # Special adaptors can have different species names
1795 $adaptor = $registry->get_adaptor($db->species(), $db->group(), $object_type);
1798 #Otherwise just use the current species, dbtype and object type
1799 $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1802 # Otherwise our query group is the one attached to the current adaptor
1804 #If not set use the group attached to the local adaptor
1805 $dbtype ||= $local_db->group();
1806 $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1808 return $adaptor
if $adaptor;
1810 warning(
"No adaptor could be found for the species ${species}, database type ${dbtype} and object type ${object_type}");
1814 =head2 get_all_VariationFeatures
1816 Args [1] : (optional) ArrayRef $so_terms
1817 SequenceOntology terms to limit the fetch to
1818 Args [2] : (optional)
boolean $without_children
1819 Do not query
using the children of the given SO terms
1820 i.e. query
using the given terms directly
1821 Args [3] : (optional) ArrayRef $included_so
1822 ArrayRef of SequenceOntology which should be queried
for
1823 without children. This argument allows you to combine SO terms with children
1824 from argument 1 with extra non-child SO terms. e.g. you wish to query
for
1825 all protein_altering_variant (specified in argument 1) variations which
1826 would be defined by child SO terms but also wanted stop_retained_variant linked variations
1827 defined by
this argument
1828 Args [4] : (optional)
string $dbtype
1829 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1830 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1831 DBConnection::add_db_adaptor method).
1832 Description : Returns all germline variation features on
this slice. This
function will
1833 only work correctly
if the variation database has been attached to the core
1835 If $so_terms is specified, only variation features with a consequence type
1836 that matches or is an ontological child of any of the supplied terms will
1838 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1840 Caller : contigview, snpview
1845 sub get_all_VariationFeatures{
1846 my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1847 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1848 return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1853 =head2 get_all_somatic_VariationFeatures
1854 Args [1] : (optional) ArrayRef $so_terms
1855 SequenceOntology terms to limit the fetch to
1856 Args [2] : (optional)
boolean $without_children
1857 Do not query
using the children of the given SO terms
1858 i.e. query
using the given terms directly
1859 Args [3] : (optional) ArrayRef $included_so
1860 ArrayRef of SequenceOntology which should be queried
for
1861 without children. This argument allows you to combine SO terms with children
1862 from argument 1 with extra non-child SO terms. e.g. you wish to query
for
1863 all protein_altering_variant (specified in argument 1) variations which
1864 would be defined by child SO terms but also wanted stop_retained_variant linked variations
1865 defined by
this argument
1866 Args [4] : (optional)
string $dbtype
1867 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1868 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1869 DBConnection::add_db_adaptor method).
1870 Description : Returns all somatic variation features on
this slice. This
function will only
1871 work correctly
if the variation database has been attached to the core database.
1872 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1878 sub get_all_somatic_VariationFeatures {
1879 my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1880 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1881 return $vf_adaptor->fetch_all_somatic_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1886 =head2 get_all_somatic_VariationFeatures_by_source
1887 Arg [1] :
string $source [optional]
1888 The name of the source to query
for
1889 Arg [2] :
string $dbtype [optional]
1890 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1891 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1892 DBConnection::add_db_adaptor method).
1893 Description : Returns all somatic variation features, from a defined source name (e.g.
'COSMIC'),
1894 on
this slice. This
function will only work correctly
if the variation database
1895 has been attached to the core database.
1896 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1902 sub get_all_somatic_VariationFeatures_by_source {
1903 my ($self, $source, $dbtype) = @_;
1904 my $constraint = (defined($source)) ?
" s.name='$source' " : undef;
1905 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1906 return $vf_adaptor->fetch_all_somatic_by_Slice_constraint($self, $constraint);
1911 =head2 get_all_somatic_VariationFeatures_with_phenotype
1912 Arg [1] : $variation_feature_source [optional]
1913 Arg [2] : $phenotype_source [optional]
1914 Arg [3] : $phenotype_name [optional]
1915 Arg [4] :
string $dbtype [optional]
1916 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1917 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1918 DBConnection::add_db_adaptor method).
1919 Description : returns all somatic variation features on
this slice associated with a phenotype.
1920 (see get_all_VariationFeatures_with_phenotype
for further documentation)
1921 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1927 sub get_all_somatic_VariationFeatures_with_phenotype {
1928 my ($self, $source, $p_source, $phenotype, $dbtype) = @_;
1929 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1930 return $vf_adaptor->fetch_all_somatic_with_phenotype_by_Slice($self, $source, $p_source, $phenotype);
1936 =head2 get_all_VariationFeatures_by_Population
1937 Arg [1] : Bio::EnsEMBL::Variation::Population
1938 Arg [2] : $minimum_frequency [optional]
1939 Arg [3] :
string $dbtype [optional]
1940 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1941 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1942 DBConnection::add_db_adaptor method).
1943 Example : $pop = $pop_adaptor->fetch_by_dbID(659);
1944 @vfs = @{$slice->get_all_VariationFeatures_by_Population($pop,$slice)};
1945 Description : Retrieves all variation features in a slice which are stored
for
1946 a specified population. If $minimum_frequency is supplied, only
1947 variations with a minor allele frequency (MAF) greater than
1948 $minimum_frequency will be returned.
1949 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
1950 Exceptions :
throw on incorrect argument
1956 sub get_all_VariationFeatures_by_Population {
1957 my ($self, $minimum_frequency, $dbtype) = @_;
1958 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1959 return $vf_adaptor->fetch_all_by_Slice_Population($self, $minimum_frequency);
1964 =head2 get_all_Genes
1966 Arg [1] : (optional)
string $logic_name
1967 The name of the analysis used to generate the genes to retrieve
1968 Arg [2] : (optional)
string $dbtype
1969 The dbtype of genes to obtain. This assumes that the db has
1970 been added to the DBAdaptor under
this name (
using the
1971 DBConnection::add_db_adaptor method).
1972 Arg [3] : (optional)
boolean $load_transcripts
1973 If set to
true, transcripts will be loaded immediately rather
1974 than being lazy-loaded on request. This will result in a
1975 significant speed up
if the Transcripts and Exons are going to
1976 be used (but a slow down
if they are not).
1977 Arg [4] : (optional)
string $source
1978 The source of the genes to retrieve.
1979 Arg [5] : (optional)
string $biotype
1980 The biotype of the genes to retrieve.
1981 Example : @genes = @{$slice->get_all_Genes};
1982 Description: Retrieves all genes that overlap
this slice, including those on
1984 Returntype : listref of Bio::EnsEMBL::Genes
1992 my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
1993 if(my $adaptor = $self->_get_CoreAdaptor(
'Gene', $dbtype)) {
1994 return $adaptor->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
1999 =head2 get_all_Genes_by_type
2001 Arg [1] :
string $type
2002 The biotype of genes wanted.
2003 Arg [2] : (optional)
string $logic_name
2004 Arg [3] : (optional)
boolean $load_transcripts
2005 If set to
true, transcripts will be loaded immediately rather
2006 than being lazy-loaded on request. This will result in a
2007 significant speed up
if the Transcripts and Exons are going to
2008 be used (but a slow down
if they are not).
2009 Example : @genes = @{$slice->get_all_Genes_by_type(
'protein_coding',
2011 Description: Retrieves genes that overlap
this slice of biotype $type.
2012 This is primarily used by the genebuilding code when several
2013 biotypes of genes are used.
2015 The logic name is the analysis of the genes that are retrieved.
2016 If not provided all genes will be retrieved instead. Both
2017 positive and negative strand Genes will be returned.
2019 Returntype : listref of Bio::EnsEMBL::Genes
2021 Caller : genebuilder, general
2026 sub get_all_Genes_by_type {
2027 my ($self, $type, $logic_name, $load_transcripts) = @_;
2028 return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
2032 =head2 get_all_Genes_by_source
2034 Arg [1] :
string source
2035 Arg [2] : (optional)
boolean $load_transcripts
2036 If set to
true, transcripts will be loaded immediately rather
2037 than being lazy-loaded on request. This will result in a
2038 significant speed up
if the Transcripts and Exons are going to
2039 be used (but a slow down
if they are not).
2040 Example : @genes = @{$slice->get_all_Genes_by_source(
'ensembl')};
2041 Description: Retrieves genes that overlap
this slice of source $source.
2042 Strand of the Slice does not affect the result.
2043 Returntype : listref of Bio::EnsEMBL::Genes
2050 sub get_all_Genes_by_source {
2051 my ($self, $source, $load_transcripts) = @_;
2052 return $self->get_all_Genes(undef, undef, $load_transcripts, $source);
2055 =head2 get_all_Transcripts
2057 Arg [1] : (optional)
boolean $load_exons
2058 If set to
true exons will not be lazy-loaded but will instead
2059 be loaded right away. This is faster
if the exons are
2060 actually going to be used right away.
2061 Arg [2] : (optional)
string $logic_name
2062 the logic name of the type of features to obtain
2063 Arg [3] : (optional)
string $db_type
2064 Example : @transcripts = @{$slice->get_all_Transcripts)_};
2065 Description: Gets all transcripts which overlap
this slice. If you want to
2066 specify a particular analysis or type, then you are better off
2067 using get_all_Genes or get_all_Genes_by_type and iterating
2068 through the transcripts of each gene. Strand of the Slice is
2070 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
2077 sub get_all_Transcripts {
2078 my ($self, $load_exons, $logic_name, $dbtype, $source, $biotype) = @_;
2079 if(my $adaptor = $self->_get_CoreAdaptor(
'Transcript', $dbtype)) {
2080 return $adaptor->fetch_all_by_Slice($self, $load_exons, $logic_name, undef, $source, $biotype);
2085 =head2 get_all_Transcripts_by_type
2087 Arg [1] :
string $type
2088 The biotype of transcripts wanted.
2089 Arg [2] : (optional)
string $logic_name
2090 Arg [3] : (optional)
boolean $load_exons
2091 If set to
true exons will not be lazy-loaded but will instead
2092 be loaded right away. This is faster
if the exons are
2093 actually going to be used right away.
2095 Example : @transcripts = @{$slice->get_all_Transcripts_by_type(
'protein_coding',
2097 Description: Retrieves transcripts that overlap
this slice of biotype $type.
2098 This is primarily used by the genebuilding code when several
2099 biotypes of transcripts are used.
2101 The logic name is the analysis of the transcripts that are retrieved.
2102 If not provided all transcripts will be retrieved instead. Both
2103 positive and negative strand transcripts will be returned.
2105 Returntype : listref of Bio::EnsEMBL::Transcripts
2107 Caller : genebuilder, general
2112 sub get_all_Transcripts_by_type {
2113 my ($self, $biotype, $logic_name, $load_exons) = @_;
2114 return $self->get_all_Transcripts($load_exons, $logic_name, undef, undef, $biotype);
2118 =head2 get_all_Transcripts_by_source
2120 Arg [1] :
string source
2121 Arg [2] : (optional)
boolean $load_exons
2122 If set to
true exons will not be lazy-loaded but will instead
2123 be loaded right away. This is faster
if the exons are
2124 actually going to be used right away.
2125 Example : @transcripts = @{$slice->get_all_Transcripts_by_source(
'ensembl')};
2126 Description: Retrieves transcripts that overlap
this slice of source $source.
2127 Strand of the Slice does not affect the result.
2128 Returntype : listref of Bio::EnsEMBL::Transcripts
2135 sub get_all_Transcripts_by_source {
2136 my ($self, $source, $load_exons) = @_;
2137 return $self->get_all_Transcripts($load_exons, undef, undef, $source);
2142 =head2 get_all_Exons
2145 Example : @exons = @{$slice->get_all_Exons};
2146 Description: Gets all exons which overlap
this slice. Note that these exons
2147 will not be associated with any transcripts, so
this may not
2149 Returntype : reference to a list of Bio::EnsEMBL::Exons
2158 if(!$self->adaptor()) {
2159 warning(
'Cannot get Exons without attached adaptor');
2162 return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
2165 =head2 get_all_KaryotypeBands
2168 Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
2169 Description: Retrieves the karyotype bands which
this slice overlaps.
2170 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
2172 Caller : general, contigview
2177 sub get_all_KaryotypeBands {
2179 if (my $adaptor = $self->_get_CoreAdaptor(
'KaryotypeBand')) {
2180 return $adaptor->fetch_all_by_Slice($self);
2185 =head2 get_repeatmasked_seq
2187 Arg [1] : listref of strings $logic_names (optional)
2188 Arg [2] :
int $soft_masking_enable (optional)
2189 Arg [3] : hash reference $not_default_masking_cases (optional,
default is {})
2190 The values are 0 or 1
for hard and soft masking respectively
2191 The keys of the hash should be of 2 forms
2192 "repeat_class_" . $repeat_consensus->repeat_class,
2193 e.g.
"repeat_class_SINE/MIR"
2194 "repeat_name_" . $repeat_consensus->name
2195 e.g.
"repeat_name_MIR"
2196 depending on which base you want to apply the not
default
2197 masking either the repeat_class or repeat_name. Both can be
2198 specified in the same hash at the same time, but in that
case,
2199 repeat_name setting has priority over repeat_class. For example,
2200 you may have hard masking as
default, and you may want soft
2201 masking of all repeat_class SINE/MIR, but repeat_name AluSp
2202 (which are also from repeat_class SINE/MIR).
2203 Your hash will be something like {
"repeat_class_SINE/MIR" => 1,
2204 "repeat_name_AluSp" => 0}
2205 Example : $rm_slice = $slice->get_repeatmasked_seq();
2206 $softrm_slice = $slice->get_repeatmasked_seq([
'RepeatMask'],1);
2208 masked sequence instead of the regular sequence.
2209 Sequence returned by
this new slice will have repeat regions
2210 hardmasked by
default (sequence replaced by N) or
2211 or soft-masked when arg[2] = 1 (sequence in lowercase)
2212 Will only work with database connection to get repeat features.
2220 sub get_repeatmasked_seq {
2221 my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
2224 (-START => $self->{
'start'},
2225 -END => $self->{
'end'},
2226 -STRAND => $self->{
'strand'},
2227 -ADAPTOR => $self->adaptor(),
2228 -SEQ => $self->{
'seq'},
2229 -SEQ_REGION_NAME => $self->{
'seq_region_name'},
2230 -SEQ_REGION_LENGTH => $self->{
'seq_region_length'},
2231 -COORD_SYSTEM => $self->{
'coord_system'},
2232 -REPEAT_MASK => $logic_names,
2233 -SOFT_MASK => $soft_mask,
2234 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
2239 =head2 _mask_features
2241 Arg [1] : reference to a
string $dnaref
2242 Arg [2] : array_ref $repeats
2244 give the list of coordinates to replace with N or with
2246 Arg [3] :
int $soft_masking_enable (optional)
2247 Arg [4] : hash reference $not_default_masking_cases (optional,
default is {})
2248 The values are 0 or 1
for hard and soft masking respectively
2249 The keys of the hash should be of 2 forms
2250 "repeat_class_" . $repeat_consensus->repeat_class,
2251 e.g.
"repeat_class_SINE/MIR"
2252 "repeat_name_" . $repeat_consensus->name
2253 e.g.
"repeat_name_MIR"
2254 depending on which base you want to apply the not
default masking either
2255 the repeat_class or repeat_name. Both can be specified in the same hash
2256 at the same time, but in that
case, repeat_name setting has priority over
2257 repeat_class. For example, you may have hard masking as
default, and
2258 you may want soft masking of all repeat_class SINE/MIR,
2259 but repeat_name AluSp (which are also from repeat_class SINE/MIR).
2260 Your hash will be something like {
"repeat_class_SINE/MIR" => 1,
2261 "repeat_name_AluSp" => 0}
2263 Description: replaces
string positions described in the RepeatFeatures
2264 with Ns (
default setting), or with the lower
case equivalent
2265 (soft masking). The reference to a dna
string which is passed
2266 is changed in place.
2274 sub _mask_features {
2275 my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
2277 $soft_mask = 0 unless (defined $soft_mask);
2278 $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
2280 # explicit CORE::length call, to avoid any confusion with the Slice
2282 my $dnalen = CORE::length($$dnaref);
2284 REP:
foreach my $old_f (@{$repeats}) {
2285 my $f = $old_f->transfer( $self );
2286 my $start = $f->start;
2288 my $length = ($end - $start) + 1;
2290 # check if we get repeat completely outside of expected slice range
2291 if ($end < 1 || $start > $dnalen) {
2292 # warning("Unexpected: Repeat completely outside slice coordinates.");
2296 # repeat partly outside slice range, so correct
2297 # the repeat start and length to the slice size if needed
2300 $length = ($end - $start) + 1;
2303 # repeat partly outside slice range, so correct
2304 # the repeat end and length to the slice size if needed
2305 if ($end > $dnalen) {
2307 $length = ($end - $start) + 1;
2313 # if we decide to define masking on the base of the repeat_type, we'll need
2314 # to add the following, and the other commented line few lines below.
2318 if ($f->isa(
'Bio::EnsEMBL::RepeatFeature')) {
2319 $rc_class =
"repeat_class_" . $f->repeat_consensus->repeat_class;
2320 $rc_name =
"repeat_name_" . $f->repeat_consensus->name;
2324 $masking_type = $not_default_masking_cases->{$rc_class}
if (defined $not_default_masking_cases->{$rc_class});
2325 $masking_type = $not_default_masking_cases->{$rc_name}
if (defined $not_default_masking_cases->{$rc_name});
2327 $masking_type = $soft_mask unless (defined $masking_type);
2329 if ($masking_type) {
2330 $padstr = lc substr ($$dnaref,$start,$length);
2332 $padstr =
'N' x $length;
2334 substr ($$dnaref,$start,$length) = $padstr;
2339 =head2 get_all_SearchFeatures
2341 Arg [1] : scalar $ticket_ids
2342 Example : $slice->get_all_SearchFeatures(
'BLA_KpUwwWi5gY');
2343 Description: Retrieves all search features
for stored blast
2344 results
for the ticket that overlap
this slice
2345 Returntype : listref of Bio::EnsEMBL::SeqFeatures
2347 Caller : general (webby!)
2352 sub get_all_SearchFeatures {
2357 throw(
"ticket argument is required");
2360 if(!$self->adaptor()) {
2361 warning(
"Cannot get SearchFeatures without an attached adaptor");
2365 my $sfa = $self->adaptor()->db()->get_db_adaptor(
'blast');
2367 my $offset = $self->start-1;
2369 my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
2371 foreach( @$features ) {
2372 $_->start( $_->start - $offset );
2373 $_->end( $_->end - $offset );
2379 =head2 get_all_AssemblyExceptionFeatures
2381 Example : $slice->get_all_AssemblyExceptionFeatures();
2382 Description: Retrieves all misc features which overlap
this slice. If
2383 a set code is provided only features which are members of
2384 the requested set are returned.
2385 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
2392 sub get_all_AssemblyExceptionFeatures {
2394 if(my $adaptor = $self->_get_CoreAdaptor(
'AssemblyExceptionFeature')) {
2395 return $adaptor->fetch_all_by_Slice($self);
2402 =head2 get_all_MiscFeatures
2404 Arg [1] :
string $set (optional)
2405 Arg [2] :
string $database (optional)
2406 Example : $slice->get_all_MiscFeatures(
'cloneset');
2407 Description: Retrieves all misc features which overlap
this slice. If
2408 a set code is provided only features which are members of
2409 the requested set are returned.
2410 Returntype : listref of Bio::EnsEMBL::MiscFeatures
2417 sub get_all_MiscFeatures {
2418 my ($self, $misc_set, $dbtype) = @_;
2419 if(my $adaptor = $self->_get_CoreAdaptor(
'MiscFeature', $dbtype)) {
2421 return $adaptor->fetch_all_by_Slice_and_set_code($self,$misc_set);
2423 return $adaptor->fetch_all_by_Slice($self);
2428 =head2 get_all_MarkerFeatures
2430 Arg [1] : (optional)
string logic_name
2431 The logic name of the marker features to retrieve
2432 Arg [2] : (optional)
int $priority
2433 Lower (exclusive) priority bound of the markers to retrieve
2434 Arg [3] : (optional)
int $map_weight
2435 Upper (exclusive) priority bound of the markers to retrieve
2436 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
2437 Description: Retrieves all markers which lie on
this slice fulfilling the
2438 specified map_weight and priority parameters (
if supplied).
2439 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2441 Caller : contigview, general
2446 sub get_all_MarkerFeatures {
2447 my ($self, $logic_name, $priority, $map_weight) = @_;
2448 if(my $adaptor = $self->_get_CoreAdaptor(
'MarkerFeature')) {
2449 return $adaptor->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name);
2455 =head2 get_MarkerFeatures_by_Name
2457 Arg [1] :
string marker Name
2458 The name (synonym) of the marker feature(s) to retrieve
2459 Example : my @markers = @{$slice->get_MarkerFeatures_by_Name(
'z1705')};
2460 Description: Retrieves all markers with
this ID
2461 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2463 Caller : contigview, general
2468 sub get_MarkerFeatures_by_Name {
2469 my ($self, $name) = @_;
2470 if(my $adaptor = $self->_get_CoreAdaptor(
'MarkerFeature')) {
2471 return $adaptor->fetch_all_by_Slice_and_MarkerName($self, $name);
2477 =head2 get_all_compara_DnaAlignFeatures
2479 Arg [1] :
string $qy_species
2480 The name of the species to retrieve similarity features from
2481 Arg [2] :
string $qy_assembly
2482 The name of the assembly to retrieve similarity features from
2483 Arg [3] :
string $type
2484 The type of the alignment to retrieve similarity features from
2485 Arg [4] : <optional> compara dbadptor to use.
2486 Example : $fs = $slc->get_all_compara_DnaAlignFeatures(
'Mus musculus',
2489 Description: Retrieves a list of DNA-DNA Alignments to the species specified
2490 by the $qy_species argument.
2491 The compara database must be attached to the core database
2492 for this call to work correctly. As well the compara database
2493 must have the core dbadaptors
for both
this species, and the
2494 query species added to
function correctly.
2495 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
2496 Exceptions : warning
if compara database is not available
2502 sub get_all_compara_DnaAlignFeatures {
2503 my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
2505 if(!$self->adaptor()) {
2506 warning(
"Cannot retrieve DnaAlignFeatures without attached adaptor");
2510 unless($qy_species && $alignment_type # && $qy_assembly
2512 throw(
"Query species and assembly and alignmemt type arguments are required");
2515 if(!defined($compara_db)){
2518 unless($compara_db) {
2519 warning(
"Compara database must be attached to core database or passed ".
2520 "as an argument to " .
2521 "retrieve compara information");
2525 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
2526 return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
2529 =head2 get_all_compara_Syntenies
2531 Arg [1] :
string $query_species e.g.
"Mus_musculus" or
"Mus musculus"
2532 Arg [2] :
string $method_link_type,
default is
"SYNTENY"
2533 Arg [3] : <optional> compara dbadaptor to use.
2534 Description: gets all the compara syntenyies
for a specfic species
2535 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
2540 sub get_all_compara_Syntenies {
2541 my ($self, $qy_species, $method_link_type, $compara_db) = @_;
2543 if(!$self->adaptor()) {
2544 warning(
"Cannot retrieve features without attached adaptor");
2548 unless($qy_species) {
2549 throw(
"Query species and assembly arguments are required");
2552 unless (defined $method_link_type) {
2553 $method_link_type =
"SYNTENY";
2556 if(!defined($compara_db)){
2559 unless($compara_db) {
2560 warning(
"Compara database must be attached to core database or passed ".
2561 "as an argument to " .
2562 "retrieve compara information");
2565 my $gdba = $compara_db->get_GenomeDBAdaptor();
2566 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
2567 my $sra = $compara_db->get_SyntenyRegionAdaptor();
2569 my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
2570 my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
2572 if($this_gdb eq $query_gdb) {
2573 $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb]);
2575 $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
2578 return $sra->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $self);
2581 =head2 get_all_Haplotypes
2583 Arg [1] : (optional)
boolean $lite_flag
2584 if true lightweight haplotype objects are used
2585 Example : @haplotypes = $slice->get_all_Haplotypes;
2586 Description: Retrieves all of the haplotypes on
this slice. Only works
2587 if the haplotype adaptor has been attached to the core adaptor
2588 via $dba->add_db_adaptor(
'haplotype', $hdba);
2589 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
2590 Exceptions : warning is Haplotype database is not available
2591 Caller : contigview, general
2596 sub get_all_Haplotypes {
2597 my($self, $lite_flag) = @_;
2599 if(!$self->adaptor()) {
2600 warning(
"Cannot retrieve features without attached adaptor");
2604 my $haplo_db = $self->adaptor->db->get_db_adaptor(
'haplotype');
2607 warning(
"Haplotype database must be attached to core database to " .
2608 "retrieve haplotype information" );
2612 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
2614 my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
2620 sub get_all_DASFactories {
2622 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
2625 sub get_all_DASFeatures_dsn {
2626 my ($self, $source_type, $dsn) = @_;
2628 if(!$self->adaptor()) {
2629 warning(
"Cannot retrieve features without attached adaptor");
2632 my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
2634 return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
2637 =head2 get_all_DAS_Features
2640 Example : $features = $slice->get_all_DASFeatures;
2641 Description: Retrieves a hash reference to a hash of DAS feature
2642 sets, keyed by the DNS, NOTE the values of
this hash
2643 are an anonymous array containing:
2644 (1) a pointer to an array of features;
2645 (2) a pointer to the DAS stylesheet
2646 Returntype : hashref of Bio::SeqFeatures
2652 sub get_all_DAS_Features{
2655 $self->{_das_features} ||= {}; # Cache
2656 $self->{_das_styles} ||= {}; # Cache
2657 $self->{_das_segments} ||= {}; # Cache
2663 foreach my $dasfact( @{$self->get_all_DASFactories} ){
2664 my $dsn = $dasfact->adaptor->dsn;
2665 my $name = $dasfact->adaptor->name;
2666 # my $type = $dasfact->adaptor->type;
2667 my $url = $dasfact->adaptor->url;
2669 my ($type) = $dasfact->adaptor->mapping;
2670 if (ref $type eq
'ARRAY') {
2671 $type = shift @$type;
2673 $type ||= $dasfact->adaptor->type;
2674 # Construct a cache key : SOURCE_URL/TYPE
2675 # Need the type to handle sources that serve multiple types of features
2677 my $key = join(
'/', $name, $type);
2678 if( $self->{_das_features}->{$key} ){ # Use cached
2679 $das_features{$name} = $self->{_das_features}->{$key};
2680 $das_styles{$name} = $self->{_das_styles}->{$key};
2681 $das_segments{$name} = $self->{_das_segments}->{$key};
2682 }
else { # Get fresh data
2683 my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
2684 $self->{_das_features}->{$key} = $featref;
2685 $self->{_das_styles}->{$key} = $styleref;
2686 $self->{_das_segments}->{$key} = $segref;
2687 $das_features{$name} = $featref;
2688 $das_styles{$name} = $styleref;
2689 $das_segments{$name} = $segref;
2693 return (\%das_features, \%das_styles, \%das_segments);
2696 sub get_all_DASFeatures{
2697 my ($self, $source_type) = @_;
2700 if(!$self->adaptor()) {
2701 warning(
"Cannot retrieve features without attached adaptor");
2705 my %genomic_features =
map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory;
2706 return \%genomic_features;
2710 sub old_get_all_DASFeatures{
2711 my ($self,@args) = @_;
2713 if(!$self->adaptor()) {
2714 warning(
"Cannot retrieve features without attached adaptor");
2718 my %genomic_features =
2719 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
2720 $self->adaptor()->db()->_each_DASFeatureFactory;
2721 return \%genomic_features;
2726 =head2 get_all_ExternalFeatures
2728 Arg [1] : (optional)
string $track_name
2729 If specified only features from ExternalFeatureAdaptors with
2730 the track name $track_name are retrieved.
2731 If not set, all features from every ExternalFeatureAdaptor are
2733 Example : @x_features = @{$slice->get_all_ExternalFeatures}
2734 Description: Retrieves features on
this slice from external feature adaptors
2735 Returntype : listref of Bio::SeqFeatureI implementing objects in slice
2743 sub get_all_ExternalFeatures {
2744 my ($self, $track_name) = @_;
2746 if(!$self->adaptor()) {
2747 warning(
"Cannot retrieve features without attached adaptor");
2753 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
2754 my @xf_adaptors = ();
2757 #use a specific adaptor
2758 if(exists $xfa_hash->{$track_name}) {
2759 push @xf_adaptors, $xfa_hash->{$track_name};
2762 #use all of the adaptors
2763 push @xf_adaptors, values %$xfa_hash;
2767 foreach my $xfa (@xf_adaptors) {
2768 push @$features, @{$xfa->fetch_all_by_Slice($self)};
2775 =head2 get_all_DitagFeatures
2777 Arg [1] : (optional)
string ditag type
2778 Arg [1] : (optional)
string logic_name
2779 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
2780 Description: Retrieves the DitagFeatures of a specific type which overlap
2781 this slice. If type is not defined, all features are retrieved.
2782 Strandedness of the Slice is ignored.
2783 Returntype : listref of Bio::EnsEMBL::DitagFeatures
2784 Exceptions : warning
if slice does not have attached adaptor
2790 sub get_all_DitagFeatures {
2791 my ($self, $type, $logic_name) = @_;
2792 if(my $adaptor = $self->_get_CoreAdaptor(
'DitagFeature')) {
2793 return $adaptor->fetch_all_by_Slice($self, $type, $logic_name);
2798 # GENERIC FEATURES (See DBAdaptor.pm)
2800 =head2 get_generic_features
2802 Arg [1] : (optional) List of names of
generic feature types to
return.
2803 If no feature names are given, all
generic features are
2805 Example : my %features = %{$slice->get_generic_features()};
2806 Description: Gets
generic features via the
generic feature adaptors that
2807 have been added via DBAdaptor->add_GenricFeatureAdaptor (
if
2809 Returntype : Hash of named features.
2816 sub get_generic_features {
2818 my ($self, @names) = @_;
2820 if(!$self->adaptor()) {
2821 warning(
'Cannot retrieve features without attached adaptor');
2825 my $db = $self->adaptor()->db();
2827 my %features = (); #
this will hold the results
2829 # get the adaptors for each feature
2830 my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
2832 foreach my $adaptor_name (keys(%adaptors)) {
2834 my $adaptor_obj = $adaptors{$adaptor_name};
2835 # get the features and add them to the hash
2836 my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
2837 # add each feature to the hash to be returned
2838 foreach my $feature (@$features_ref) {
2839 $features{$adaptor_name} = $feature;
2847 =head2 project_to_slice
2849 Arg [1] : Slice to project to.
2850 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
2851 foreach my $segment ( @$chr_projection ){
2852 $chr_slice = $segment->to_Slice();
2853 print $clone_slice->seq_region_name().
':'. $segment->from_start().
'-'.
2854 $segment->from_end().
' -> '.$chr_slice->seq_region_name().
':'. $chr_slice->start().
2855 '-'.$chr_slice->end().
2856 $chr_slice->strand().
" length: ".($chr_slice->end()-$chr_slice->start()+1).
"\n";
2858 Description: Projection of slice to another specific slice. Needed
for where we have multiple mappings
2859 and we want to state which one to project to.
2861 can also be used as [$start,$end,$slice] triplets.
2868 sub project_to_slice {
2870 my $to_slice = shift;
2872 throw(
'Slice argument is required')
if(!$to_slice);
2874 my $slice_adaptor = $self->adaptor();
2876 if(!$slice_adaptor) {
2877 warning(
"Cannot project without attached adaptor.");
2882 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
2884 my $cs = $to_slice->coord_system();
2885 my $slice_cs = $self->coord_system();
2886 my $to_slice_id = $to_slice->get_seq_region_id;
2890 # decompose this slice into its symlinked components.
2891 # this allows us to handle haplotypes and PARs
2892 my $normal_slice_proj =
2893 $slice_adaptor->fetch_normalized_slice_projection($self);
2894 foreach my $segment (@$normal_slice_proj) {
2895 my $normal_slice = $segment->[2];
2897 $slice_cs = $normal_slice->coord_system();
2899 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
2900 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
2901 next unless defined $asm_mapper;
2903 # perform the mapping between this slice and the requested system
2904 my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
2905 $normal_slice->start(),
2906 $normal_slice->end(),
2907 $normal_slice->strand(),
2908 $slice_cs, undef, $to_slice, 1);
2910 #construct a projection from the mapping results and return it
2911 foreach my $coord (@coords) {
2912 my $original = $coord->{original};
2913 my $mapped = $coord->{mapped};
2916 next unless $mapped->isa(
'Bio::EnsEMBL::Mapper::Coordinate');
2918 my $mapped_start = $mapped->start();
2919 my $mapped_end = $mapped->end();
2920 my ($current_start, $current_end);
2921 if ($self->strand == 1) {
2922 $current_start = $original->start() - $self->start + 1;
2923 $current_end = $original->end() - $self->start + 1;
2925 $current_start = $self->end() - $original->end + 1;
2926 $current_end = $self->end - $original->start + 1;
2929 # for multiple mappings only get the correct one
2930 next unless $mapped->id == $to_slice_id;
2932 my $coord_cs = $mapped->coord_system();
2934 # If the normalised projection just ended up mapping to the
2935 # same coordinate system we were already in then we should just
2936 # return the original region. This can happen for example, if we
2937 # were on a PAR region on Y which refered to X and a projection to
2938 # 'toplevel' was requested.
2939 # if($coord_cs->equals($slice_cs)) {
2940 # # trim off regions which are not defined
2941 # return $self->_constrain_to_region();
2944 # create slices for the mapped-to coord system
2945 my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
2950 push @projection, bless([$current_start, $current_end, $slice],
2951 "Bio::EnsEMBL::ProjectionSegment");
2956 # delete the cache as we may want to map to different set next time and old
2957 # results will be cached.
2958 $mapper_aptr->delete_cache;
2960 return \@projection;
2964 =head2 get_all_synonyms
2966 Args [1] : String external_db_name The name of the database to retrieve
2968 Args [2] : (optional) Integer external_db_version Optionally restrict
2969 results from external_db_name to a specific version of
2970 the the specified external database
2971 Example : my @alternative_names = @{$slice->get_all_synonyms()};
2972 @alternative_names = @{$slice->get_all_synonyms(
'EMBL')};
2973 Description: get a list of alternative names
for this slice
2974 Returntype : reference to list of SeqRegionSynonym objects.
2981 sub get_all_synonyms {
2982 my ($self, $external_db_name, $external_db_version) = @_;
2984 if ( !defined( $self->{
'synonym'} ) ) {
2985 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
2986 $self->{
'synonym'} =
2987 $adap->get_synonyms( $self->get_seq_region_id($self) );
2990 if(! $external_db_name) {
2991 return $self->{
'synonym'};
2993 my @args = ($external_db_version) ?
2994 ($external_db_name, $external_db_version) :
2995 ($external_db_name, undef, 1);
2996 my $external_db_id = $self->adaptor->db()->get_DBEntryAdaptor()->get_external_db_id(@args);
2997 if(!$external_db_id) {
2998 my $extra = ($external_db_version) ?
"and version $external_db_version " : q{};
2999 throw "The external database $external_db_name ${extra}did not result in a valid identifier";
3002 return [ grep { $_->external_db_id() == $external_db_id } @{$self->{synonym}} ];
3008 Example : $slice->add_synonym(
"alt_name");
3009 Description: add an alternative name
for this slice
3020 my $external_db_id = shift;
3022 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
3023 if ( !defined( $self->{
'synonym'} ) ) {
3024 $self->{
'synonym'} = $self->get_all_synonyms();
3028 -external_db_id => $external_db_id,
3029 -seq_region_id => $self->get_seq_region_id($self));
3031 push (@{$self->{
'synonym'}}, $new_syn);
3037 # package variable to minimize duplication
3038 my %region_so_mapping = (
3039 'chromosome' =>
'SO:0000340',
3040 'supercontig' =>
'SO:0000148',
3041 'scaffold' =>
'SO:0000148',
3042 'contig' =>
'SO:0000149'
3045 =head2 feature_so_acc
3047 Example : $slice_so_acc = $slice->feature_so_acc;
3048 Description : This method returns a
string containing the SO
accession number of the slice, based on the coordinate system name.
3049 Returns : string (Sequence Ontology
accession number)
3053 sub feature_so_acc {
3056 # return the region SO acc, or Slice acc
3057 return $region_so_mapping{$self->coord_system_name}
3060 =head2 feature_so_term
3062 Description: This method returns a
string containing the SO term of the slice, based on the coordinate system name
3063 Define constant SEQUENCE_ONTOLOGY in classes that require it, or
override it
for multiple possible values
for a
class.
3064 Returntype : String (Sequence Ontology term)
3065 Exceptions : Thrown
if caller SEQUENCE_ONTOLOGY is undefined and is not a
Bio::EnsEMBL::Slice
3069 sub feature_so_term {
3072 return defined($region_so_mapping{$self->coord_system_name}) ?
3077 =head2 summary_as_hash
3079 Example : $slice_summary = $slice->summary_as_hash();
3080 Description : Retrieves a textual summary of
this slice.
3081 Returns : hashref of descriptive strings
3084 sub summary_as_hash {
3087 my @aliases =
map { $_->name } @{$self->slice->get_all_synonyms()};
3089 $summary{
'seq_region_name'} = $self->seq_region_name;
3090 $summary{
'id'} = $self->seq_region_name;
3091 $summary{
'start'} = $self->start;
3092 $summary{
'end'} = $self->end;
3093 $summary{
'strand'} = 0;
3094 $summary{
'source'} = $self->source || $self->coord_system->version;
3095 $summary{
'Alias'} = \@aliases
if scalar(@aliases);
3096 $summary{
'Is_circular'} = $self->is_circular ?
"true" : undef;
3107 # Bioperl Bio::PrimarySeqI methods:
3112 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
3116 sub
id { name(@_); }
3121 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3125 sub display_id { name(@_); }
3130 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3134 sub primary_id { name(@_); }
3139 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3143 sub desc{
return $_[0]->coord_system->name().
' '.$_[0]->seq_region_name(); }
3148 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
3152 sub moltype {
return 'dna'; }
3156 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3160 sub alphabet {
return 'dna'; }
3163 =head2 accession_number
3165 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3169 sub accession_number { name(@_); }