3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 $sa = $db->get_SliceAdaptor;
40 $sa->fetch_by_region(
'chromosome',
'X', 1_000_000, 2_000_000 );
42 # get some attributes of the slice
44 my $start = $slice->start();
45 my $end = $slice->end();
47 # get the sequence from the slice
48 my $seq = $slice->seq();
50 # get some features from the slice
51 foreach $gene ( @{ $slice->get_all_Genes } ) {
52 # do something with a gene
55 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56 # do something with dna-dna alignments
61 A Slice
object represents a region of a genome. It can be used to retrieve
62 sequence or features from an area of interest.
64 NOTE: The Slice is defined by its Strand, but normal behaviour
for get_all_*
65 methods is to
return Features on both Strands.
71 package Bio::EnsEMBL::Slice;
89 use Scalar::Util qw(weaken isweak);
93 my $registry =
"Bio::EnsEMBL::Registry";
95 @ISA = qw(Bio::PrimarySeqI);
100 Arg [...] : List of named arguments
102 string SEQ_REGION_NAME,
105 int SEQ_REGION_LENGTH, (optional)
106 string SEQ (optional)
107 int STRAND, (optional, defaults to 1)
113 -seq_region_name =>
'X',
114 -seq_region_length => 12e6,
115 -adaptor => $slice_adaptor);
116 Description: Creates a
new slice
object. A slice represents a region
117 of sequence in a particular coordinate system. Slices can be
118 used to retrieve sequence and features from an area of
119 interest in a genome.
121 Coordinates start at 1 and are inclusive. Negative
122 coordinates or coordinates exceeding the length of the
123 seq_region are permitted. Start must be less than or equal.
124 to end regardless of the strand.
126 Slice objects are immutable. Once instantiated their attributes
127 (with the exception of the adaptor) may not be altered. To
128 change the attributes a
new slice must be created.
130 Exceptions :
throws if start, end, coordsystem or seq_region_name not specified or not of the correct type
131 Caller : general, Bio::EnsEMBL::SliceAdaptor
139 #new can be called as a class or object method
140 my $class = ref($caller) || $caller;
142 my ($seq, $coord_system, $seq_region_name, $seq_region_length,
143 $start, $end, $strand, $adaptor, $empty) =
144 rearrange([qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
145 START END STRAND ADAPTOR EMPTY)], @_);
147 if ( !defined($seq_region_name) ) {
148 throw(
'SEQ_REGION_NAME argument is required');
150 if ( !defined($start) ) {
throw(
'START argument is required') }
151 if ( !defined($end) ) {
throw(
'END argument is required') }
153 ## if ( $start > $end + 1 ) {
154 ## throw('start must be less than or equal to end+1');
157 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
159 if ( $seq_region_length <= 0 ) {
160 throw(
'SEQ_REGION_LENGTH must be > 0');
163 if ( defined($seq) && CORE::length($seq) != ( $end - $start + 1 ) ) {
164 throw(
'SEQ must be the same length as the defined LENGTH not '
167 . ( $end - $start + 1 ) );
170 if(defined($coord_system)) {
171 if(!ref($coord_system) || !$coord_system->isa(
'Bio::EnsEMBL::CoordSystem')){
172 throw(
'COORD_SYSTEM argument must be a Bio::EnsEMBL::CoordSystem');
174 if($coord_system->is_top_level()) {
175 throw(
'Cannot create slice on toplevel CoordSystem.');
178 warning(
"Slice without coordinate system");
183 if($strand != 1 && $strand != -1) {
184 throw(
'STRAND argument must be -1 or 1');
187 if(defined($adaptor)) {
188 if(!ref($adaptor) || !$adaptor->isa(
'Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
189 throw(
'ADAPTOR argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
193 my $self = bless {
'coord_system' => $coord_system,
195 'seq_region_name' => $seq_region_name,
196 'seq_region_length' => $seq_region_length,
197 'start' => int($start),
199 'strand' => $strand}, $class;
201 $self->adaptor($adaptor);
209 Arg [1] : hashref to be blessed
222 my $self = bless $hashref, $class;
223 weaken($self->{adaptor})
if ( ! isweak($self->{adaptor}) );
230 Example : $adaptor = $slice->adaptor();
231 Description: Getter/Setter
for the slice
object adaptor used
232 by
this slice
for database interaction.
234 Exceptions : thorws
if argument passed is not a SliceAdaptor
246 if(!ref($ad) || !$ad->isa(
'Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
247 throw(
'Argument must be a Bio::EnsEMBL::DBSQL::SliceAdaptor');
250 weaken($self->{
'adaptor'} = $ad);
253 return $self->{
'adaptor'};
258 =head2 seq_region_name
261 Example : $seq_region = $slice->seq_region_name();
262 Description: Returns the name of the seq_region that
this slice is on. For
263 example
if this slice is in chromosomal coordinates the
264 seq_region_name might be
'X' or
'10'.
266 This
function was formerly named chr_name, but since slices can
267 now be on coordinate systems other than chromosomal it has been
276 sub seq_region_name {
278 return $self->{
'seq_region_name'};
281 =head2 seq_region_start
283 Example : $seq_region_start = $slice->seq_region_start();
284 Description: Returns the start position of
this slice relative to the
285 start of the sequence region that it was created on.
286 Since slices are always in genomic coordinates
this is
295 sub seq_region_start {
297 return $self->start();
300 =head2 seq_region_end
302 Example : $seq_region_end = $slice->seq_region_end();
303 Description: Returns the end position of
this slice relative to the
304 start of the sequence region that it was created on.
305 Since slices are always in genomic coordinates
this is
319 =head2 seq_region_length
322 Example : $seq_region_length = $slice->seq_region_length();
323 Description: Returns the length of the entire seq_region that
this slice is
324 on. For example
if this slice is on a chromosome
this will be
325 the length (in basepairs) of the entire chromosome.
333 sub seq_region_length {
335 return $self->{
'seq_region_length'};
342 Example : print $slice->coord_system->name();
343 Description: Returns the coordinate system that
this slice is on.
353 return $self->{
'coord_system'};
358 Arg [1] : (optional) String $value
359 Example : print $slice->source();
360 Description: Returns the source
this slice is coming from
370 $self->{
'source'} = shift
if (@_);
371 return $self->{
'source'};
374 =head2 coord_system_name
377 Example : print $slice->coord_system_name()
378 Description: Convenience method. Gets the name of the coord_system which
380 Returns undef
if this Slice does not have an attached
382 Returntype:
string or undef
389 sub coord_system_name {
391 my $csystem = $self->{
'coord_system'};
392 return ($csystem) ? $csystem->
name() : undef;
399 Example : $cp = $slice->centrepoint();
400 Description: Returns the mid position of
this slice relative to the
401 start of the sequence region that it was created on.
402 Coordinates are inclusive and start at 1.
412 return ($self->{
'start'}+$self->{
'end'})/2;
418 Example : $start = $slice->start();
419 Description: Returns the start position of
this slice relative to the
420 start of the sequence region that it was created on.
421 Coordinates are inclusive and start at 1. Negative coordinates
422 or coordinates exceeding the length of the sequence region are
423 permitted. Start is always less than or equal to end
424 regardless of the orientation of the slice.
434 return $self->{
'start'};
442 Example : $end = $slice->end();
443 Description: Returns the end position of
this slice relative to the
444 start of the sequence region that it was created on.
445 Coordinates are inclusive and start at 1. Negative coordinates
446 or coordinates exceeding the length of the sequence region are
447 permitted. End is always greater than or equal to start
448 regardless of the orientation of the slice.
458 return $self->{
'end'};
466 Example : $strand = $slice->strand();
467 Description: Returns the orientation of
this slice on the seq_region it has
469 Returntype : int (either 1 or -1)
471 Caller : general, invert
478 return $self->{
'strand'};
488 Example : my $results = $cache{$slice->name()};
489 Description: Returns the name of
this slice. The name is formatted as a colon
490 delimited
string with the following attributes:
491 coord_system:version:seq_region_name:start:end:strand
493 Slices with the same name are equivalent and thus the name can
505 my $cs = $self->{
'coord_system'};
508 ($cs) ? $cs->name() :
'',
509 ($cs) ? $cs->version() :
'',
510 $self->{
'seq_region_name'},
521 Example : $length = $slice->length();
522 Description: Returns the length of
this slice in basepairs
533 my $length = $self->{
'end'} - $self->{
'start'} + 1;
535 if ( $self->{
'start'} > $self->{
'end'} && $self->is_circular() ) {
536 $length += $self->{
'seq_region_length'};
544 Example : my $reference = $slice->is_reference()
545 Description: Returns 1
if slice is a reference slice
else 0
555 if ( !defined( $self->{
'is_reference'} ) ) {
556 $self->{
'is_reference'} =
557 $self->adaptor()->is_reference( $self->get_seq_region_id() );
560 return $self->{
'is_reference'};
565 Example : my $top = $slice->is_toplevel()
566 Description: Returns 1
if slice is a toplevel slice
else 0
576 if ( !defined( $self->{
'toplevel'} ) ) {
577 $self->{
'toplevel'} =
578 $self->adaptor()->is_toplevel( $self->get_seq_region_id() );
581 return $self->{
'toplevel'};
586 Example : my $top = $slice->has_karyotype()
587 Description: Returns 1
if slice is part of the karyotype
else 0
597 if ( !defined( $self->{
'karyotype'} ) ) {
598 $self->{
'karyotype'} =
599 $self->adaptor()->has_karyotype( $self->get_seq_region_id() );
602 return $self->{
'karyotype'};
605 =head2 karyotype_rank
607 Example : my $rank = $slice->karyotype_rank()
608 Description: Returns the numeric ranking in the karyotype. Otherwise 0 is returned
617 if(! defined( $self->{karyotype_rank})) {
618 my $rank = $self->adaptor()->get_karyotype_rank($self->get_seq_region_id());
619 $self->{karyotype_rank} = $rank
if $rank;
621 return $self->{karyotype_rank} || 0;
626 Example : my $circ = $slice->is_circular()
627 Description: Returns 1
if slice is a circular slice
else 0
636 my $adaptor = $self->adaptor();
637 return 0
if ! defined $adaptor;
638 if (! exists $self->{
'circular'}) {
639 my $id = $adaptor->get_seq_region_id($self);
640 $self->{circular} = $adaptor->is_circular($id);
642 return $self->{circular};
648 Example : $inverted_slice = $slice->invert;
649 Description: Creates a copy of
this slice on the opposite strand and
661 # make a shallow copy of the slice via a hash copy and flip the strand
663 $s{
'strand'} = $self->{
'strand'} * -1;
665 # reverse compliment any attached sequence
666 reverse_comp(\$s{
'seq'})
if($s{
'seq'});
668 # bless and return the copy
669 return bless \%s, ref $self;
677 Example : print
"SEQUENCE = ", $slice->
seq();
678 Description: Returns the sequence of the region represented by
this
679 slice formatted as a
string.
680 Only available
for the
default coord_system
691 # special case for in-between (insert) coordinates
692 return '' if($self->start() == $self->end() + 1);
694 return $self->{
'seq'}
if($self->{
'seq'});
696 if($self->adaptor()) {
697 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
698 return ${$seqAdaptor->fetch_by_Slice_start_end_strand($self,1,undef,1)};
701 # no attached sequence, and no db, so just return Ns
702 return 'N' x $self->length();
709 Arg [1] :
int $startBasePair
710 relative to start of slice, which is 1.
711 Arg [2] :
int $endBasePair
712 relative to start of slice.
713 Arg [3] : (optional)
int $strand
714 The strand of the slice to obtain sequence from. Default
716 Description: returns
string of dna sequence
718 Exceptions : end should be at least as big as start
726 my ( $self, $start, $end, $strand ) = @_;
728 if ( $end+1 < $start ) {
729 throw(
"End coord + 1 is less than start coord");
732 # handle 'between' case for insertions
733 return '' if( $start == $end + 1);
735 $strand = 1 unless(defined $strand);
737 if ( $strand != -1 && $strand != 1 ) {
738 throw(
"Invalid strand [$strand] in call to Slice::subseq.");
742 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
743 $subseq = ${$seqAdaptor->fetch_by_Slice_start_end_strand
747 ## check for gap at the beginning and pad it with Ns
749 $subseq =
"N" x (1 - $start);
752 $subseq .= substr ($self->seq(), $start-1, $end - $start + 1);
753 ## check for gap at the end and pad it with Ns
754 if ($end > $self->length()) {
755 $subseq .=
"N" x ($end - $self->length());
757 reverse_comp(\$subseq)
if($strand == -1);
762 =head2 sub_Slice_Iterator
764 Arg[1] :
int The chunk size to request
765 Example : my $i = $slice->sub_Slice_Iterator(60000);
766 while($i->has_next()) { warn $i->next()->name(); }
767 Description : Returns an iterator which batches subslices of
this Slice
768 in the requested chunk size
775 sub sub_Slice_Iterator {
776 my ($self, $chunk_size) = @_;
777 throw "Need a chunk size to divide the slice by" if ! $chunk_size;
779 my $end = $self->length();
780 my $iterator_sub = sub {
781 while($here <= $end) {
782 my $there = $here + $chunk_size - 1;
783 $there = $end
if($there > $end);
784 my $slice = $self->sub_Slice($here, $there);
793 =head2 assembly_exception_type
795 Example : $self->assembly_exception_type();
796 Description : Returns the type of slice
this is. If it is reference then you
797 will get
'REF' back. Otherwise you will get the first
798 element from C<get_all_AssemblyExceptionFeatures()>. If no
799 assembly exception exists you will get an empty
string back.
807 sub assembly_exception_type {
810 if($self->is_reference()) {
814 my $assembly_exceptions = $self->get_all_AssemblyExceptionFeatures();
815 if(@{$assembly_exceptions}) {
816 $type = $assembly_exceptions->[0]->type();
824 Example : print ($slice->is_chromosome()) ?
'I am a chromosome' :
'Not one';
825 Description : A chromosome is a slice with a karyotype rank assigned to it.
826 Returntype : Boolean indicates
if the current
object is a chromosome
834 return $self->has_karyotype();
838 =head2 get_base_count
841 Example : $c_count = $slice->get_base_count->{
'c'};
842 Description: Retrieves a hashref containing the counts of each bases in the
843 sequence spanned by
this slice. The format of the hash is :
851 All bases which are not in the set [A,a,C,c,T,t,G,g] are
852 included in the
'n' count. The
'n' count could therefore be
853 inclusive of ambiguity codes such as
'y'.
854 The %gc is the ratio of GC to AT content as in:
855 total(GC)/total(ACTG) * 100
856 This
function is conservative in its memory
usage and scales to
857 work
for entire chromosomes.
877 my $len = $self->length();
881 while ( $start <= $len ) {
882 $end = $start + $RANGE - 1;
883 $end = $len
if ( $end > $len );
885 $seq = $self->subseq( $start, $end );
895 my $actg = $a + $c + $t + $g;
898 if ( $actg > 0 ) { # Avoid dividing by 0
899 $gc_content = sprintf(
"%1.2f", ( ( $g + $c )/$actg )*100 );
907 '%gc' => $gc_content };
914 Arg [1] :
string $name
915 The name of the coordinate system to project
this slice onto
916 Arg [2] :
string $version
917 The version of the coordinate system (such as
'NCBI34') to
918 project
this slice onto
920 my $clone_projection = $slice->project(
'clone');
922 foreach my $segment (@$clone_projection) {
923 my $clone = $segment->to_Slice();
924 print $slice->seq_region_name(),
':', $segment->from_start(),
'-',
925 $segment->from_end(),
' -> ',
926 $clone->seq_region_name(),
':', $clone->start(),
'-',$clone->end(),
927 ':', $clone->strand(),
"\n";
929 Description: Returns the results of
'projecting' this slice onto another
930 coordinate system. Projecting to a coordinate system that
931 the slice is assembled from is analagous to retrieving a tiling
932 path. This method may also be used to
'project up' to a higher
933 level coordinate system, however.
935 This method returns a listref of triplets [start,end,slice]
936 which represents the projection. The start and end defined the
937 region of
this slice which is made up of the third value of
938 the triplet: a slice in the requested coordinate system.
940 can also be used as [$start,$end,$slice] triplets
950 my $cs_version = shift;
952 throw(
'Coord_system name argument is required')
if(!$cs_name);
954 my $slice_adaptor = $self->adaptor();
956 if(!$slice_adaptor) {
957 warning(
"Cannot project without attached adaptor.");
961 if(!$self->coord_system()) {
962 warning(
"Cannot project without attached coord system.");
967 my $db = $slice_adaptor->db();
968 my $csa = $db->get_CoordSystemAdaptor();
969 my $cs = $csa->fetch_by_name($cs_name, $cs_version);
970 my $slice_cs = $self->coord_system();
973 throw(
"Cannot project to unknown coordinate system " .
974 "[$cs_name $cs_version]");
977 # no mapping is needed if the requested coord system is the one we are in
978 # but we do need to check if some of the slice is outside of defined regions
979 if($slice_cs->equals($cs)) {
980 return $self->_constrain_to_region();
985 # decompose this slice into its symlinked components.
986 # this allows us to handle haplotypes and PARs
987 my $normal_slice_proj =
988 $slice_adaptor->fetch_normalized_slice_projection($self);
989 foreach my $segment (@$normal_slice_proj) {
990 my $normal_slice = $segment->[2];
992 $slice_cs = $normal_slice->coord_system();
994 my $asma = $db->get_AssemblyMapperAdaptor();
995 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
996 next unless defined $asm_mapper;
998 # perform the mapping between this slice and the requested system
999 my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
1000 $normal_slice->start(),
1001 $normal_slice->end(),
1002 $normal_slice->strand(),
1003 $slice_cs, undef, undef, 1);
1005 # construct a projection from the mapping results and return it
1006 foreach my $coord (@coords) {
1007 my $original = $coord->{original};
1008 my $mapped = $coord->{mapped};
1011 next unless $mapped->isa(
'Bio::EnsEMBL::Mapper::Coordinate');
1013 my $mapped_start = $mapped->start();
1014 my $mapped_end = $mapped->end();
1015 my ($current_start, $current_end);
1016 if ($self->strand == 1) {
1017 $current_start = $original->start() - $self->start + 1;
1018 $current_end = $original->end() - $self->start + 1;
1020 $current_start = $self->end() - $original->end + 1;
1021 $current_end = $self->end - $original->start + 1;
1024 my $coord_cs = $mapped->coord_system();
1026 # If the normalised projection just ended up mapping to the
1027 # same coordinate system we were already in then we should just
1028 # return the original region. This can happen for example, if we
1029 # were on a PAR region on Y which refered to X and a projection to
1030 # 'toplevel' was requested.
1031 if($coord_cs->equals($slice_cs)) {
1032 # trim off regions which are not defined
1033 return $self->_constrain_to_region();
1035 #create slices for the mapped-to coord system
1036 my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
1041 if ($current_end > $slice->seq_region_length() && $slice->is_circular ) {
1042 $current_end -= $slice->seq_region_length();
1045 push @projection, bless([$current_start, $current_end, $slice],
1046 "Bio::EnsEMBL::ProjectionSegment");
1051 return \@projection;
1055 sub _constrain_to_region {
1058 my $entire_len = $self->seq_region_length();
1060 #if the slice has negative coordinates or coordinates exceeding the
1061 #exceeding length of the sequence region we want to shrink the slice to
1064 if($self->{
'start'} > $entire_len || $self->{
'end'} < 1) {
1065 #none of this slice is in a defined region
1069 my $right_contract = 0;
1070 my $left_contract = 0;
1071 if($self->{
'end'} > $entire_len) {
1072 $right_contract = $entire_len - $self->{
'end'};
1074 if($self->{
'start'} < 1) {
1075 $left_contract = $self->{
'start'} - 1;
1079 if($left_contract || $right_contract) {
1080 #if slice in negative strand, need to swap contracts
1081 if ($self->strand == 1) {
1082 $new_slice = $self->expand($left_contract, $right_contract);
1084 elsif ($self->strand == -1) {
1085 $new_slice = $self->expand($right_contract, $left_contract);
1091 return [bless [1-$left_contract, $self->length()+$right_contract,
1092 $new_slice],
"Bio::EnsEMBL::ProjectionSegment" ];
1098 Arg [1] : (optional)
int $five_prime_expand
1099 The number of basepairs to shift
this slices five_prime
1100 coordinate by. Positive values make the slice larger,
1101 negative make the slice smaller.
1104 Arg [2] : (optional)
int $three_prime_expand
1105 The number of basepairs to shift
this slices three_prime
1106 coordinate by. Positive values make the slice larger,
1107 negative make the slice smaller.
1109 Arg [3] : (optional)
bool $force_expand
1110 if set to 1, then the slice will be contracted even in the
case
1111 when shifts $five_prime_expand and $three_prime_expand overlap.
1112 In that
case $five_prime_expand and $three_prime_expand will be set
1113 to a maximum possible number and that will result in the slice
1114 which would have only 2pbs.
1116 Arg [4] : (optional)
int* $fpref
1117 The reference to a number of basepairs to shift
this slices five_prime
1118 coordinate by. Normally it would be set to $five_prime_expand.
1119 But in
case when $five_prime_expand shift can not be applied and
1120 $force_expand is set to 1, then $$fpref will contain the maximum possible
1122 Arg [5] : (optional)
int* $tpref
1123 The reference to a number of basepairs to shift
this slices three_prime
1124 coordinate by. Normally it would be set to $three_prime_expand.
1125 But in
case when $five_prime_expand shift can not be applied and
1126 $force_expand is set to 1, then $$tpref will contain the maximum possible
1128 Example : my $expanded_slice = $slice->expand( 1000, 1000);
1129 my $contracted_slice = $slice->expand(-1000,-1000);
1130 my $shifted_right_slice = $slice->expand(-1000, 1000);
1131 my $shifted_left_slice = $slice->expand( 1000,-1000);
1132 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
1134 Description: Returns a slice which is a resized copy of
this slice. The
1135 start and end are moved outwards from the center of the slice
1136 if positive values are provided and moved inwards
if negative
1137 values are provided. This slice remains unchanged. A slice
1138 may not be contracted below 1bp but may grow to be arbitrarily
1141 Exceptions : warning
if an attempt is made to contract the slice below 1bp
1149 my $five_prime_shift = shift || 0;
1150 my $three_prime_shift = shift || 0;
1151 my $force_expand = shift || 0;
1155 if ( $self->{
'seq'} ) {
1157 "Cannot expand a slice which has a manually attached sequence ");
1161 my $sshift = $five_prime_shift;
1162 my $eshift = $three_prime_shift;
1164 if ($sshift == 0 && $eshift == 0) {
return $self; }
1166 if ( $self->{
'strand'} != 1 ) {
1167 $eshift = $five_prime_shift;
1168 $sshift = $three_prime_shift;
1171 my $new_start = $self->{
'start'} - $sshift;
1172 my $new_end = $self->{
'end'} + $eshift;
1174 # Wrap around on circular slices
1175 if (( $new_start <= 0 || $new_start > $self->seq_region_length() || $new_end <= 0
1176 || $new_end > $self->seq_region_length() ) && ( $self->is_circular() ) ) {
1178 if ( $new_start <= 0 ) {
1179 $new_start = $self->seq_region_length() + $new_start;
1181 if ( $new_start > $self->seq_region_length() ) {
1182 $new_start -= $self->seq_region_length();
1185 if ( $new_end <= 0 ) {
1186 $new_end = $self->seq_region_length() + $new_end;
1188 if ( $new_end > $self->seq_region_length() ) {
1189 $new_end -= $self->seq_region_length();
1194 if ( $new_start > $new_end && (not $self->is_circular() ) ) {
1196 if ($force_expand) {
1197 # Apply max possible shift, if force_expand is set
1198 if ( $sshift < 0 ) {
1199 # if we are contracting the slice from the start - move the
1200 # start just before the end
1201 $new_start = $new_end - 1;
1202 $sshift = $self->{start} - $new_start;
1205 if ( $new_start > $new_end ) {
1206 # if the slice still has a negative length - try to move the
1208 if ( $eshift < 0 ) {
1209 $new_end = $new_start + 1;
1210 $eshift = $new_end - $self->{end};
1213 # return the values by which the primes were actually shifted
1214 $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
1215 $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
1217 if ( $new_start > $new_end ) {
1218 throw(
'Slice start cannot be greater than slice end');
1222 #fastest way to copy a slice is to do a shallow hash copy
1223 my %new_slice = %$self;
1224 $new_slice{
'start'} = int($new_start);
1225 $new_slice{
'end'} = int($new_end);
1227 return bless \%new_slice, ref($self);
1230 =head2 constrain_to_seq_region
1231 Example : $new_slice = $slice->expand(1000,10000);
1232 $new_slice = $new_slice->constrain_to_seq_region();
1233 Description: Used to prevent overly zealous expand calls going off the end of
1234 the sequence region. It contracts the start and end where needed
1235 and produces a slice copy with the tweaked coordinates.
1239 sub constrain_to_seq_region {
1241 # circular calculations should already be taken care of
1242 if ($self->is_circular) {
return $self}
1243 my $new_start = $self->
start;
1244 my $new_end = $self->end;
1246 my $seq_region = $self->seq_region_Slice;
1248 if ($new_start < $seq_region->start) {$new_start = $seq_region->start}
1249 if ($new_end > $seq_region->end) {$new_end = $seq_region->end}
1251 my %new_slice = %$self;
1252 $new_slice{
'start'} = $new_start;
1253 $new_slice{
'end'} = $new_end;
1255 return bless \%new_slice, ref($self);
1261 Arg 1 :
int $start, refers to the start of the subslice relative to the input slice
1262 Arg 2 :
int $end, refers to the end of the subslice relative to the input slice
1263 Arge [3] :
int $strand
1265 Description: Makes another Slice that covers only part of
this Slice
1266 If a Slice is requested which lies outside of the boundaries
1267 of
this function will
return undef. This means that
1268 behaviour will be consistant whether or not the slice is
1269 attached to the database (i.e.
if there is attached sequence
1270 to the slice). Alternatively the expand() method or the
1271 SliceAdaptor::fetch_by_region method can be used instead.
1272 Returntype :
Bio::
EnsEMBL::Slice or undef if arguments are wrong
1280 my ( $self, $start, $end, $strand ) = @_;
1282 my $length = $self->length();
1284 if( $start < 1 || $start > $length ) {
1285 # throw( "start argument not valid" );
1289 if( $end < $start || $end > $length ) {
1290 # throw( "end argument not valid" )
1294 my ( $new_start, $new_end, $new_strand, $new_seq );
1295 if( ! defined $strand ) {
1299 if( $self->{
'strand'} == 1 ) {
1300 $new_start = $self->{
'start'} + $start - 1;
1301 $new_end = $self->{
'start'} + $end - 1;
1302 $new_strand = $strand;
1304 $new_start = $self->{
'end'} - $end + 1;;
1305 $new_end = $self->{
'end'} - $start + 1;
1306 $new_strand = -$strand;
1309 if( defined $self->{
'seq'} ) {
1310 $new_seq = $self->subseq( $start, $end, $strand );
1313 #fastest way to copy a slice is to do a shallow hash copy
1314 my $new_slice = {%{$self}};
1315 $new_slice->{
'start'} = int($new_start);
1316 $new_slice->{
'end'} = int($new_end);
1317 $new_slice->{
'strand'} = $new_strand;
1319 $new_slice->{
'seq'} = $new_seq;
1321 weaken($new_slice->{adaptor});
1323 return bless $new_slice, ref($self);
1328 =head2 seq_region_Slice
1331 Example : $slice = $slice->seq_region_Slice();
1332 Description: Returns a slice which spans the whole seq_region which
this slice
1333 is on. For example
if this is a slice which spans a small region
1334 of chromosome X,
this method will
return a slice which covers the
1335 entire chromosome X. The returned slice will always have strand
1336 of 1 and start of 1. This method cannot be used
if the sequence
1337 of the slice has been set manually.
1339 Exceptions : warning
if called when sequence of Slice has been set manually.
1345 sub seq_region_Slice {
1349 warning(
"Cannot get a seq_region_Slice of a slice which has manually ".
1350 "attached sequence ");
1354 # quick shallow copy
1356 %{$slice} = %{$self};
1357 bless $slice, ref($self);
1358 weaken($slice->{adaptor});
1360 $slice->{
'start'} = 1;
1361 $slice->{
'end'} = $slice->{
'seq_region_length'};
1362 $slice->{
'strand'} = 1;
1371 Example : my $seq_region_id = $slice->get_seq_region_id();
1372 Description: Gets the
internal identifier of the seq_region that
this slice
1373 is on. Note that
this function will not work correctly
if this
1374 slice does not have an attached adaptor. Also note that it may
1376 method
if you are working with multiple databases since is
1377 possible to work with slices from databases with different
1378 internal seq_region identifiers.
1379 Returntype :
int or undef
if slices does not have attached adaptor
1380 Exceptions : warning
if slice is not associated with a SliceAdaptor
1381 Caller : assembly loading scripts, general
1388 if($self->adaptor) {
1389 return $self->adaptor->get_seq_region_id($self);
1392 warning(
'Cannot retrieve seq_region_id without attached adaptor.');
1397 =head2 get_genome_component
1400 Example : my $genome_component = $slice->get_genome_component();
1401 Description: Returns the genome component of the slice
1402 Returntype : Scalar; the identifier of the genome component of the slice
1409 sub get_genome_component {
1411 return $self->adaptor->get_genome_component_for_slice($self);
1414 =head2 get_all_Attributes
1416 Arg [1] : optional
string $attrib_code
1417 The code of the attribute type to retrieve values
for.
1418 Example : ($htg_phase) = @{$slice->get_all_Attributes(
'htg_phase')};
1419 @slice_attributes = @{$slice->get_all_Attributes()};
1420 Description: Gets a list of Attributes of
this slice
''s seq_region.
1421 Optionally just get Attrubutes
for given code.
1423 Exceptions : warning
if slice does not have attached adaptor
1429 sub get_all_Attributes {
1430 my ($self, $attrib_code) = @_;
1431 if(my $adaptor = $self->_get_CoreAdaptor(
'Attribute')) {
1432 my $attribs = $adaptor->fetch_all_by_Slice($self);
1433 if(defined $attrib_code) {
1434 my $uc_attrib = uc($attrib_code);
1435 return [ grep { uc($_->code()) eq $uc_attrib } @{$attribs}];
1442 =head2 get_all_PredictionTranscripts
1444 Arg [1] : (optional)
string $logic_name
1445 The name of the analysis used to generate the prediction
1446 transcripts obtained.
1447 Arg [2] : (optional)
boolean $load_exons
1448 If set to
true will force loading of all PredictionExons
1449 immediately rather than loading them on demand later. This
1450 is faster
if there are a large number of PredictionTranscripts
1451 and the exons will be used.
1452 Example : @transcripts = @{$slice->get_all_PredictionTranscripts};
1453 Description: Retrieves the list of prediction transcripts which overlap
1454 this slice with logic_name $logic_name. If logic_name is
1455 not defined then all prediction transcripts are retrieved.
1457 Exceptions : warning
if slice does not have attached adaptor
1463 sub get_all_PredictionTranscripts {
1464 my ($self,$logic_name, $load_exons) = @_;
1466 if(!$self->adaptor()) {
1467 warning(
'Cannot get PredictionTranscripts without attached adaptor');
1470 my $pta = $self->adaptor()->db()->get_PredictionTranscriptAdaptor();
1471 return $pta->fetch_all_by_Slice($self, $logic_name, $load_exons);
1474 =head2 get_all_DnaAlignFeatures
1476 Arg [1] : (optional)
string $logic_name
1477 The name of the analysis performed on the dna align features
1479 Arg [2] : (optional)
float $score
1480 The mimimum score of the features to retrieve
1481 Arg [3] : (optional)
string $dbtype
1482 The name of an attached database to retrieve the features from
1483 instead, e.g.
'otherfeatures'.
1484 Arg [4] : (optional)
float hcoverage
1485 The minimum hcoverage od the featurs to retrieve
1486 Example : @dna_dna_align_feats = @{$slice->get_all_DnaAlignFeatures};
1487 Description: Retrieves the DnaDnaAlignFeatures which overlap
this slice with
1488 logic name $logic_name and with score above $score. If
1489 $logic_name is not defined features of all logic names are
1490 retrieved. If $score is not defined features of all scores are
1491 retrieved. Strand of the Slice is not considered.
1492 Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
1493 Exceptions : warning
if slice does not have attached adaptor
1499 sub get_all_DnaAlignFeatures {
1500 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1501 return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage,
'DnaAlignFeature');
1504 =head2 get_all_ProteinAlignFeatures
1506 Arg [1] : (optional)
string $logic_name
1507 The name of the analysis performed on the protein align features
1509 Arg [2] : (optional)
float $score
1510 The mimimum score of the features to retrieve
1511 Arg [3] : (optional)
string $dbtype
1512 The name of an attached database to retrieve features from
1514 Arg [4] : (optional)
float hcoverage
1515 The minimum hcoverage od the featurs to retrieve
1516 Example : @dna_pep_align_feats = @{$slice->get_all_ProteinAlignFeatures};
1517 Description: Retrieves the DnaPepAlignFeatures which overlap
this slice with
1518 logic name $logic_name and with score above $score. If
1519 $logic_name is not defined features of all logic names are
1520 retrieved. If $score is not defined features of all scores are
1522 Returntype : listref of Bio::EnsEMBL::DnaPepAlignFeatures
1523 Exceptions : warning
if slice does not have attached adaptor
1529 sub get_all_ProteinAlignFeatures {
1530 my ($self, $logic_name, $score, $dbtype, $hcoverage) = @_;
1531 return $self->_get_AlignFeatures($logic_name, $score, $dbtype, $hcoverage,
'ProteinAlignFeature');
1534 =head2 _get_AlignFeatures
1536 Arg [1] : (optional)
string $logic_name
1537 The name of the analysis performed on the protein align features
1539 Arg [2] : (optional)
float $score
1540 The mimimum score of the features to retrieve
1541 Arg [3] : (optional)
string $dbtype
1542 The name of an attached database to retrieve features from
1544 Arg [4] : (optional)
float hcoverage
1545 The minimum hcoverage od the featurs to retrieve
1546 Arg [5] :
string $align_type
1547 The type of adaptor to retrieve alignments from. Must be an BaseAlignFeature derived
1549 Description: Generic method which deals with the retrieval of either AlignFeature adaptor and allows
1550 you to
switch the adaptor values are retrieved from.
1554 sub _get_AlignFeatures {
1555 my ($self, $logic_name, $score, $dbtype, $hcoverage, $align_type) = @_;
1556 throw "No align_type given" unless $align_type;
1557 if(my $adaptor = $self->_get_CoreAdaptor($align_type, $dbtype)) {
1558 if(defined($score) and defined ($hcoverage)){
1559 warning
"Cannot specify score and hcoverage when querying for $align_type. Using score only";
1561 if(defined($score)){
1562 return $adaptor->fetch_all_by_Slice_and_score($self,$score, $logic_name);
1564 return $adaptor->fetch_all_by_Slice_and_hcoverage($self, $hcoverage, $logic_name);
1569 =head2 get_all_SimilarityFeatures
1571 Arg [1] : (optional)
string $logic_name
1572 the name of the analysis performed on the features to retrieve
1573 Arg [2] : (optional)
float $score
1574 the lower bound of the score of the features to be retrieved
1575 Example : @feats = @{$slice->get_all_SimilarityFeatures};
1576 Description: Retrieves all dna_align_features and protein_align_features
1577 with analysis named $logic_name and with score above $score.
1578 It is probably faster to use get_all_ProteinAlignFeatures or
1579 get_all_DnaAlignFeatures
if a sepcific feature type is desired.
1580 If $logic_name is not defined features of all logic names are
1581 retrieved. If $score is not defined features of all scores are
1583 Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
1584 Exceptions : warning
if slice does not have attached adaptor
1590 sub get_all_SimilarityFeatures {
1591 my ($self, $logic_name, $score) = @_;
1595 push @out, @{$self->get_all_ProteinAlignFeatures($logic_name, $score) };
1596 push @out, @{$self->get_all_DnaAlignFeatures($logic_name, $score) };
1601 =head2 get_all_SimpleFeatures
1603 Arg [1] : (optional)
string $logic_name
1604 The name of the analysis performed on the simple features
1606 Arg [2] : (optional)
float $score
1607 The mimimum score of the features to retrieve
1608 Example : @simple_feats = @{$slice->get_all_SimpleFeatures};
1609 Description: Retrieves the SimpleFeatures which overlap
this slice with
1610 logic name $logic_name and with score above $score. If
1611 $logic_name is not defined features of all logic names are
1612 retrieved. If $score is not defined features of all scores are
1614 Returntype : listref of Bio::EnsEMBL::SimpleFeatures
1615 Exceptions : warning
if slice does not have attached adaptor
1621 sub get_all_SimpleFeatures {
1622 my ($self, $logic_name, $score, $dbtype) = @_;
1623 if(my $adaptor = $self->_get_CoreAdaptor(
'SimpleFeature', $dbtype)) {
1624 return $adaptor->fetch_all_by_Slice_and_score($self, $score, $logic_name);
1629 =head2 get_all_RepeatFeatures
1631 Arg [1] : (optional)
string $logic_name
1632 The name of the analysis performed on the repeat features
1634 Arg [2] : (optional)
string/array $repeat_type
1635 Limits features returned to those of the specified
1636 repeat_type. Can specify a single value or an array reference
1637 to limit by more than one
1638 Arg [3] : (optional)
string $db
1639 Key
for database e.g. core/vega/cdna/....
1640 Example : @repeat_feats = @{$slice->get_all_RepeatFeatures(undef,
'Type II Transposons')};
1641 Description: Retrieves the RepeatFeatures which overlap with
1642 logic name $logic_name and with score above $score. If
1643 $logic_name is not defined features of all logic names are
1645 Returntype : listref of Bio::EnsEMBL::RepeatFeatures
1646 Exceptions : warning
if slice does not have attached adaptor
1652 sub get_all_RepeatFeatures {
1653 my ($self, $logic_name, $repeat_type, $dbtype) = @_;
1654 if(my $adaptor = $self->_get_CoreAdaptor(
'RepeatFeature', $dbtype)) {
1655 return $adaptor->fetch_all_by_Slice($self, $logic_name, $repeat_type);
1660 =head2 get_all_LD_values
1662 Arg [1] : (optional) Bio::EnsEMBL::Variation::Population $population
1663 Description : returns all LD values on
this slice. This
function will only work correctly
if the variation
1664 database has been attached to the core database. If the argument is passed, will
return the LD information
1666 ReturnType : Bio::EnsEMBL::Variation::LDFeatureContainer
1668 Caller : contigview, snpview
1673 sub get_all_LD_values {
1675 my $population = shift;
1677 my $ld_adaptor = $self->_get_VariationAdaptor(
'LDFeatureContainer');
1679 return $ld_adaptor->fetch_by_Slice($self,$population);
1684 =head2 _get_VariationFeatureAdaptor
1686 Shortcut method here because VariationFeature is an often requested
1691 sub _get_VariationFeatureAdaptor {
1692 my ($self, $dbtype) = @_;
1693 return $self->_get_VariationAdaptor(
'VariationFeature', $dbtype);
1696 =head2 _get_StructuralVariationFeatureAdaptor
1698 Shortcut method here because StructuralVariationFeature is an often requested
1703 sub _get_StructuralVariationFeatureAdaptor {
1704 my ($self, $dbtype) = @_;
1705 return $self->_get_VariationAdaptor(
'StructuralVariationFeature', $dbtype);
1708 =head2 _get_VariationAdaptor
1710 Arg [1] : String object_type to retrieve an adaptor
for
1711 Arg [2] : String dbtype to search
for the given adaptor in. Defaults to variation
1712 Description : Searches
for the specified adaptor in the Registry and returns it. Otherwise
1713 it will
return nothing
if the adaptor was not found
1719 sub _get_VariationAdaptor {
1720 my ($self, $object_type, $dbtype) = @_;
1721 # very important to do this defaulting since we *have* to assume the variation
1722 # database is called 'variation'. Using the current group will not work because
1723 # that will be something like 'core' (most likely), 'ensembl' or 'vega'.
1724 $dbtype ||=
'variation';
1726 # Also we do not care about Registry->get_db() for variation DBs
1727 my $do_not_check_db = 1;
1729 return $self->_get_Adaptor($object_type, $dbtype, $do_not_check_db);
1732 =head2 _get_CoreAdaptor
1734 Arg [1] : String object_type to retrieve an adaptor
for
1735 Arg [2] : String dbtype to search
for the given adaptor in. Defaults to core
1736 Description : Searches
for the specified adaptor in the Registry and returns it. Otherwise
1737 it will
return nothing
if the adaptor was not found
1743 sub _get_CoreAdaptor {
1744 my ($self, $object_type, $dbtype) = @_;
1745 #Simple pass through
1746 return $self->_get_Adaptor($object_type, $dbtype);
1751 Arg [1] : String object_type to retrieve an adaptor
for
1752 Arg [2] : String dbtype to search
for the given adaptor in
1753 Arg [3] : Boolean Turn off the checking of Registry->get_db()
for your
1755 Description : Searches
for the specified adaptor in the Registry and returns it. Otherwise
1756 it will
return nothing
if the adaptor was not found. We consult the
1758 fall back to the normal methods of finding an adaptor.
1760 This method will warn when adaptors are missing but will never through an
1761 exception. It is up to the calling code to decide how to handle the unavailablity
1763 ReturnType :
Bio::
EnsEMBL::DBSQL::BaseAdaptor derrived instance. Otherwise it returns nothing
1769 my ($self, $object_type, $dbtype, $do_not_check_db) = @_;
1772 warning(
'Object type is a required parameter');
1776 my $adaptor = $self->adaptor();
1779 warning(
"Cannot get a ${object_type} adaptor without a SliceAdaptor attached to this instance of ".ref($self));
1783 my $local_db = $adaptor->db();
1784 my $species = $local_db->species();
1786 #First we query for the DBAdaptor using get_db(). This is a deprecated method
1787 #call but "special" adaptors can be registered via this method. We must
1788 #consult here 1st to find the possible special adaptor
1789 if(!$do_not_check_db && $dbtype) {
1790 my $db = $registry->get_db($local_db, $dbtype);
1792 # If we got a return then use this DBAdaptor's species name, group and the given object type.
1793 # Special adaptors can have different species names
1794 $adaptor = $registry->get_adaptor($db->species(), $db->group(), $object_type);
1797 #Otherwise just use the current species, dbtype and object type
1798 $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1801 # Otherwise our query group is the one attached to the current adaptor
1803 #If not set use the group attached to the local adaptor
1804 $dbtype ||= $local_db->group();
1805 $adaptor = $registry->get_adaptor($species, $dbtype, $object_type);
1807 return $adaptor
if $adaptor;
1809 warning(
"No adaptor could be found for the species ${species}, database type ${dbtype} and object type ${object_type}");
1813 =head2 get_all_VariationFeatures
1815 Args [1] : (optional) ArrayRef $so_terms
1816 SequenceOntology terms to limit the fetch to
1817 Args [2] : (optional)
boolean $without_children
1818 Do not query
using the children of the given SO terms
1819 i.e. query
using the given terms directly
1820 Args [3] : (optional) ArrayRef $included_so
1821 ArrayRef of SequenceOntology which should be queried
for
1822 without children. This argument allows you to combine SO terms with children
1823 from argument 1 with extra non-child SO terms. e.g. you wish to query
for
1824 all protein_altering_variant (specified in argument 1) variations which
1825 would be defined by child SO terms but also wanted stop_retained_variant linked variations
1826 defined by
this argument
1827 Args [4] : (optional)
string $dbtype
1828 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1829 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1830 DBConnection::add_db_adaptor method).
1831 Description : Returns all germline variation features on
this slice. This
function will
1832 only work correctly
if the variation database has been attached to the core
1834 If $so_terms is specified, only variation features with a consequence type
1835 that matches or is an ontological child of any of the supplied terms will
1837 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1839 Caller : contigview, snpview
1844 sub get_all_VariationFeatures{
1845 my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1846 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1847 return $vf_adaptor->fetch_all_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1852 =head2 get_all_somatic_VariationFeatures
1853 Args [1] : (optional) ArrayRef $so_terms
1854 SequenceOntology terms to limit the fetch to
1855 Args [2] : (optional)
boolean $without_children
1856 Do not query
using the children of the given SO terms
1857 i.e. query
using the given terms directly
1858 Args [3] : (optional) ArrayRef $included_so
1859 ArrayRef of SequenceOntology which should be queried
for
1860 without children. This argument allows you to combine SO terms with children
1861 from argument 1 with extra non-child SO terms. e.g. you wish to query
for
1862 all protein_altering_variant (specified in argument 1) variations which
1863 would be defined by child SO terms but also wanted stop_retained_variant linked variations
1864 defined by
this argument
1865 Args [4] : (optional)
string $dbtype
1866 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1867 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1868 DBConnection::add_db_adaptor method).
1869 Description : Returns all somatic variation features on
this slice. This
function will only
1870 work correctly
if the variation database has been attached to the core database.
1871 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1877 sub get_all_somatic_VariationFeatures {
1878 my ($self, $so_terms, $without_children, $included_so, $dbtype) = @_;
1879 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1880 return $vf_adaptor->fetch_all_somatic_by_Slice_SO_terms($self, $so_terms, $without_children, $included_so);
1885 =head2 get_all_somatic_VariationFeatures_by_source
1886 Arg [1] :
string $source [optional]
1887 The name of the source to query
for
1888 Arg [2] :
string $dbtype [optional]
1889 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1890 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1891 DBConnection::add_db_adaptor method).
1892 Description : Returns all somatic variation features, from a defined source name (e.g.
'COSMIC'),
1893 on
this slice. This
function will only work correctly
if the variation database
1894 has been attached to the core database.
1895 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1901 sub get_all_somatic_VariationFeatures_by_source {
1902 my ($self, $source, $dbtype) = @_;
1903 my $constraint = (defined($source)) ?
" s.name='$source' " : undef;
1904 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1905 return $vf_adaptor->fetch_all_somatic_by_Slice_constraint($self, $constraint);
1910 =head2 get_all_somatic_VariationFeatures_with_phenotype
1911 Arg [1] : $variation_feature_source [optional]
1912 Arg [2] : $phenotype_source [optional]
1913 Arg [3] : $phenotype_name [optional]
1914 Arg [4] :
string $dbtype [optional]
1915 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1916 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1917 DBConnection::add_db_adaptor method).
1918 Description : returns all somatic variation features on
this slice associated with a phenotype.
1919 (see get_all_VariationFeatures_with_phenotype
for further documentation)
1920 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
1926 sub get_all_somatic_VariationFeatures_with_phenotype {
1927 my ($self, $source, $p_source, $phenotype, $dbtype) = @_;
1928 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1929 return $vf_adaptor->fetch_all_somatic_with_phenotype_by_Slice($self, $source, $p_source, $phenotype);
1935 =head2 get_all_VariationFeatures_by_Population
1936 Arg [1] : Bio::EnsEMBL::Variation::Population
1937 Arg [2] : $minimum_frequency [optional]
1938 Arg [3] :
string $dbtype [optional]
1939 The dbtype of variation to obtain (i.e. can be different from the
"variation" type).
1940 This assumes that the extra db has been added to the DBAdaptor under
this name (
using the
1941 DBConnection::add_db_adaptor method).
1942 Example : $pop = $pop_adaptor->fetch_by_dbID(659);
1943 @vfs = @{$slice->get_all_VariationFeatures_by_Population($pop,$slice)};
1944 Description : Retrieves all variation features in a slice which are stored
for
1945 a specified population. If $minimum_frequency is supplied, only
1946 variations with a minor allele frequency (MAF) greater than
1947 $minimum_frequency will be returned.
1948 Returntype : listref of Bio::EnsEMBL::Variation::VariationFeature
1949 Exceptions :
throw on incorrect argument
1955 sub get_all_VariationFeatures_by_Population {
1956 my ($self, $minimum_frequency, $dbtype) = @_;
1957 if (my $vf_adaptor = $self->_get_VariationFeatureAdaptor($dbtype)) {
1958 return $vf_adaptor->fetch_all_by_Slice_Population($self, $minimum_frequency);
1963 =head2 get_all_Genes
1965 Arg [1] : (optional)
string $logic_name
1966 The name of the analysis used to generate the genes to retrieve
1967 Arg [2] : (optional)
string $dbtype
1968 The dbtype of genes to obtain. This assumes that the db has
1969 been added to the DBAdaptor under
this name (
using the
1970 DBConnection::add_db_adaptor method).
1971 Arg [3] : (optional)
boolean $load_transcripts
1972 If set to
true, transcripts will be loaded immediately rather
1973 than being lazy-loaded on request. This will result in a
1974 significant speed up
if the Transcripts and Exons are going to
1975 be used (but a slow down
if they are not).
1976 Arg [4] : (optional)
string $source
1977 The source of the genes to retrieve.
1978 Arg [5] : (optional)
string $biotype
1979 The biotype of the genes to retrieve.
1980 Example : @genes = @{$slice->get_all_Genes};
1981 Description: Retrieves all genes that overlap
this slice, including those on
1983 Returntype : listref of Bio::EnsEMBL::Genes
1991 my ($self, $logic_name, $dbtype, $load_transcripts, $source, $biotype) = @_;
1992 if(my $adaptor = $self->_get_CoreAdaptor(
'Gene', $dbtype)) {
1993 return $adaptor->fetch_all_by_Slice( $self, $logic_name, $load_transcripts, $source, $biotype);
1998 =head2 get_all_Genes_by_type
2000 Arg [1] :
string $type
2001 The biotype of genes wanted.
2002 Arg [2] : (optional)
string $logic_name
2003 Arg [3] : (optional)
boolean $load_transcripts
2004 If set to
true, transcripts will be loaded immediately rather
2005 than being lazy-loaded on request. This will result in a
2006 significant speed up
if the Transcripts and Exons are going to
2007 be used (but a slow down
if they are not).
2008 Example : @genes = @{$slice->get_all_Genes_by_type(
'protein_coding',
2010 Description: Retrieves genes that overlap
this slice of biotype $type.
2011 This is primarily used by the genebuilding code when several
2012 biotypes of genes are used.
2014 The logic name is the analysis of the genes that are retrieved.
2015 If not provided all genes will be retrieved instead. Both
2016 positive and negative strand Genes will be returned.
2018 Returntype : listref of Bio::EnsEMBL::Genes
2020 Caller : genebuilder, general
2025 sub get_all_Genes_by_type {
2026 my ($self, $type, $logic_name, $load_transcripts) = @_;
2027 return $self->get_all_Genes($logic_name, undef, $load_transcripts, undef, $type);
2031 =head2 get_all_Genes_by_source
2033 Arg [1] :
string source
2034 Arg [2] : (optional)
boolean $load_transcripts
2035 If set to
true, transcripts will be loaded immediately rather
2036 than being lazy-loaded on request. This will result in a
2037 significant speed up
if the Transcripts and Exons are going to
2038 be used (but a slow down
if they are not).
2039 Example : @genes = @{$slice->get_all_Genes_by_source(
'ensembl')};
2040 Description: Retrieves genes that overlap
this slice of source $source.
2041 Strand of the Slice does not affect the result.
2042 Returntype : listref of Bio::EnsEMBL::Genes
2049 sub get_all_Genes_by_source {
2050 my ($self, $source, $load_transcripts) = @_;
2051 return $self->get_all_Genes(undef, undef, $load_transcripts, $source);
2054 =head2 get_all_Transcripts
2056 Arg [1] : (optional)
boolean $load_exons
2057 If set to
true exons will not be lazy-loaded but will instead
2058 be loaded right away. This is faster
if the exons are
2059 actually going to be used right away.
2060 Arg [2] : (optional)
string $logic_name
2061 the logic name of the type of features to obtain
2062 Arg [3] : (optional)
string $db_type
2063 Example : @transcripts = @{$slice->get_all_Transcripts)_};
2064 Description: Gets all transcripts which overlap
this slice. If you want to
2065 specify a particular analysis or type, then you are better off
2066 using get_all_Genes or get_all_Genes_by_type and iterating
2067 through the transcripts of each gene. Strand of the Slice is
2069 Returntype : reference to a list of Bio::EnsEMBL::Transcripts
2076 sub get_all_Transcripts {
2077 my ($self, $load_exons, $logic_name, $dbtype, $source, $biotype) = @_;
2078 if(my $adaptor = $self->_get_CoreAdaptor(
'Transcript', $dbtype)) {
2079 return $adaptor->fetch_all_by_Slice($self, $load_exons, $logic_name, undef, $source, $biotype);
2084 =head2 get_all_Transcripts_by_type
2086 Arg [1] :
string $type
2087 The biotype of transcripts wanted.
2088 Arg [2] : (optional)
string $logic_name
2089 Arg [3] : (optional)
boolean $load_exons
2090 If set to
true exons will not be lazy-loaded but will instead
2091 be loaded right away. This is faster
if the exons are
2092 actually going to be used right away.
2094 Example : @transcripts = @{$slice->get_all_Transcripts_by_type(
'protein_coding',
2096 Description: Retrieves transcripts that overlap
this slice of biotype $type.
2097 This is primarily used by the genebuilding code when several
2098 biotypes of transcripts are used.
2100 The logic name is the analysis of the transcripts that are retrieved.
2101 If not provided all transcripts will be retrieved instead. Both
2102 positive and negative strand transcripts will be returned.
2104 Returntype : listref of Bio::EnsEMBL::Transcripts
2106 Caller : genebuilder, general
2111 sub get_all_Transcripts_by_type {
2112 my ($self, $biotype, $logic_name, $load_exons) = @_;
2113 return $self->get_all_Transcripts($load_exons, $logic_name, undef, undef, $biotype);
2117 =head2 get_all_Transcripts_by_source
2119 Arg [1] :
string source
2120 Arg [2] : (optional)
boolean $load_exons
2121 If set to
true exons will not be lazy-loaded but will instead
2122 be loaded right away. This is faster
if the exons are
2123 actually going to be used right away.
2124 Example : @transcripts = @{$slice->get_all_Transcripts_by_source(
'ensembl')};
2125 Description: Retrieves transcripts that overlap
this slice of source $source.
2126 Strand of the Slice does not affect the result.
2127 Returntype : listref of Bio::EnsEMBL::Transcripts
2134 sub get_all_Transcripts_by_source {
2135 my ($self, $source, $load_exons) = @_;
2136 return $self->get_all_Transcripts($load_exons, undef, undef, $source);
2141 =head2 get_all_Exons
2144 Example : @exons = @{$slice->get_all_Exons};
2145 Description: Gets all exons which overlap
this slice. Note that these exons
2146 will not be associated with any transcripts, so
this may not
2148 Returntype : reference to a list of Bio::EnsEMBL::Exons
2157 if(!$self->adaptor()) {
2158 warning(
'Cannot get Exons without attached adaptor');
2161 return $self->adaptor->db->get_ExonAdaptor->fetch_all_by_Slice($self);
2164 =head2 get_all_KaryotypeBands
2167 Example : @kary_bands = @{$slice->get_all_KaryotypeBands};
2168 Description: Retrieves the karyotype bands which
this slice overlaps.
2169 Returntype : listref oif Bio::EnsEMBL::KaryotypeBands
2171 Caller : general, contigview
2176 sub get_all_KaryotypeBands {
2178 if (my $adaptor = $self->_get_CoreAdaptor(
'KaryotypeBand')) {
2179 return $adaptor->fetch_all_by_Slice($self);
2184 =head2 get_repeatmasked_seq
2186 Arg [1] : listref of strings $logic_names (optional)
2187 Arg [2] :
int $soft_masking_enable (optional)
2188 Arg [3] : hash reference $not_default_masking_cases (optional,
default is {})
2189 The values are 0 or 1
for hard and soft masking respectively
2190 The keys of the hash should be of 2 forms
2191 "repeat_class_" . $repeat_consensus->repeat_class,
2192 e.g.
"repeat_class_SINE/MIR"
2193 "repeat_name_" . $repeat_consensus->name
2194 e.g.
"repeat_name_MIR"
2195 depending on which base you want to apply the not
default
2196 masking either the repeat_class or repeat_name. Both can be
2197 specified in the same hash at the same time, but in that
case,
2198 repeat_name setting has priority over repeat_class. For example,
2199 you may have hard masking as
default, and you may want soft
2200 masking of all repeat_class SINE/MIR, but repeat_name AluSp
2201 (which are also from repeat_class SINE/MIR).
2202 Your hash will be something like {
"repeat_class_SINE/MIR" => 1,
2203 "repeat_name_AluSp" => 0}
2204 Example : $rm_slice = $slice->get_repeatmasked_seq();
2205 $softrm_slice = $slice->get_repeatmasked_seq([
'RepeatMask'],1);
2207 masked sequence instead of the regular sequence.
2208 Sequence returned by
this new slice will have repeat regions
2209 hardmasked by
default (sequence replaced by N) or
2210 or soft-masked when arg[2] = 1 (sequence in lowercase)
2211 Will only work with database connection to get repeat features.
2219 sub get_repeatmasked_seq {
2220 my ($self,$logic_names,$soft_mask,$not_default_masking_cases) = @_;
2223 (-START => $self->{
'start'},
2224 -END => $self->{
'end'},
2225 -STRAND => $self->{
'strand'},
2226 -ADAPTOR => $self->adaptor(),
2227 -SEQ => $self->{
'seq'},
2228 -SEQ_REGION_NAME => $self->{
'seq_region_name'},
2229 -SEQ_REGION_LENGTH => $self->{
'seq_region_length'},
2230 -COORD_SYSTEM => $self->{
'coord_system'},
2231 -REPEAT_MASK => $logic_names,
2232 -SOFT_MASK => $soft_mask,
2233 -NOT_DEFAULT_MASKING_CASES => $not_default_masking_cases);
2238 =head2 _mask_features
2240 Arg [1] : reference to a
string $dnaref
2241 Arg [2] : array_ref $repeats
2243 give the list of coordinates to replace with N or with
2245 Arg [3] :
int $soft_masking_enable (optional)
2246 Arg [4] : hash reference $not_default_masking_cases (optional,
default is {})
2247 The values are 0 or 1
for hard and soft masking respectively
2248 The keys of the hash should be of 2 forms
2249 "repeat_class_" . $repeat_consensus->repeat_class,
2250 e.g.
"repeat_class_SINE/MIR"
2251 "repeat_name_" . $repeat_consensus->name
2252 e.g.
"repeat_name_MIR"
2253 depending on which base you want to apply the not
default masking either
2254 the repeat_class or repeat_name. Both can be specified in the same hash
2255 at the same time, but in that
case, repeat_name setting has priority over
2256 repeat_class. For example, you may have hard masking as
default, and
2257 you may want soft masking of all repeat_class SINE/MIR,
2258 but repeat_name AluSp (which are also from repeat_class SINE/MIR).
2259 Your hash will be something like {
"repeat_class_SINE/MIR" => 1,
2260 "repeat_name_AluSp" => 0}
2262 Description: replaces
string positions described in the RepeatFeatures
2263 with Ns (
default setting), or with the lower
case equivalent
2264 (soft masking). The reference to a dna
string which is passed
2265 is changed in place.
2273 sub _mask_features {
2274 my ($self,$dnaref,$repeats,$soft_mask,$not_default_masking_cases) = @_;
2276 $soft_mask = 0 unless (defined $soft_mask);
2277 $not_default_masking_cases = {} unless (defined $not_default_masking_cases);
2279 # explicit CORE::length call, to avoid any confusion with the Slice
2281 my $dnalen = CORE::length($$dnaref);
2283 REP:
foreach my $old_f (@{$repeats}) {
2284 my $f = $old_f->transfer( $self );
2285 my $start = $f->start;
2287 my $length = ($end - $start) + 1;
2289 # check if we get repeat completely outside of expected slice range
2290 if ($end < 1 || $start > $dnalen) {
2291 # warning("Unexpected: Repeat completely outside slice coordinates.");
2295 # repeat partly outside slice range, so correct
2296 # the repeat start and length to the slice size if needed
2299 $length = ($end - $start) + 1;
2302 # repeat partly outside slice range, so correct
2303 # the repeat end and length to the slice size if needed
2304 if ($end > $dnalen) {
2306 $length = ($end - $start) + 1;
2312 # if we decide to define masking on the base of the repeat_type, we'll need
2313 # to add the following, and the other commented line few lines below.
2317 if ($f->isa(
'Bio::EnsEMBL::RepeatFeature')) {
2318 $rc_class =
"repeat_class_" . $f->repeat_consensus->repeat_class;
2319 $rc_name =
"repeat_name_" . $f->repeat_consensus->name;
2323 $masking_type = $not_default_masking_cases->{$rc_class}
if (defined $not_default_masking_cases->{$rc_class});
2324 $masking_type = $not_default_masking_cases->{$rc_name}
if (defined $not_default_masking_cases->{$rc_name});
2326 $masking_type = $soft_mask unless (defined $masking_type);
2328 if ($masking_type) {
2329 $padstr = lc substr ($$dnaref,$start,$length);
2331 $padstr =
'N' x $length;
2333 substr ($$dnaref,$start,$length) = $padstr;
2338 =head2 get_all_SearchFeatures
2340 Arg [1] : scalar $ticket_ids
2341 Example : $slice->get_all_SearchFeatures(
'BLA_KpUwwWi5gY');
2342 Description: Retrieves all search features
for stored blast
2343 results
for the ticket that overlap
this slice
2344 Returntype : listref of Bio::EnsEMBL::SeqFeatures
2346 Caller : general (webby!)
2351 sub get_all_SearchFeatures {
2356 throw(
"ticket argument is required");
2359 if(!$self->adaptor()) {
2360 warning(
"Cannot get SearchFeatures without an attached adaptor");
2364 my $sfa = $self->adaptor()->db()->get_db_adaptor(
'blast');
2366 my $offset = $self->start-1;
2368 my $features = $sfa ? $sfa->get_all_SearchFeatures($ticket, $self->seq_region_name, $self->start, $self->end) : [];
2370 foreach( @$features ) {
2371 $_->start( $_->start - $offset );
2372 $_->end( $_->end - $offset );
2378 =head2 get_all_AssemblyExceptionFeatures
2380 Example : $slice->get_all_AssemblyExceptionFeatures();
2381 Description: Retrieves all misc features which overlap
this slice. If
2382 a set code is provided only features which are members of
2383 the requested set are returned.
2384 Returntype : listref of Bio::EnsEMBL::AssemblyExceptionFeatures
2391 sub get_all_AssemblyExceptionFeatures {
2393 if(my $adaptor = $self->_get_CoreAdaptor(
'AssemblyExceptionFeature')) {
2394 return $adaptor->fetch_all_by_Slice($self);
2401 =head2 get_all_MiscFeatures
2403 Arg [1] :
string $set (optional)
2404 Arg [2] :
string $database (optional)
2405 Example : $slice->get_all_MiscFeatures(
'cloneset');
2406 Description: Retrieves all misc features which overlap
this slice. If
2407 a set code is provided only features which are members of
2408 the requested set are returned.
2409 Returntype : listref of Bio::EnsEMBL::MiscFeatures
2416 sub get_all_MiscFeatures {
2417 my ($self, $misc_set, $dbtype) = @_;
2418 if(my $adaptor = $self->_get_CoreAdaptor(
'MiscFeature', $dbtype)) {
2420 return $adaptor->fetch_all_by_Slice_and_set_code($self,$misc_set);
2422 return $adaptor->fetch_all_by_Slice($self);
2427 =head2 get_all_MarkerFeatures
2429 Arg [1] : (optional)
string logic_name
2430 The logic name of the marker features to retrieve
2431 Arg [2] : (optional)
int $priority
2432 Lower (exclusive) priority bound of the markers to retrieve
2433 Arg [3] : (optional)
int $map_weight
2434 Upper (exclusive) priority bound of the markers to retrieve
2435 Example : my @markers = @{$slice->get_all_MarkerFeatures(undef,50, 2)};
2436 Description: Retrieves all markers which lie on
this slice fulfilling the
2437 specified map_weight and priority parameters (
if supplied).
2438 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2440 Caller : contigview, general
2445 sub get_all_MarkerFeatures {
2446 my ($self, $logic_name, $priority, $map_weight) = @_;
2447 if(my $adaptor = $self->_get_CoreAdaptor(
'MarkerFeature')) {
2448 return $adaptor->fetch_all_by_Slice_and_priority($self, $priority, $map_weight, $logic_name);
2454 =head2 get_MarkerFeatures_by_Name
2456 Arg [1] :
string marker Name
2457 The name (synonym) of the marker feature(s) to retrieve
2458 Example : my @markers = @{$slice->get_MarkerFeatures_by_Name(
'z1705')};
2459 Description: Retrieves all markers with
this ID
2460 Returntype : reference to a list of Bio::EnsEMBL::MarkerFeatures
2462 Caller : contigview, general
2467 sub get_MarkerFeatures_by_Name {
2468 my ($self, $name) = @_;
2469 if(my $adaptor = $self->_get_CoreAdaptor(
'MarkerFeature')) {
2470 return $adaptor->fetch_all_by_Slice_and_MarkerName($self, $name);
2476 =head2 get_all_compara_DnaAlignFeatures
2478 Arg [1] :
string $qy_species
2479 The name of the species to retrieve similarity features from
2480 Arg [2] :
string $qy_assembly
2481 The name of the assembly to retrieve similarity features from
2482 Arg [3] :
string $type
2483 The type of the alignment to retrieve similarity features from
2484 Arg [4] : <optional> compara dbadptor to use.
2485 Example : $fs = $slc->get_all_compara_DnaAlignFeatures(
'Mus musculus',
2488 Description: Retrieves a list of DNA-DNA Alignments to the species specified
2489 by the $qy_species argument.
2490 The compara database must be attached to the core database
2491 for this call to work correctly. As well the compara database
2492 must have the core dbadaptors
for both
this species, and the
2493 query species added to
function correctly.
2494 Returntype : reference to a list of Bio::EnsEMBL::DnaDnaAlignFeatures
2495 Exceptions : warning
if compara database is not available
2501 sub get_all_compara_DnaAlignFeatures {
2502 my ($self, $qy_species, $qy_assembly, $alignment_type, $compara_db) = @_;
2504 if(!$self->adaptor()) {
2505 warning(
"Cannot retrieve DnaAlignFeatures without attached adaptor");
2509 unless($qy_species && $alignment_type # && $qy_assembly
2511 throw(
"Query species and assembly and alignmemt type arguments are required");
2514 if(!defined($compara_db)){
2517 unless($compara_db) {
2518 warning(
"Compara database must be attached to core database or passed ".
2519 "as an argument to " .
2520 "retrieve compara information");
2524 my $dafa = $compara_db->get_DnaAlignFeatureAdaptor;
2525 return $dafa->fetch_all_by_Slice($self, $qy_species, $qy_assembly, $alignment_type);
2528 =head2 get_all_compara_Syntenies
2530 Arg [1] :
string $query_species e.g.
"Mus_musculus" or
"Mus musculus"
2531 Arg [2] :
string $method_link_type,
default is
"SYNTENY"
2532 Arg [3] : <optional> compara dbadaptor to use.
2533 Description: gets all the compara syntenyies
for a specfic species
2534 Returns : arrayref of Bio::EnsEMBL::Compara::SyntenyRegion
2539 sub get_all_compara_Syntenies {
2540 my ($self, $qy_species, $method_link_type, $compara_db) = @_;
2542 if(!$self->adaptor()) {
2543 warning(
"Cannot retrieve features without attached adaptor");
2547 unless($qy_species) {
2548 throw(
"Query species and assembly arguments are required");
2551 unless (defined $method_link_type) {
2552 $method_link_type =
"SYNTENY";
2555 if(!defined($compara_db)){
2558 unless($compara_db) {
2559 warning(
"Compara database must be attached to core database or passed ".
2560 "as an argument to " .
2561 "retrieve compara information");
2564 my $gdba = $compara_db->get_GenomeDBAdaptor();
2565 my $mlssa = $compara_db->get_MethodLinkSpeciesSetAdaptor();
2566 my $sra = $compara_db->get_SyntenyRegionAdaptor();
2568 my $this_gdb = $gdba->fetch_by_core_DBAdaptor($self->adaptor()->db());
2569 my $query_gdb = $gdba->fetch_by_registry_name($qy_species);
2571 if($this_gdb eq $query_gdb) {
2572 $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb]);
2574 $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($method_link_type, [$this_gdb, $query_gdb]);
2577 return $sra->fetch_all_by_MethodLinkSpeciesSet_Slice($mlss, $self);
2580 =head2 get_all_Haplotypes
2582 Arg [1] : (optional)
boolean $lite_flag
2583 if true lightweight haplotype objects are used
2584 Example : @haplotypes = $slice->get_all_Haplotypes;
2585 Description: Retrieves all of the haplotypes on
this slice. Only works
2586 if the haplotype adaptor has been attached to the core adaptor
2587 via $dba->add_db_adaptor(
'haplotype', $hdba);
2588 Returntype : listref of Bio::EnsEMBL::External::Haplotype::Haplotypes
2589 Exceptions : warning is Haplotype database is not available
2590 Caller : contigview, general
2595 sub get_all_Haplotypes {
2596 my($self, $lite_flag) = @_;
2598 if(!$self->adaptor()) {
2599 warning(
"Cannot retrieve features without attached adaptor");
2603 my $haplo_db = $self->adaptor->db->get_db_adaptor(
'haplotype');
2606 warning(
"Haplotype database must be attached to core database to " .
2607 "retrieve haplotype information" );
2611 my $haplo_adaptor = $haplo_db->get_HaplotypeAdaptor;
2613 my $haplotypes = $haplo_adaptor->fetch_all_by_Slice($self, $lite_flag);
2619 sub get_all_DASFactories {
2621 return [ $self->adaptor()->db()->_each_DASFeatureFactory ];
2624 sub get_all_DASFeatures_dsn {
2625 my ($self, $source_type, $dsn) = @_;
2627 if(!$self->adaptor()) {
2628 warning(
"Cannot retrieve features without attached adaptor");
2631 my @X = grep { $_->adaptor->dsn eq $dsn } $self->adaptor()->db()->_each_DASFeatureFactory;
2633 return [ $X[0]->fetch_all_Features( $self, $source_type ) ];
2636 =head2 get_all_DAS_Features
2639 Example : $features = $slice->get_all_DASFeatures;
2640 Description: Retrieves a hash reference to a hash of DAS feature
2641 sets, keyed by the DNS, NOTE the values of
this hash
2642 are an anonymous array containing:
2643 (1) a pointer to an array of features;
2644 (2) a pointer to the DAS stylesheet
2645 Returntype : hashref of Bio::SeqFeatures
2651 sub get_all_DAS_Features{
2654 $self->{_das_features} ||= {}; # Cache
2655 $self->{_das_styles} ||= {}; # Cache
2656 $self->{_das_segments} ||= {}; # Cache
2662 foreach my $dasfact( @{$self->get_all_DASFactories} ){
2663 my $dsn = $dasfact->adaptor->dsn;
2664 my $name = $dasfact->adaptor->name;
2665 # my $type = $dasfact->adaptor->type;
2666 my $url = $dasfact->adaptor->url;
2668 my ($type) = $dasfact->adaptor->mapping;
2669 if (ref $type eq
'ARRAY') {
2670 $type = shift @$type;
2672 $type ||= $dasfact->adaptor->type;
2673 # Construct a cache key : SOURCE_URL/TYPE
2674 # Need the type to handle sources that serve multiple types of features
2676 my $key = join(
'/', $name, $type);
2677 if( $self->{_das_features}->{$key} ){ # Use cached
2678 $das_features{$name} = $self->{_das_features}->{$key};
2679 $das_styles{$name} = $self->{_das_styles}->{$key};
2680 $das_segments{$name} = $self->{_das_segments}->{$key};
2681 }
else { # Get fresh data
2682 my ($featref, $styleref, $segref) = $dasfact->fetch_all_Features( $slice, $type );
2683 $self->{_das_features}->{$key} = $featref;
2684 $self->{_das_styles}->{$key} = $styleref;
2685 $self->{_das_segments}->{$key} = $segref;
2686 $das_features{$name} = $featref;
2687 $das_styles{$name} = $styleref;
2688 $das_segments{$name} = $segref;
2692 return (\%das_features, \%das_styles, \%das_segments);
2695 sub get_all_DASFeatures{
2696 my ($self, $source_type) = @_;
2699 if(!$self->adaptor()) {
2700 warning(
"Cannot retrieve features without attached adaptor");
2704 my %genomic_features =
map { ( $_->adaptor->dsn => [ $_->fetch_all_Features($self, $source_type) ] ) } $self->adaptor()->db()->_each_DASFeatureFactory;
2705 return \%genomic_features;
2709 sub old_get_all_DASFeatures{
2710 my ($self,@args) = @_;
2712 if(!$self->adaptor()) {
2713 warning(
"Cannot retrieve features without attached adaptor");
2717 my %genomic_features =
2718 map { ( $_->adaptor->dsn => [ $_->fetch_all_by_Slice($self) ] ) }
2719 $self->adaptor()->db()->_each_DASFeatureFactory;
2720 return \%genomic_features;
2725 =head2 get_all_ExternalFeatures
2727 Arg [1] : (optional)
string $track_name
2728 If specified only features from ExternalFeatureAdaptors with
2729 the track name $track_name are retrieved.
2730 If not set, all features from every ExternalFeatureAdaptor are
2732 Example : @x_features = @{$slice->get_all_ExternalFeatures}
2733 Description: Retrieves features on
this slice from external feature adaptors
2734 Returntype : listref of Bio::SeqFeatureI implementing objects in slice
2742 sub get_all_ExternalFeatures {
2743 my ($self, $track_name) = @_;
2745 if(!$self->adaptor()) {
2746 warning(
"Cannot retrieve features without attached adaptor");
2752 my $xfa_hash = $self->adaptor->db->get_ExternalFeatureAdaptors;
2753 my @xf_adaptors = ();
2756 #use a specific adaptor
2757 if(exists $xfa_hash->{$track_name}) {
2758 push @xf_adaptors, $xfa_hash->{$track_name};
2761 #use all of the adaptors
2762 push @xf_adaptors, values %$xfa_hash;
2766 foreach my $xfa (@xf_adaptors) {
2767 push @$features, @{$xfa->fetch_all_by_Slice($self)};
2774 =head2 get_all_DitagFeatures
2776 Arg [1] : (optional)
string ditag type
2777 Arg [1] : (optional)
string logic_name
2778 Example : @dna_dna_align_feats = @{$slice->get_all_DitagFeatures};
2779 Description: Retrieves the DitagFeatures of a specific type which overlap
2780 this slice. If type is not defined, all features are retrieved.
2781 Strandedness of the Slice is ignored.
2782 Returntype : listref of Bio::EnsEMBL::DitagFeatures
2783 Exceptions : warning
if slice does not have attached adaptor
2789 sub get_all_DitagFeatures {
2790 my ($self, $type, $logic_name) = @_;
2791 if(my $adaptor = $self->_get_CoreAdaptor(
'DitagFeature')) {
2792 return $adaptor->fetch_all_by_Slice($self, $type, $logic_name);
2797 # GENERIC FEATURES (See DBAdaptor.pm)
2799 =head2 get_generic_features
2801 Arg [1] : (optional) List of names of
generic feature types to
return.
2802 If no feature names are given, all
generic features are
2804 Example : my %features = %{$slice->get_generic_features()};
2805 Description: Gets
generic features via the
generic feature adaptors that
2806 have been added via DBAdaptor->add_GenricFeatureAdaptor (
if
2808 Returntype : Hash of named features.
2815 sub get_generic_features {
2817 my ($self, @names) = @_;
2819 if(!$self->adaptor()) {
2820 warning(
'Cannot retrieve features without attached adaptor');
2824 my $db = $self->adaptor()->db();
2826 my %features = (); #
this will hold the results
2828 # get the adaptors for each feature
2829 my %adaptors = %{$db->get_GenericFeatureAdaptors(@names)};
2831 foreach my $adaptor_name (keys(%adaptors)) {
2833 my $adaptor_obj = $adaptors{$adaptor_name};
2834 # get the features and add them to the hash
2835 my $features_ref = $adaptor_obj->fetch_all_by_Slice($self);
2836 # add each feature to the hash to be returned
2837 foreach my $feature (@$features_ref) {
2838 $features{$adaptor_name} = $feature;
2846 =head2 project_to_slice
2848 Arg [1] : Slice to project to.
2849 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
2850 foreach my $segment ( @$chr_projection ){
2851 $chr_slice = $segment->to_Slice();
2852 print $clone_slice->seq_region_name().
':'. $segment->from_start().
'-'.
2853 $segment->from_end().
' -> '.$chr_slice->seq_region_name().
':'. $chr_slice->start().
2854 '-'.$chr_slice->end().
2855 $chr_slice->strand().
" length: ".($chr_slice->end()-$chr_slice->start()+1).
"\n";
2857 Description: Projection of slice to another specific slice. Needed
for where we have multiple mappings
2858 and we want to state which one to project to.
2860 can also be used as [$start,$end,$slice] triplets.
2867 sub project_to_slice {
2869 my $to_slice = shift;
2871 throw(
'Slice argument is required')
if(!$to_slice);
2873 my $slice_adaptor = $self->adaptor();
2875 if(!$slice_adaptor) {
2876 warning(
"Cannot project without attached adaptor.");
2881 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
2883 my $cs = $to_slice->coord_system();
2884 my $slice_cs = $self->coord_system();
2885 my $to_slice_id = $to_slice->get_seq_region_id;
2889 # decompose this slice into its symlinked components.
2890 # this allows us to handle haplotypes and PARs
2891 my $normal_slice_proj =
2892 $slice_adaptor->fetch_normalized_slice_projection($self);
2893 foreach my $segment (@$normal_slice_proj) {
2894 my $normal_slice = $segment->[2];
2896 $slice_cs = $normal_slice->coord_system();
2898 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
2899 my $asm_mapper = $asma->fetch_by_CoordSystems($slice_cs, $cs);
2900 next unless defined $asm_mapper;
2902 # perform the mapping between this slice and the requested system
2903 my @coords = $asm_mapper->map($normal_slice->seq_region_name(),
2904 $normal_slice->start(),
2905 $normal_slice->end(),
2906 $normal_slice->strand(),
2907 $slice_cs, undef, $to_slice, 1);
2909 #construct a projection from the mapping results and return it
2910 foreach my $coord (@coords) {
2911 my $original = $coord->{original};
2912 my $mapped = $coord->{mapped};
2915 next unless $mapped->isa(
'Bio::EnsEMBL::Mapper::Coordinate');
2917 my $mapped_start = $mapped->start();
2918 my $mapped_end = $mapped->end();
2919 my ($current_start, $current_end);
2920 if ($self->strand == 1) {
2921 $current_start = $original->start() - $self->start + 1;
2922 $current_end = $original->end() - $self->start + 1;
2924 $current_start = $self->end() - $original->end + 1;
2925 $current_end = $self->end - $original->start + 1;
2928 # for multiple mappings only get the correct one
2929 next unless $mapped->id == $to_slice_id;
2931 my $coord_cs = $mapped->coord_system();
2933 # If the normalised projection just ended up mapping to the
2934 # same coordinate system we were already in then we should just
2935 # return the original region. This can happen for example, if we
2936 # were on a PAR region on Y which refered to X and a projection to
2937 # 'toplevel' was requested.
2938 # if($coord_cs->equals($slice_cs)) {
2939 # # trim off regions which are not defined
2940 # return $self->_constrain_to_region();
2943 # create slices for the mapped-to coord system
2944 my $slice = $slice_adaptor->fetch_by_seq_region_id($mapped->id(),
2949 push @projection, bless([$current_start, $current_end, $slice],
2950 "Bio::EnsEMBL::ProjectionSegment");
2955 # delete the cache as we may want to map to different set next time and old
2956 # results will be cached.
2957 $mapper_aptr->delete_cache;
2959 return \@projection;
2963 =head2 get_all_synonyms
2965 Args [1] : String external_db_name The name of the database to retrieve
2967 Args [2] : (optional) Integer external_db_version Optionally restrict
2968 results from external_db_name to a specific version of
2969 the the specified external database
2970 Example : my @alternative_names = @{$slice->get_all_synonyms()};
2971 @alternative_names = @{$slice->get_all_synonyms(
'EMBL')};
2972 Description: get a list of alternative names
for this slice
2973 Returntype : reference to list of SeqRegionSynonym objects.
2980 sub get_all_synonyms {
2981 my ($self, $external_db_name, $external_db_version) = @_;
2983 if ( !defined( $self->{
'synonym'} ) ) {
2984 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
2985 $self->{
'synonym'} =
2986 $adap->get_synonyms( $self->get_seq_region_id($self) );
2989 if(! $external_db_name) {
2990 return $self->{
'synonym'};
2992 my @args = ($external_db_version) ?
2993 ($external_db_name, $external_db_version) :
2994 ($external_db_name, undef, 1);
2995 my $external_db_id = $self->adaptor->db()->get_DBEntryAdaptor()->get_external_db_id(@args);
2996 if(!$external_db_id) {
2997 my $extra = ($external_db_version) ?
"and version $external_db_version " : q{};
2998 throw "The external database $external_db_name ${extra}did not result in a valid identifier";
3001 return [ grep { $_->external_db_id() == $external_db_id } @{$self->{synonym}} ];
3007 Example : $slice->add_synonym(
"alt_name");
3008 Description: add an alternative name
for this slice
3019 my $external_db_id = shift;
3021 my $adap = $self->adaptor->db->get_SeqRegionSynonymAdaptor();
3022 if ( !defined( $self->{
'synonym'} ) ) {
3023 $self->{
'synonym'} = $self->get_all_synonyms();
3027 -external_db_id => $external_db_id,
3028 -seq_region_id => $self->get_seq_region_id($self));
3030 push (@{$self->{
'synonym'}}, $new_syn);
3036 # package variable to minimize duplication
3037 my %region_so_mapping = (
3038 'chromosome' =>
'SO:0000340',
3039 'supercontig' =>
'SO:0000148',
3040 'scaffold' =>
'SO:0000148',
3041 'contig' =>
'SO:0000149'
3044 =head2 feature_so_acc
3046 Example : $slice_so_acc = $slice->feature_so_acc;
3047 Description : This method returns a
string containing the SO
accession number of the slice, based on the coordinate system name.
3048 Returns : string (Sequence Ontology
accession number)
3052 sub feature_so_acc {
3055 # return the region SO acc, or Slice acc
3056 return $region_so_mapping{$self->coord_system_name}
3059 =head2 feature_so_term
3061 Description: This method returns a
string containing the SO term of the slice, based on the coordinate system name
3062 Define constant SEQUENCE_ONTOLOGY in classes that require it, or
override it
for multiple possible values
for a
class.
3063 Returntype : String (Sequence Ontology term)
3064 Exceptions : Thrown
if caller SEQUENCE_ONTOLOGY is undefined and is not a
Bio::EnsEMBL::Slice
3068 sub feature_so_term {
3071 return defined($region_so_mapping{$self->coord_system_name}) ?
3076 =head2 summary_as_hash
3078 Example : $slice_summary = $slice->summary_as_hash();
3079 Description : Retrieves a textual summary of
this slice.
3080 Returns : hashref of descriptive strings
3083 sub summary_as_hash {
3086 my @aliases =
map { $_->name } @{$self->slice->get_all_synonyms()};
3088 $summary{
'seq_region_name'} = $self->seq_region_name;
3089 $summary{
'id'} = $self->seq_region_name;
3090 $summary{
'start'} = $self->start;
3091 $summary{
'end'} = $self->end;
3092 $summary{
'strand'} = 0;
3093 $summary{
'source'} = $self->source || $self->coord_system->version;
3094 $summary{
'Alias'} = \@aliases
if scalar(@aliases);
3095 $summary{
'Is_circular'} = $self->is_circular ?
"true" : undef;
3106 # Bioperl Bio::PrimarySeqI methods:
3111 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
3115 sub
id { name(@_); }
3120 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3124 sub display_id { name(@_); }
3129 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3133 sub primary_id { name(@_); }
3138 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3142 sub desc{
return $_[0]->coord_system->name().
' '.$_[0]->seq_region_name(); }
3147 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
3151 sub moltype {
return 'dna'; }
3155 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3159 sub alphabet {
return 'dna'; }
3162 =head2 accession_number
3164 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
3168 sub accession_number { name(@_); }