3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 $sa = $db->get_SliceAdaptor;
40 $sa->fetch_by_region(
'chromosome',
'X', 1_000_000, 2_000_000 );
42 # get some attributes of the slice
44 my $start = $slice->start();
45 my $end = $slice->end();
47 # get the sequence from the slice
48 my $seq = $slice->seq();
50 # get some features from the slice
51 foreach $gene ( @{ $slice->get_all_Genes } ) {
52 # do something with a gene
55 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56 # do something with dna-dna alignments
61 A Slice
object represents a region of a genome. It can be used to
62 retrieve sequence or features from an area of interest.
68 package Bio::EnsEMBL::CircularSlice;
84 #use Bio::EnsEMBL::IndividualSlice;
85 #use Bio::EnsEMBL::IndividualSliceFactory;
89 use Scalar::Util qw(weaken isweak);
91 my $reg =
"Bio::EnsEMBL::Registry";
97 Arg [...] : List of named arguments
99 string SEQ_REGION_NAME,
102 int SEQ_REGION_LENGTH, (optional)
103 string SEQ (optional)
104 int STRAND, (optional, defaults to 1)
113 -seq_region_name =>
'X',
114 -seq_region_length => 12e6,
115 -adaptor => $slice_adaptor );
117 Description: Creates a
new slice
object. A slice represents a
118 region of sequence in a particular coordinate system.
119 Slices can be used to retrieve sequence and features
120 from an area of interest in a genome.
122 Coordinates start at 1 and are inclusive. Negative
123 coordinates or coordinates exceeding the length of
124 the seq_region are permitted. Start must be less
125 than or equal. to end regardless of the strand.
127 Slice objects are immutable. Once instantiated their
128 attributes (with the exception of the adaptor) may
129 not be altered. To change the attributes a
new slice
133 Exceptions :
throws if start, end, coordsystem or seq_region_name not
134 specified or not of the correct type
135 Caller : general, Bio::EnsEMBL::SliceAdaptor
143 #new can be called as a class or object method
144 my $class = ref($caller) || $caller;
146 my ( $seq, $coord_system, $seq_region_name, $seq_region_length,
147 $start, $end, $strand, $adaptor, $empty )
149 qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
150 START END STRAND ADAPTOR EMPTY) ],
153 if ( !defined($seq_region_name) ) {
154 throw(
'SEQ_REGION_NAME argument is required');
156 if ( !defined($start) ) {
throw(
'START argument is required') }
157 if ( !defined($end) ) {
throw(
'END argument is required') }
159 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
161 if ( $seq_region_length <= 0 ) {
162 throw(
'SEQ_REGION_LENGTH must be > 0');
165 if ( defined($coord_system) ) {
166 assert_ref( $coord_system,
'Bio::EnsEMBL::CoordSystem' );
168 if ( $coord_system->is_top_level() ) {
169 throw(
'Cannot create circular slice on toplevel CoordSystem.');
172 warning(
"CircularSlice without coordinate system");
177 if ( $strand != 1 && $strand != -1 ) {
178 throw(
'STRAND argument must be -1 or 1');
181 if ( defined($adaptor) ) {
182 assert_ref( $adaptor,
'Bio::EnsEMBL::DBSQL::SliceAdaptor' );
185 my $seq1 = {
'coord_system' => $coord_system,
187 'seq_region_name' => $seq_region_name,
188 'seq_region_length' => $seq_region_length,
189 'start' => int($start),
191 'strand' => $strand };
194 $seq1->adaptor($adaptor);
202 Example : $cp = $slice->centrepoint();
203 Description: Returns the mid position of
this slice relative to the
204 start of the sequence region that it was created on.
205 Coordinates are inclusive and start at 1.
216 my ( $s, $e, $length ) =
217 ( $self->{
'start'}, $self->{
'end'}, $self->{
'seq_region_length'} );
220 return ( $s + $e )/2;
223 my $r1 = $length - $s;
225 my $r = ( $r1 + $r2 )/2;
228 if ( $m > $length ) {
238 Example : $length = $slice->length();
239 Description: Returns the length of
this slice in basepairs
250 if ( $self->{
'start'} < $self->{
'end'} ) {
251 return $self->{
'end'} - $self->{
'start'} + 1;
254 my $r1 = $self->{
'seq_region_length'} - $self->{
'start'};
255 my $r2 = $self->{
'end'};
256 my $ln = $r1 + $r2 + 1;
266 -COORD_SYSTEM => $self->{
'coord_system'},
267 -SEQ_REGION_NAME => $self->{
'seq_region_name'},
268 -SEQ_REGION_LENGTH => $self->{
'seq_region_length'},
269 -START => $self->{
'start'},
270 -END => $self->{
'seq_region_length'},
271 -STRAND => $self->{
'strand'},
272 -ADAPTOR => $self->adaptor() );
276 -COORD_SYSTEM => $self->{
'coord_system'},
277 -SEQ_REGION_NAME => $self->{
'seq_region_name'},
278 -SEQ_REGION_LENGTH => $self->{
'seq_region_length'},
280 -END => $self->{
'end'},
281 -STRAND => $self->{
'strand'},
282 -ADAPTOR => $self->adaptor() );
290 Example : print
"SEQUENCE = ", $slice->
seq();
291 Description: Returns the sequence of the region represented by
this
292 slice formatted as a
string.
303 # special case for in-between (insert) coordinates
304 return '' if ( $self->start() == $self->end() + 1 );
305 return $self->{
'seq'}
if ( $self->{
'seq'} );
307 if ( $self->adaptor() ) {
309 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
310 if ( $self->{
'start'} > $self->{
'end'} ) {
311 my $length = $self->{
'seq_region_length'};
313 my ($sl1, $sl2) = $self->_split;
316 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl1, 1, $sl1->end - $sl1->start + 1, $sl1->strand) };
319 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl2, 1, $sl2->end - $sl2->start + 1, $sl2->strand) };
321 return $seq1 . $seq2;
325 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, undef,
329 } ## end
if ( $self->adaptor() )
331 # no attached sequence, and no db, so just return Ns
332 return 'N' x $self->length();
337 Arg [1] :
int $startBasePair
338 relative to start of slice, which is 1.
339 Arg [2] :
int $endBasePair
340 relative to start of slice.
341 Arg [3] : (optional)
int $strand
342 The strand of the slice to obtain sequence from. Default
344 Description: returns
string of dna sequence
346 Exceptions : end should be at least as big as start
354 my ( $self, $start, $end, $strand ) = @_;
356 # handle 'between' case for insertions
357 return '' if ( $start == $end + 1 );
359 $strand = 1 unless ( defined $strand );
361 if ( $strand != -1 && $strand != 1 ) {
362 throw(
"Invalid strand [$strand] in call to Slice::subseq.");
365 my $length = $self->{
'seq_region_length'};
367 if ( $self->adaptor ) {
369 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
370 if ( $end < $start ) {
372 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
376 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, $end,
378 $subseq = $subseq1 . $subseq2;
382 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
386 ## check for gap at the beginning and pad it with Ns
388 $subseq =
"N" x ( 1 - $start );
391 $subseq .= substr( $self->seq(), $start - 1, $end - $start + 1 );
392 ## check for gap at the end and pad it with Ns
393 if ( $end > $self->length() ) {
394 $subseq .=
"N" x ( $end - $self->length() );
396 reverse_comp( \$subseq )
if ( $strand == -1 );
406 Arg [1] : (optional)
int $five_prime_expand
407 The number of basepairs to shift
this slices five_prime
408 coordinate by. Positive values make the slice larger,
409 negative make the slice smaller.
412 Arg [2] : (optional)
int $three_prime_expand
413 The number of basepairs to shift
this slices three_prime
414 coordinate by. Positive values make the slice larger,
415 negative make the slice smaller.
417 Arg [3] : (optional)
bool $force_expand
418 if set to 1, then the slice will be contracted even in the
case
419 when shifts $five_prime_expand and $three_prime_expand overlap.
420 In that
case $five_prime_expand and $three_prime_expand will be set
421 to a maximum possible number and that will result in the slice
422 which would have only 2pbs.
424 Arg [4] : (optional)
int* $fpref
425 The reference to a number of basepairs to shift
this slices five_prime
426 coordinate by. Normally it would be set to $five_prime_expand.
427 But in
case when $five_prime_expand shift can not be applied and
428 $force_expand is set to 1, then $$fpref will contain the maximum possible
430 Arg [5] : (optional)
int* $tpref
431 The reference to a number of basepairs to shift
this slices three_prime
432 coordinate by. Normally it would be set to $three_prime_expand.
433 But in
case when $five_prime_expand shift can not be applied and
434 $force_expand is set to 1, then $$tpref will contain the maximum possible
436 Example : my $expanded_slice = $slice->expand( 1000, 1000);
437 my $contracted_slice = $slice->expand(-1000,-1000);
438 my $shifted_right_slice = $slice->expand(-1000, 1000);
439 my $shifted_left_slice = $slice->expand( 1000,-1000);
440 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
442 Description: Returns a slice which is a resized copy of
this slice. The
443 start and end are moved outwards from the center of the slice
444 if positive values are provided and moved inwards
if negative
445 values are provided. This slice remains unchanged. A slice
446 may not be contracted below 1bp but may grow to be arbitrarily
449 Exceptions : warning
if an attempt is made to contract the slice below 1bp
457 my $five_prime_shift = shift || 0;
458 my $three_prime_shift = shift || 0;
459 my $force_expand = shift || 0;
463 if ( $self->{
'seq'} ) {
465 "Cannot expand a slice which has a manually attached sequence ");
471 my $sshift = $five_prime_shift;
472 my $eshift = $three_prime_shift;
474 if ( $self->{
'strand'} != 1 ) {
475 $eshift = $five_prime_shift;
476 $sshift = $three_prime_shift;
479 $new_start = $self->{
'start'} - $sshift;
480 $new_end = $self->{
'end'} + $eshift;
482 # if($new_start > $new_end) {
483 # if ($force_expand) { # Apply max possible shift, if force_expand is set
484 # if ($sshift < 0) { # if we are contracting the slice from the start - move the start just before the end
485 # $new_start = $new_end - 1;
486 # $sshift = $self->{start} - $new_start;
489 # if($new_start > $new_end) { # if the slice still has a negative length - try to move the end
491 # $new_end = $new_start + 1;
492 # $eshift = $new_end - $self->{end};
495 # return the values by which the primes were actually shifted
496 # $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
497 # $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
499 # if($new_start > $new_end) {
500 # throw('Slice start cannot be greater than slice end');
504 #fastest way to copy a slice is to do a shallow hash copy
505 my %new_slice = %$self;
506 $new_slice{
'start'} = int($new_start);
507 $new_slice{
'end'} = int($new_end);
509 return bless \%new_slice, ref($self);
514 =head2 get_all_VariationFeatures
516 Args : $filter [optional]
517 Description:returns all variation features on
this slice. This
function will only work
518 correctly
if the variation database has been attached to the core database.
519 If $filter is
"genotyped" return genotyped Snps only... (nice likkle hack);
520 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
522 Caller : contigview, snpview
524 : Variation database is under development.
528 sub get_all_VariationFeatures {
533 if ( !$self->adaptor() ) {
534 warning(
'Cannot get variation features without attached adaptor');
540 -species => $self->adaptor()->db()->species,
541 -type =>
"VariationFeature" );
543 if ( $filter eq
'genotyped' ) {
544 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
546 return $vf_adaptor->fetch_all_by_Slice($self);
549 warning(
"Variation database must be attached to core database to "
550 .
"retrieve variation information" );
558 =head2 get_all_genotyped_VariationFeatures
561 Description: returns all variation features on
this slice that have been genotyped.
562 This
function will only work correctly
if the variation database has
563 been attached to the core database.
564 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
566 Caller : contigview, snpview, ldview
568 : Variation database is under development.
572 sub get_all_genotyped_VariationFeatures {
575 if ( !$self->adaptor() ) {
576 warning(
'Cannot get variation features without attached adaptor');
582 -species => $self->adaptor()->db()->species,
583 -type =>
"VariationFeature" );
586 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
588 warning(
"Variation database must be attached to core database to "
589 .
"retrieve variation information" );
595 =head2 get_all_DASFeatures
598 Example : $features = $slice->get_all_DASFeatures;
599 Description: Retrieves a hash reference to a hash of DAS feature
600 sets, keyed by the DNS, NOTE the values of
this hash
601 are an anonymous array containing:
602 (1) a pointer to an array of features;
603 (2) a pointer to the DAS stylesheet
604 Returntype : hashref of Bio::SeqFeatures
610 sub get_all_DASFeatures {
611 my ( $self, $source_type ) = @_;
613 if ( !$self->adaptor() ) {
614 warning(
"Cannot retrieve features without attached adaptor");
618 my %genomic_features =
map {
619 ( $_->adaptor->dsn =>
620 [ $_->fetch_all_Features( $self, $source_type ) ] )
621 } $self->adaptor()->db()->_each_DASFeatureFactory;
622 return \%genomic_features;
627 =head2 project_to_slice
629 Arg [1] : Slice to project to.
630 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
631 foreach my $segment ( @$chr_projection ){
632 $chr_slice = $segment->to_Slice();
633 print $clone_slice->seq_region_name().
':'. $segment->from_start().
'-'.
634 $segment->from_end().
' -> '.$chr_slice->seq_region_name().
':'. $chr_slice->start().
635 '-'.$chr_slice->end().
636 $chr_slice->strand().
" length: ".($chr_slice->end()-$chr_slice->start()+1).
"\n";
638 Description: Projection of slice to another specific slice. Needed
for where we have multiple mappings
639 and we want to state which one to project to.
641 can also be used as [$start,$end,$slice] triplets.
648 sub project_to_slice {
650 my $to_slice = shift;
652 throw(
'Slice argument is required')
if ( !$to_slice );
654 my $slice_adaptor = $self->adaptor();
656 if ( !$slice_adaptor ) {
657 warning(
"Cannot project without attached adaptor.");
661 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
663 my $cs = $to_slice->coord_system();
664 my $slice_cs = $self->coord_system();
667 my $current_start = 1;
669 # decompose this slice into its symlinked components.
670 # this allows us to handle haplotypes and PARs
671 my $normal_slice_proj =
672 $slice_adaptor->fetch_normalized_slice_projection($self);
673 foreach my $segment (@$normal_slice_proj) {
674 my $normal_slice = $segment->[2];
676 $slice_cs = $normal_slice->coord_system();
678 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
679 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
681 # perform the mapping between this slice and the requested system
684 if ( defined $asm_mapper ) {
685 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
686 $normal_slice->start(),
687 $normal_slice->end(),
688 $normal_slice->strand(),
695 $normal_slice->end() );
698 #construct a projection from the mapping results and return it
699 foreach my $coord (@coords) {
700 my $coord_start = $coord->
start();
701 my $coord_end = $coord->end();
702 my $length = $coord_end - $coord_start + 1;
705 if ( $coord->isa(
'Bio::EnsEMBL::Mapper::Coordinate') ) {
706 my $coord_cs = $coord->coord_system();
708 # If the normalised projection just ended up mapping to the
709 # same coordinate system we were already in then we should just
710 # return the original region. This can happen for example, if we
711 # were on a PAR region on Y which refered to X and a projection to
712 # 'toplevel' was requested.
713 # if($coord_cs->equals($slice_cs)) {
714 # # trim off regions which are not defined
715 # return $self->_constrain_to_region();
718 #create slices for the mapped-to coord system
720 $slice_adaptor->fetch_by_seq_region_id( $coord->id(),
721 $coord_start, $coord_end, $coord->strand() );
723 my $current_end = $current_start + $length - 1;
726 bless( [ $current_start, $current_end, $slice ],
727 "Bio::EnsEMBL::ProjectionSegment" );
730 $current_start += $length;
731 } ## end
foreach my $coord (@coords)
732 } ## end
foreach my $segment (@$normal_slice_proj)
734 # delete the cache as we may want to map to different set next time and old
735 # results will be cached.
737 $mapper_aptr->delete_cache;
740 } ## end sub project_to_slice
743 # Bioperl Bio::PrimarySeqI methods:
748 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
756 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
760 sub display_id { name(@_); }
764 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
768 sub primary_id { name(@_); }
772 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
777 return $_[0]->coord_system->name() .
' ' . $_[0]->seq_region_name();
782 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
786 sub moltype {
return 'dna'; }
790 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
794 sub alphabet {
return 'dna'; }
796 =head2 accession_number
798 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
802 sub accession_number { name(@_); }
806 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
813 if ( !defined( $self->{
'circular'} ) ) {
815 grep { $_ } @{ $self->get_all_Attributes(
'circular_seq') };
816 $self->{
'circular'} = @attrs ? 1 : 0;
819 return $self->{
'circular'};