3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 $sa = $db->get_SliceAdaptor;
40 $sa->fetch_by_region(
'chromosome',
'X', 1_000_000, 2_000_000 );
42 # get some attributes of the slice
44 my $start = $slice->start();
45 my $end = $slice->end();
47 # get the sequence from the slice
48 my $seq = $slice->seq();
50 # get some features from the slice
51 foreach $gene ( @{ $slice->get_all_Genes } ) {
52 # do something with a gene
55 foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56 # do something with dna-dna alignments
61 A Slice
object represents a region of a genome. It can be used to
62 retrieve sequence or features from an area of interest.
68 package Bio::EnsEMBL::CircularSlice;
85 #use Bio::EnsEMBL::IndividualSlice;
86 #use Bio::EnsEMBL::IndividualSliceFactory;
90 use Scalar::Util qw(weaken isweak);
92 my $reg =
"Bio::EnsEMBL::Registry";
98 Arg [...] : List of named arguments
100 string SEQ_REGION_NAME,
103 int SEQ_REGION_LENGTH, (optional)
104 string SEQ (optional)
105 int STRAND, (optional, defaults to 1)
114 -seq_region_name =>
'X',
115 -seq_region_length => 12e6,
116 -adaptor => $slice_adaptor );
118 Description: Creates a
new slice
object. A slice represents a
119 region of sequence in a particular coordinate system.
120 Slices can be used to retrieve sequence and features
121 from an area of interest in a genome.
123 Coordinates start at 1 and are inclusive. Negative
124 coordinates or coordinates exceeding the length of
125 the seq_region are permitted. Start must be less
126 than or equal. to end regardless of the strand.
128 Slice objects are immutable. Once instantiated their
129 attributes (with the exception of the adaptor) may
130 not be altered. To change the attributes a
new slice
134 Exceptions :
throws if start, end, coordsystem or seq_region_name not
135 specified or not of the correct type
136 Caller : general, Bio::EnsEMBL::SliceAdaptor
144 #new can be called as a class or object method
145 my $class = ref($caller) || $caller;
147 my ( $seq, $coord_system, $seq_region_name, $seq_region_length,
148 $start, $end, $strand, $adaptor, $empty )
150 qw(SEQ COORD_SYSTEM SEQ_REGION_NAME SEQ_REGION_LENGTH
151 START END STRAND ADAPTOR EMPTY) ],
154 if ( !defined($seq_region_name) ) {
155 throw(
'SEQ_REGION_NAME argument is required');
157 if ( !defined($start) ) {
throw(
'START argument is required') }
158 if ( !defined($end) ) {
throw(
'END argument is required') }
160 if ( !defined($seq_region_length) ) { $seq_region_length = $end }
162 if ( $seq_region_length <= 0 ) {
163 throw(
'SEQ_REGION_LENGTH must be > 0');
166 if ( defined($coord_system) ) {
167 assert_ref( $coord_system,
'Bio::EnsEMBL::CoordSystem' );
169 if ( $coord_system->is_top_level() ) {
170 throw(
'Cannot create circular slice on toplevel CoordSystem.');
173 warning(
"CircularSlice without coordinate system");
178 if ( $strand != 1 && $strand != -1 ) {
179 throw(
'STRAND argument must be -1 or 1');
182 if ( defined($adaptor) ) {
183 assert_ref( $adaptor,
'Bio::EnsEMBL::DBSQL::SliceAdaptor' );
186 my $seq1 = {
'coord_system' => $coord_system,
188 'seq_region_name' => $seq_region_name,
189 'seq_region_length' => $seq_region_length,
190 'start' => int($start),
192 'strand' => $strand };
195 $seq1->adaptor($adaptor);
203 Example : $cp = $slice->centrepoint();
204 Description: Returns the mid position of
this slice relative to the
205 start of the sequence region that it was created on.
206 Coordinates are inclusive and start at 1.
217 my ( $s, $e, $length ) =
218 ( $self->{
'start'}, $self->{
'end'}, $self->{
'seq_region_length'} );
221 return ( $s + $e )/2;
224 my $r1 = $length - $s;
226 my $r = ( $r1 + $r2 )/2;
229 if ( $m > $length ) {
239 Example : $length = $slice->length();
240 Description: Returns the length of
this slice in basepairs
251 if ( $self->{
'start'} < $self->{
'end'} ) {
252 return $self->{
'end'} - $self->{
'start'} + 1;
255 my $r1 = $self->{
'seq_region_length'} - $self->{
'start'};
256 my $r2 = $self->{
'end'};
257 my $ln = $r1 + $r2 + 1;
267 -COORD_SYSTEM => $self->{
'coord_system'},
268 -SEQ_REGION_NAME => $self->{
'seq_region_name'},
269 -SEQ_REGION_LENGTH => $self->{
'seq_region_length'},
270 -START => $self->{
'start'},
271 -END => $self->{
'seq_region_length'},
272 -STRAND => $self->{
'strand'},
273 -ADAPTOR => $self->adaptor() );
277 -COORD_SYSTEM => $self->{
'coord_system'},
278 -SEQ_REGION_NAME => $self->{
'seq_region_name'},
279 -SEQ_REGION_LENGTH => $self->{
'seq_region_length'},
281 -END => $self->{
'end'},
282 -STRAND => $self->{
'strand'},
283 -ADAPTOR => $self->adaptor() );
291 Example : print
"SEQUENCE = ", $slice->
seq();
292 Description: Returns the sequence of the region represented by
this
293 slice formatted as a
string.
304 # special case for in-between (insert) coordinates
305 return '' if ( $self->start() == $self->end() + 1 );
306 return $self->{
'seq'}
if ( $self->{
'seq'} );
308 if ( $self->adaptor() ) {
310 my $seqAdaptor = $self->adaptor()->db()->get_SequenceAdaptor();
311 if ( $self->{
'start'} > $self->{
'end'} ) {
312 my $length = $self->{
'seq_region_length'};
314 my ($sl1, $sl2) = $self->_split;
317 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl1, 1, $sl1->end - $sl1->start + 1, $sl1->strand) };
320 $seqAdaptor->fetch_by_Slice_start_end_strand( $sl2, 1, $sl2->end - $sl2->start + 1, $sl2->strand) };
322 return $seq1 . $seq2;
326 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, undef,
330 } ## end
if ( $self->adaptor() )
332 # no attached sequence, and no db, so just return Ns
333 return 'N' x $self->length();
338 Arg [1] :
int $startBasePair
339 relative to start of slice, which is 1.
340 Arg [2] :
int $endBasePair
341 relative to start of slice.
342 Arg [3] : (optional)
int $strand
343 The strand of the slice to obtain sequence from. Default
345 Description: returns
string of dna sequence
347 Exceptions : end should be at least as big as start
355 my ( $self, $start, $end, $strand ) = @_;
357 # handle 'between' case for insertions
358 return '' if ( $start == $end + 1 );
360 $strand = 1 unless ( defined $strand );
362 if ( $strand != -1 && $strand != 1 ) {
363 throw(
"Invalid strand [$strand] in call to Slice::subseq.");
366 my $length = $self->{
'seq_region_length'};
368 if ( $self->adaptor ) {
370 my $seqAdaptor = $self->adaptor->db->get_SequenceAdaptor();
371 if ( $end < $start ) {
373 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
377 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, 1, $end,
379 $subseq = $subseq1 . $subseq2;
383 $seqAdaptor->fetch_by_Slice_start_end_strand( $self, $start,
387 ## check for gap at the beginning and pad it with Ns
389 $subseq =
"N" x ( 1 - $start );
392 $subseq .= substr( $self->seq(), $start - 1, $end - $start + 1 );
393 ## check for gap at the end and pad it with Ns
394 if ( $end > $self->length() ) {
395 $subseq .=
"N" x ( $end - $self->length() );
397 reverse_comp( \$subseq )
if ( $strand == -1 );
407 Arg [1] : (optional)
int $five_prime_expand
408 The number of basepairs to shift
this slices five_prime
409 coordinate by. Positive values make the slice larger,
410 negative make the slice smaller.
413 Arg [2] : (optional)
int $three_prime_expand
414 The number of basepairs to shift
this slices three_prime
415 coordinate by. Positive values make the slice larger,
416 negative make the slice smaller.
418 Arg [3] : (optional)
bool $force_expand
419 if set to 1, then the slice will be contracted even in the
case
420 when shifts $five_prime_expand and $three_prime_expand overlap.
421 In that
case $five_prime_expand and $three_prime_expand will be set
422 to a maximum possible number and that will result in the slice
423 which would have only 2pbs.
425 Arg [4] : (optional)
int* $fpref
426 The reference to a number of basepairs to shift
this slices five_prime
427 coordinate by. Normally it would be set to $five_prime_expand.
428 But in
case when $five_prime_expand shift can not be applied and
429 $force_expand is set to 1, then $$fpref will contain the maximum possible
431 Arg [5] : (optional)
int* $tpref
432 The reference to a number of basepairs to shift
this slices three_prime
433 coordinate by. Normally it would be set to $three_prime_expand.
434 But in
case when $five_prime_expand shift can not be applied and
435 $force_expand is set to 1, then $$tpref will contain the maximum possible
437 Example : my $expanded_slice = $slice->expand( 1000, 1000);
438 my $contracted_slice = $slice->expand(-1000,-1000);
439 my $shifted_right_slice = $slice->expand(-1000, 1000);
440 my $shifted_left_slice = $slice->expand( 1000,-1000);
441 my $forced_contracted_slice = $slice->expand(-1000,-1000, 1, \$five_prime_shift, \$three_prime_shift);
443 Description: Returns a slice which is a resized copy of
this slice. The
444 start and end are moved outwards from the center of the slice
445 if positive values are provided and moved inwards
if negative
446 values are provided. This slice remains unchanged. A slice
447 may not be contracted below 1bp but may grow to be arbitrarily
450 Exceptions : warning
if an attempt is made to contract the slice below 1bp
458 my $five_prime_shift = shift || 0;
459 my $three_prime_shift = shift || 0;
460 my $force_expand = shift || 0;
464 if ( $self->{
'seq'} ) {
466 "Cannot expand a slice which has a manually attached sequence ");
472 my $sshift = $five_prime_shift;
473 my $eshift = $three_prime_shift;
475 if ( $self->{
'strand'} != 1 ) {
476 $eshift = $five_prime_shift;
477 $sshift = $three_prime_shift;
480 $new_start = $self->{
'start'} - $sshift;
481 $new_end = $self->{
'end'} + $eshift;
483 # if($new_start > $new_end) {
484 # if ($force_expand) { # Apply max possible shift, if force_expand is set
485 # if ($sshift < 0) { # if we are contracting the slice from the start - move the start just before the end
486 # $new_start = $new_end - 1;
487 # $sshift = $self->{start} - $new_start;
490 # if($new_start > $new_end) { # if the slice still has a negative length - try to move the end
492 # $new_end = $new_start + 1;
493 # $eshift = $new_end - $self->{end};
496 # return the values by which the primes were actually shifted
497 # $$tpref = $self->{strand} == 1 ? $eshift : $sshift;
498 # $$fpref = $self->{strand} == 1 ? $sshift : $eshift;
500 # if($new_start > $new_end) {
501 # throw('Slice start cannot be greater than slice end');
505 #fastest way to copy a slice is to do a shallow hash copy
506 my %new_slice = %$self;
507 $new_slice{
'start'} = int($new_start);
508 $new_slice{
'end'} = int($new_end);
510 return bless \%new_slice, ref($self);
515 =head2 get_all_VariationFeatures
517 Args : $filter [optional]
518 Description:returns all variation features on
this slice. This
function will only work
519 correctly
if the variation database has been attached to the core database.
520 If $filter is
"genotyped" return genotyped Snps only... (nice likkle hack);
521 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
523 Caller : contigview, snpview
525 : Variation database is under development.
529 sub get_all_VariationFeatures {
534 if ( !$self->adaptor() ) {
535 warning(
'Cannot get variation features without attached adaptor');
541 -species => $self->adaptor()->db()->species,
542 -type =>
"VariationFeature" );
544 if ( $filter eq
'genotyped' ) {
545 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
547 return $vf_adaptor->fetch_all_by_Slice($self);
550 warning(
"Variation database must be attached to core database to "
551 .
"retrieve variation information" );
559 =head2 get_all_genotyped_VariationFeatures
562 Description: returns all variation features on
this slice that have been genotyped.
563 This
function will only work correctly
if the variation database has
564 been attached to the core database.
565 ReturnType : listref of Bio::EnsEMBL::Variation::VariationFeature
567 Caller : contigview, snpview, ldview
569 : Variation database is under development.
573 sub get_all_genotyped_VariationFeatures {
576 if ( !$self->adaptor() ) {
577 warning(
'Cannot get variation features without attached adaptor');
583 -species => $self->adaptor()->db()->species,
584 -type =>
"VariationFeature" );
587 return $vf_adaptor->fetch_all_genotyped_by_Slice($self);
589 warning(
"Variation database must be attached to core database to "
590 .
"retrieve variation information" );
596 =head2 get_all_DASFeatures
599 Example : $features = $slice->get_all_DASFeatures;
600 Description: Retrieves a hash reference to a hash of DAS feature
601 sets, keyed by the DNS, NOTE the values of
this hash
602 are an anonymous array containing:
603 (1) a pointer to an array of features;
604 (2) a pointer to the DAS stylesheet
605 Returntype : hashref of Bio::SeqFeatures
611 sub get_all_DASFeatures {
612 my ( $self, $source_type ) = @_;
614 if ( !$self->adaptor() ) {
615 warning(
"Cannot retrieve features without attached adaptor");
619 my %genomic_features =
map {
620 ( $_->adaptor->dsn =>
621 [ $_->fetch_all_Features( $self, $source_type ) ] )
622 } $self->adaptor()->db()->_each_DASFeatureFactory;
623 return \%genomic_features;
628 =head2 project_to_slice
630 Arg [1] : Slice to project to.
631 Example : my $chr_projection = $clone_slice->project_to_slice($chrom_slice);
632 foreach my $segment ( @$chr_projection ){
633 $chr_slice = $segment->to_Slice();
634 print $clone_slice->seq_region_name().
':'. $segment->from_start().
'-'.
635 $segment->from_end().
' -> '.$chr_slice->seq_region_name().
':'. $chr_slice->start().
636 '-'.$chr_slice->end().
637 $chr_slice->strand().
" length: ".($chr_slice->end()-$chr_slice->start()+1).
"\n";
639 Description: Projection of slice to another specific slice. Needed
for where we have multiple mappings
640 and we want to state which one to project to.
642 can also be used as [$start,$end,$slice] triplets.
649 sub project_to_slice {
651 my $to_slice = shift;
653 throw(
'Slice argument is required')
if ( !$to_slice );
655 my $slice_adaptor = $self->adaptor();
657 if ( !$slice_adaptor ) {
658 warning(
"Cannot project without attached adaptor.");
662 my $mapper_aptr = $slice_adaptor->db->get_AssemblyMapperAdaptor();
664 my $cs = $to_slice->coord_system();
665 my $slice_cs = $self->coord_system();
668 my $current_start = 1;
670 # decompose this slice into its symlinked components.
671 # this allows us to handle haplotypes and PARs
672 my $normal_slice_proj =
673 $slice_adaptor->fetch_normalized_slice_projection($self);
674 foreach my $segment (@$normal_slice_proj) {
675 my $normal_slice = $segment->[2];
677 $slice_cs = $normal_slice->coord_system();
679 my $asma = $self->adaptor->db->get_AssemblyMapperAdaptor();
680 my $asm_mapper = $asma->fetch_by_CoordSystems( $slice_cs, $cs );
682 # perform the mapping between this slice and the requested system
685 if ( defined $asm_mapper ) {
686 @coords = $asm_mapper->map( $normal_slice->seq_region_name(),
687 $normal_slice->start(),
688 $normal_slice->end(),
689 $normal_slice->strand(),
696 $normal_slice->end() );
699 #construct a projection from the mapping results and return it
700 foreach my $coord (@coords) {
701 my $coord_start = $coord->
start();
702 my $coord_end = $coord->end();
703 my $length = $coord_end - $coord_start + 1;
706 if ( $coord->isa(
'Bio::EnsEMBL::Mapper::Coordinate') ) {
707 my $coord_cs = $coord->coord_system();
709 # If the normalised projection just ended up mapping to the
710 # same coordinate system we were already in then we should just
711 # return the original region. This can happen for example, if we
712 # were on a PAR region on Y which refered to X and a projection to
713 # 'toplevel' was requested.
714 # if($coord_cs->equals($slice_cs)) {
715 # # trim off regions which are not defined
716 # return $self->_constrain_to_region();
719 #create slices for the mapped-to coord system
721 $slice_adaptor->fetch_by_seq_region_id( $coord->id(),
722 $coord_start, $coord_end, $coord->strand() );
724 my $current_end = $current_start + $length - 1;
727 bless( [ $current_start, $current_end, $slice ],
728 "Bio::EnsEMBL::ProjectionSegment" );
731 $current_start += $length;
732 } ## end
foreach my $coord (@coords)
733 } ## end
foreach my $segment (@$normal_slice_proj)
735 # delete the cache as we may want to map to different set next time and old
736 # results will be cached.
738 $mapper_aptr->delete_cache;
741 } ## end sub project_to_slice
744 # Bioperl Bio::PrimarySeqI methods:
749 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
757 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
761 sub display_id { name(@_); }
765 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
769 sub primary_id { name(@_); }
773 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
778 return $_[0]->coord_system->name() .
' ' . $_[0]->seq_region_name();
783 Description: Included
for Bio::PrimarySeqI
interface compliance (0.7)
787 sub moltype {
return 'dna'; }
791 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
795 sub alphabet {
return 'dna'; }
797 =head2 accession_number
799 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
803 sub accession_number { name(@_); }
807 Description: Included
for Bio::PrimarySeqI
interface compliance (1.2)
814 if ( !defined( $self->{
'circular'} ) ) {
816 grep { $_ } @{ $self->get_all_Attributes(
'circular_seq') };
817 $self->{
'circular'} = @attrs ? 1 : 0;
820 return $self->{
'circular'};