3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 $sa = $db->get_SliceAdaptor();
40 $sa->fetch_by_region(
'chromosome',
'X', 1_000_000, 2_000_000 );
44 # get repeat masked sequence:
45 my $dna = $repeat_masked_slice->
seq();
46 $dna = $repeat_masked_slice->subseq( 1, 1000 );
51 repeat masked genomic sequence rather than normal genomic sequence.
57 package Bio::EnsEMBL::RepeatMaskedSlice;
69 @ISA = (
'Bio::EnsEMBL::Slice');
71 # The BLOCK_PWR is the log2 (binary logarithm) of the chunk size in which you want your repeat features
72 # to be retrieved. This will create repeat feature retrieval calls that are likely
73 # to be on the same slice and hopefully create cache hits and less database traffic
80 Arg [-REPEAT_MASK] : The logic name of the repeats to be used
for masking.
81 If not provided, all repeats in the database are used.
82 Arg [...] : Named superclass arguments. See B<Bio::EnsEMBL::Slice>.
87 -SEQ_REGION_NAME => $seq_region,
88 -SEQ_REGION_LENGTH => $seq_region_length,
91 -REPEAT_MASK => [
'repeat_masker'],
93 -NOT_DEFAULT_MASKING_CASES => {
"repeat_class_SINE/MIR" => 1,
94 "repeat_name_AluSp" => 0});
95 Description: Creates a
Slice which behaves exactly as a normal slice but
96 that returns repeat masked sequence from the seq method.
106 my $class = ref($caller) || $caller;
108 my ($logic_names, $soft_mask, $not_default_masking_cases) = rearrange([
'REPEAT_MASK',
110 'NOT_DEFAULT_MASKING_CASES'], @_);
112 my $self = $class->SUPER::new(@_);
115 $logic_names ||= [
''];
116 if(ref($logic_names) ne
'ARRAY') {
117 throw(
"Reference to list of logic names argument expected.");
120 $self->{
'repeat_mask_logic_names'} = $logic_names;
121 $self->{
'soft_mask'} = $soft_mask;
122 $self->{
'not_default_masking_cases'} = $not_default_masking_cases;
123 $self->{
'not_default_masking_cases'} ||= {};
129 =head2 repeat_mask_logic_names
131 Arg [1] : reference to list of strings $logic_names (optional)
132 Example : $rm_slice->repeat_mask_logic_names([
'repeat_masker']);
133 Description: Getter/Setter
for the logic_names of the repeats that are used
134 to mask
this slice's sequence.
135 Returntype : reference to list of strings
137 Caller : seq() method
142 sub repeat_mask_logic_names {
147 if(ref($array) ne
'ARRAY') {
148 throw(
'Reference to list of logic names argument expected.');
152 return $self->{
'repeat_mask_logic_names'};
158 Arg [1] :
boolean $soft_mask (optional)
159 Example : $rm_slice->soft_mask(0);
160 Description: Getter/Setter which is used to turn on/off softmasking of the
161 sequence returned by seq.
164 Caller : seq() method
171 $self->{
'soft_mask'} = shift
if(@_);
172 return $self->{
'soft_mask'} || 0;
175 =head2 not_default_masking_cases
177 Arg [1] : hash reference $not_default_masking_cases (optional,
default is {})
178 The values are 0 or 1
for hard and soft masking respectively
179 The keys of the hash should be of 2 forms
180 "repeat_class_" . $repeat_consensus->repeat_class,
181 e.g.
"repeat_class_SINE/MIR"
182 "repeat_name_" . $repeat_consensus->name
183 e.g.
"repeat_name_MIR"
184 depending on whether you want the non-default masking to be keyed on
185 the repeat_class or the repeat_name. Both can be specified in the same hash
186 at the same time, but in that
case, repeat_name setting has priority over
187 repeat_class. For example, you may have hard masking as
default, and
188 you may want soft masking of all repeat_class SINE/MIR,
189 but keep hard masking for repeat_name AluSp (which is also in repeat_class SINE/MIR).
190 Example : $rm_slice->not_default_masking_cases({
"repeat_class_SINE/MIR" => 1,
191 "repeat_name_AluSp" => 0});
192 Description: Getter/Setter which is used to escape some repeat
class or name from the default
194 Returntype : hash reference
196 Caller : seq() and subseq() methods
201 sub not_default_masking_cases {
203 $self->{
'not_default_masking_cases'} = shift
if (@_);
204 return $self->{
'not_default_masking_cases'};
210 Example : print $rmslice->seq(),
"\n";
211 Description: Retrieves the entire repeat masked sequence
for this slice.
212 See also the B<Bio::EnsEMBL::Slice> implementation of
this
225 # get all the features
227 my $repeats = $self->_get_repeat_features($self);
228 my $soft_mask = $self->soft_mask();
229 my $not_default_masking_cases = $self->not_default_masking_cases();
234 my $dna = $self->SUPER::seq(@_);
239 $self->_mask_features(\$dna,$repeats,$soft_mask,$not_default_masking_cases);
246 Example : print $rmslice->subseq(1, 1000);
247 Description: Retrieves a repeat masked sequence from a specified subregion
248 of
this slice. See also the B<Bio::EnsEMBL::Slice>
249 implementation of
this method.
263 my $subsequence_slice = $self->sub_Slice($start, $end, $strand);
265 # If frequent subseqs happen on repeatMasked sequence this results in
266 # a lot of feature retrieval from the database. To avoid this, features
267 # are only retrieved from subslices with fixed space boundaries.
268 # The access happens in blocks to make cache hits more likely
269 # ONLY DO IF WE ARE CACHING
272 if(! $self->adaptor()->db()->no_cache()) {
274 my $seq_region_slice = $self->seq_region_Slice();
275 # The block size can be defined at the top of this module.
276 my $block_min = ($subsequence_slice->start()-1) >> $BLOCK_PWR;
277 my $block_max = ($subsequence_slice->end()-1) >> $BLOCK_PWR;
279 my $sub_start = ($block_min << $BLOCK_PWR)+1;
280 my $sub_end = ($block_max+1)<<$BLOCK_PWR;
281 if ($sub_end > $seq_region_slice->length) {
282 $sub_end = $seq_region_slice->length ;
284 $subslice = $seq_region_slice->sub_Slice($sub_start, $sub_end);
287 $subslice = $subsequence_slice;
290 my $repeats = $self->_get_repeat_features($subslice);
291 my $soft_mask = $self->soft_mask();
292 my $not_default_masking_cases = $self->not_default_masking_cases();
293 my $dna = $subsequence_slice->SUPER::seq();
294 $subsequence_slice->_mask_features(\$dna,$repeats,$soft_mask,$not_default_masking_cases);
298 =head2 _get_repeat_features
301 Description : Gets repeat features
for the given slice
308 sub _get_repeat_features {
309 my ($self, $slice) = @_;
310 my $logic_names = $self->repeat_mask_logic_names();
312 foreach my $l (@$logic_names) {