ensembl-hive  2.8.1
FastaSequenceAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =head1 CONTACT
19 
20  Please email comments or questions to the public Ensembl
21  developers list at <dev@ensembl.org>.
22 
23  Questions may also be sent to the Ensembl help desk at
24  <helpdesk@ensembl.org>.
25 
26 =cut
27 
28 =head1 NAME
29 
31 
32 =head1 DESCRIPTION
33 
34 This sequence adaptor extends the BaseSequenceAdaptor code to provide an
35 implementation of the .fai index lookup as defined by samtools. The code uses
36 this indexing system to access portions of sequence and translates Slice requests
37 into sensible locations for our FASTA query layer.
38 
39 The adaptor must be initalised with access to a Faidx compatible object and the FASTA
40 file backing must use the same seq_region_name as the querying slices otherwise
41 we cannot return the required data.
42 
43 =cut
44 
45 package Bio::EnsEMBL::DBSQL::FastaSequenceAdaptor;
46 
47 use strict;
48 use warnings;
49 
51 
52 use Bio::EnsEMBL::Utils::Exception qw/throw/;
53 use Bio::EnsEMBL::Utils::Scalar qw/assert_ref/;
54 use Bio::EnsEMBL::Utils::Sequence qw/reverse_comp/;
55 use English qw/-no_match_vars/;
56 require bytes;
57 
58 =head2 new
59 
60  Arg [1] : FileFaidx; $faindex. A FileFaidx object or a compatible version
61  Arg [2] : Integer; $chunk_power. Size of the region to cache
62  Arg [3] : Integer; $cache_size. Number of regions to cache
63  Description : Builds an instance of the FastaSequenceAdaptor
64 
65 =cut
66 
67 sub new {
68  my ($class, $faindex, $chunk_power, $cache_size) = @_;
69  my $self = $class->SUPER::new($chunk_power, $cache_size);
70  $self->faindex($faindex);
71  return $self;
72 }
73 
74 =head2 fetch_by_Slice_start_end_strand
75 
76  Arg [1] : Bio::EnsEMBL::Slice; $slice. Slice to fetch sequence for
77  Arg [2] : Integer; $start. Start of region to retrieve relative to the Slice (defaults to 1)
78  Arg [3] : Integer; $end. End of region to retreive relative to the Slice (defaults to length)
79  Arg [4] : Integer; $strand. Strand to fetch (defaults to 1)
80  Description : Fetches sequence for the given slice. Unlike the normal SequenceAdaptor we assume
81  Sequence is held in a FASTA file under the Slice's seq_region_name.
82  Exception : Thrown if we are given a circular slice
83 =cut
84 
85 sub fetch_by_Slice_start_end_strand {
86  my ( $self, $slice, $start, $end, $strand ) = @_;
87 
88  # Input checks
89  assert_ref($slice, 'Bio::EnsEMBL::Slice', 'slice');
90  if(defined $end && $start > $end && $slice->is_circular()) {
91  throw "Currently we do not support circular requests";
92  }
93 
94  #Get a new slice that spans the exact region to retrieve dna from.
95  #Then constrain to seq region if it's gone negative or over the end
96  $slice = $self->expand_Slice($slice, $start, $end, $strand);
97  $slice = $slice->constrain_to_seq_region();
98 
99  # This call is likely to barf if we try to query using a chr name
100  # we do not understand. Use can_access_Slice() to make sure
101  my $seq_ref = $self->_fetch_seq($slice->seq_region_name(), $slice->start(), $slice->length());
102  reverse_comp($seq_ref) if $strand == -1;
103  return $seq_ref;
104 }
105 
106 =head2 faindex
107 
108  Description : Holds a reference to the Faindex object to use for sequence access
109 
110 =cut
111 
112 sub faindex {
113  my ($self, $faindex) = @_;
114  if(defined $faindex) {
115  assert_ref($faindex, 'Bio::EnsEMBL::Utils::IO::FileFaidx', 'faidx');
116  $self->{faindex} = $faindex;
117  }
118  return $self->{faindex};
119 }
120 
121 =head2 can_access_Slice
122 
123  Description : Checks the lookup to see if we have access to the Slice given (using
124  seq region name as the ID). We reject any Circular Slice
125 
126 =cut
127 
128 sub can_access_Slice {
129  my ($self, $slice) = @_;
130  return 0 if $slice->is_circular();
131  return $self->faindex()->can_access_id($slice->seq_region_name());
132 }
133 
134 =head2 store
135 
136  Description : Unsupported operation. Please use a FASTA serialiser
137 
138 =cut
139 
140 sub store {
141  throw "Unsupported operation. Cannot store sequence in a fasta file";
142 }
143 
144 =head2 _fetch_raw_seq
145 
146  Description : Provides access to the underlying faindex object and returns a sequence scalar ref
147 
148 =cut
149 
150 sub _fetch_raw_seq {
151  my ($self, $id, $start, $length) = @_;
152  my $seq_ref = $self->faindex()->fetch_seq($id, $start, $length);
153  return $seq_ref;
154 }
155 
156 1;
Bio::EnsEMBL::Utils::Sequence
Definition: Sequence.pm:22
Bio::EnsEMBL::DBSQL::FastaSequenceAdaptor::faindex
public faindex()
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::DBSQL::SequenceAdaptor
Definition: SequenceAdaptor.pm:22
Bio::EnsEMBL::DBSQL::BaseSequenceAdaptor
Definition: BaseSequenceAdaptor.pm:35
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
Bio::EnsEMBL::Slice::constrain_to_seq_region
public Bio::EnsEMBL::Slice constrain_to_seq_region()
Bio::EnsEMBL::DBSQL::FastaSequenceAdaptor
Definition: FastaSequenceAdaptor.pm:20
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68