ensembl-hive  2.8.1
SimpleFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 
35 =head1 SYNOPSIS
36 
37  my $registry = 'Bio::EnsEMBL::Registry';
38 
39  $registry->
40  load_registry_from_db( ...
41 
42  my $sfa =
43  $registry->get_adaptor('homo sapiens', 'core', 'SimpleFeature');
44 
45  print ref($sfa), "\n";
46 
47  my $sf_aref =
48  $sfa->fetch_all;
49 
50  print scalar @$sf_aref, "\n";
51 
52 =head1 DESCRIPTION
53 
54 Simple Feature Adaptor - database access for simple features
55 
56 =head1 METHODS
57 
58 =cut
59 
60 package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor;
61 
62 use vars qw(@ISA);
63 use strict;
64 
67 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
68 
70 
71 
72 =head2 store
73 
74  Arg [1] : list of Bio::EnsEMBL::SimpleFeatures @sf
75  the simple features to store in the database
76  Example : $simple_feature_adaptor->store(@simple_feats);
77  Description: Stores a list of simple feature objects in the database
78  Returntype : none
79  Exceptions : thrown if @sf is not defined, if any of the features do not
80  have an attached slice.
81  or if any elements of @sf are not Bio::EnsEMBL::SimpleFeatures
82  Caller : general
83  Status : Stable
84 
85 =cut
86 
87 sub store{
88  my ($self,@sf) = @_;
89 
90  if( scalar(@sf) == 0 ) {
91  throw("Must call store with list of SimpleFeatures");
92  }
93 
94  my $sth = $self->prepare
95  ("INSERT INTO simple_feature (seq_region_id, seq_region_start, " .
96  "seq_region_end, seq_region_strand, " .
97  "display_label, analysis_id, score) " .
98  "VALUES (?,?,?,?,?,?,?)");
99 
100  my $db = $self->db();
101  my $analysis_adaptor = $db->get_AnalysisAdaptor();
102 
103  FEATURE: foreach my $sf ( @sf ) {
104 
105  if( !ref $sf || !$sf->isa("Bio::EnsEMBL::SimpleFeature") ) {
106  throw("SimpleFeature must be an Ensembl SimpleFeature, " .
107  "not a [".ref($sf)."]");
108  }
109 
110  if($sf->is_stored($db)) {
111  warning("SimpleFeature [".$sf->dbID."] is already stored" .
112  " in this database.");
113  next FEATURE;
114  }
115 
116  if(!defined($sf->analysis)) {
117  throw("An analysis must be attached to the features to be stored.");
118  }
119 
120  #store the analysis if it has not been stored yet
121  if(!$sf->analysis->is_stored($db)) {
122  $analysis_adaptor->store($sf->analysis());
123  }
124 
125  my $original = $sf;
126  my $seq_region_id;
127  ($sf, $seq_region_id) = $self->_pre_store($sf);
128 
129  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
130  $sth->bind_param(2,$sf->start,SQL_INTEGER);
131  $sth->bind_param(3,$sf->end,SQL_INTEGER);
132  $sth->bind_param(4,$sf->strand,SQL_TINYINT);
133  $sth->bind_param(5,$sf->display_label,SQL_VARCHAR);
134  $sth->bind_param(6,$sf->analysis->dbID,SQL_INTEGER);
135  $sth->bind_param(7,$sf->score,SQL_DOUBLE);
136 
137  $sth->execute();
138 
139  $original->dbID($self->last_insert_id('simple_feature_id', undef, 'simple_feature'));
140  $original->adaptor($self);
141  }
142 }
143 
144 
145 =head2 _tables
146 
147  Arg [1] : none
148  Example : none
149  Description: PROTECTED implementation of superclass abstract method
150  returns the names, aliases of the tables to use for queries
151  Returntype : list of listrefs of strings
152  Exceptions : none
153  Caller : internal
154  Status : Stable
155 
156 =cut
157 
158 sub _tables {
159  my $self = shift;
160 
161  return ['simple_feature', 'sf'];
162 }
163 
164 
165 =head2 _columns
166 
167  Arg [1] : none
168  Example : none
169  Description: PROTECTED implementation of superclass abstract method
170  returns a list of columns to use for queries
171  Returntype : list of strings
172  Exceptions : none
173  Caller : internal
174  Status : Stable
175 
176 =cut
177 
178 sub _columns {
179  my $self = shift;
180 
181  return qw( sf.simple_feature_id
182  sf.seq_region_id sf.seq_region_start sf.seq_region_end
183  sf.seq_region_strand sf.display_label sf.analysis_id sf.score );
184 }
185 
186 
187 =head2 _objs_from_sth
188 
189  Arg [1] : hash reference $hashref
190  Example : none
191  Description: PROTECTED implementation of superclass abstract method.
192  creates SimpleFeatures from an executed DBI statement handle.
193  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
194  Exceptions : none
195  Caller : internal
196  Status : Stable
197 
198 =cut
199 
200 sub _objs_from_sth {
201  my ($self, $sth, $mapper, $dest_slice) = @_;
202 
203  #
204  # This code is ugly because an attempt has been made to remove as many
205  # function calls as possible for speed purposes. Thus many caches and
206  # a fair bit of gymnastics is used.
207  #
208 
209  my $sa = $self->db()->get_SliceAdaptor();
210  my $aa = $self->db()->get_AnalysisAdaptor();
211 
212  my @features;
213  my %analysis_hash;
214  my %slice_hash;
215  my %sr_name_hash;
216  my %sr_cs_hash;
217 
218  my(
219  $simple_feature_id, $seq_region_id, $seq_region_start,
220  $seq_region_end, $seq_region_strand, $display_label,
221  $analysis_id, $score);
222 
223  $sth->bind_columns(\(
224  $simple_feature_id, $seq_region_id, $seq_region_start,
225  $seq_region_end, $seq_region_strand, $display_label,
226  $analysis_id, $score));
227 
228  my $dest_slice_start;
229  my $dest_slice_end;
230  my $dest_slice_strand;
231  my $dest_slice_length;
232  my $dest_slice_cs;
233  my $dest_slice_sr_name;
234  my $dest_slice_sr_id;
235  my $asma;
236 
237  if ($dest_slice) {
238  $dest_slice_start = $dest_slice->start();
239  $dest_slice_end = $dest_slice->end();
240  $dest_slice_strand = $dest_slice->strand();
241  $dest_slice_length = $dest_slice->length();
242  $dest_slice_cs = $dest_slice->coord_system();
243  $dest_slice_sr_name = $dest_slice->seq_region_name();
244  $dest_slice_sr_id = $dest_slice->get_seq_region_id();
245  $asma = $self->db->get_AssemblyMapperAdaptor();
246  }
247 
248  FEATURE: while($sth->fetch()) {
249 
250  #get the analysis object
251  my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
252  $analysis_hash{$analysis_id} = $analysis;
253 
254  #need to get the internal_seq_region, if present
255  $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
256  my $slice = $slice_hash{"ID:".$seq_region_id};
257 
258  if (!$slice) {
259  $slice = $sa->fetch_by_seq_region_id($seq_region_id);
260  $slice_hash{"ID:".$seq_region_id} = $slice;
261  $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
262  $sr_cs_hash{$seq_region_id} = $slice->coord_system();
263  }
264 
265  #obtain a mapper if none was defined, but a dest_seq_region was
266  if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
267  $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
268  }
269 
270  my $sr_name = $sr_name_hash{$seq_region_id};
271  my $sr_cs = $sr_cs_hash{$seq_region_id};
272 
273  #
274  # remap the feature coordinates to another coord system
275  # if a mapper was provided
276  #
277 
278  if ($mapper) {
279 
280  if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
281  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
282  $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
283 
284  } else {
285  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
286  $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
287  }
288 
289  #skip features that map to gaps or coord system boundaries
290  next FEATURE if (!defined($seq_region_id));
291 
292  #get a slice in the coord system we just mapped to
293  $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
294  }
295 
296  #
297  # If a destination slice was provided convert the coords.
298  #
299  if (defined($dest_slice)) {
300  my $seq_region_len = $dest_slice->seq_region_length();
301 
302  if ( $dest_slice_strand == 1 ) {
303  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
304  $seq_region_end = $seq_region_end - $dest_slice_start + 1;
305 
306  if ( $dest_slice->is_circular ) {
307  # Handle circular chromosomes.
308 
309  if ( $seq_region_start > $seq_region_end ) {
310  # Looking at a feature overlapping the chromosome origin.
311 
312  if ( $seq_region_end > $dest_slice_start ) {
313  # Looking at the region in the beginning of the chromosome
314  $seq_region_start -= $seq_region_len;
315  }
316  if ( $seq_region_end < 0 ) {
317  $seq_region_end += $seq_region_len;
318  }
319  } else {
320  if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
321  # Looking at the region overlapping the chromosome
322  # origin and a feature which is at the beginning of the
323  # chromosome.
324  $seq_region_start += $seq_region_len;
325  $seq_region_end += $seq_region_len;
326  }
327  }
328  }
329  } else {
330 
331  my $start = $dest_slice_end - $seq_region_end + 1;
332  my $end = $dest_slice_end - $seq_region_start + 1;
333 
334  if ($dest_slice->is_circular()) {
335 
336  if ($dest_slice_start > $dest_slice_end) {
337  # slice spans origin or replication
338 
339  if ($seq_region_start >= $dest_slice_start) {
340  $end += $seq_region_len;
341  $start += $seq_region_len if $seq_region_end > $dest_slice_start;
342 
343  } elsif ($seq_region_start <= $dest_slice_end) {
344  # do nothing
345  } elsif ($seq_region_end >= $dest_slice_start) {
346  $start += $seq_region_len;
347  $end += $seq_region_len;
348 
349  } elsif ($seq_region_end <= $dest_slice_end) {
350  $end += $seq_region_len if $end < 0;
351 
352  } elsif ($seq_region_start > $seq_region_end) {
353  $end += $seq_region_len;
354  }
355 
356  } else {
357 
358  if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
359  # do nothing
360  } elsif ($seq_region_start > $seq_region_end) {
361  if ($seq_region_start <= $dest_slice_end) {
362  $start -= $seq_region_len;
363  } elsif ($seq_region_end >= $dest_slice_start) {
364  $end += $seq_region_len;
365  }
366  }
367  }
368  }
369 
370  $seq_region_start = $start;
371  $seq_region_end = $end;
372  $seq_region_strand *= -1;
373 
374  } ## end else [ if ( $dest_slice_strand...)]
375 
376  # Throw away features off the end of the requested slice or on
377  # different seq_region.
378  if ($seq_region_end < 1
379  || $seq_region_start > $dest_slice_length
380  || ($dest_slice_sr_id != $seq_region_id)) {
381  next FEATURE;
382  }
383  $slice = $dest_slice;
384  }
385 
386  push( @features,
387  $self->_create_feature_fast(
388  'Bio::EnsEMBL::SimpleFeature', {
389  'start' => $seq_region_start,
390  'end' => $seq_region_end,
391  'strand' => $seq_region_strand,
392  'slice' => $slice,
393  'analysis' => $analysis,
394  'adaptor' => $self,
395  'dbID' => $simple_feature_id,
396  'display_label' => $display_label,
397  'score' => $score
398  } ) );
399 
400  }
401 
402  return \@features;
403 }
404 
405 
406 =head2 list_dbIDs
407 
408  Arg [1] : none
409  Example : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
410  Description: Gets an array of internal ids for all simple features in the current db
411  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
412  Returntype : list of ints
413  Exceptions : none
414  Caller : ?
415  Status : Stable
416 
417 =cut
418 
419 sub list_dbIDs {
420  my ($self, $ordered) = @_;
421 
422  return $self->_list_dbIDs("simple_feature", undef, $ordered);
423 }
424 
425 1;
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Definition: BaseFeatureAdaptor.pm:24
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor
Definition: SimpleFeatureAdaptor.pm:31
Bio::EnsEMBL::SimpleFeature
Definition: SimpleFeature.pm:31
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68