ensembl-hive  2.7.0
SimpleFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 
35 =head1 SYNOPSIS
36 
37  my $registry = 'Bio::EnsEMBL::Registry';
38 
39  $registry->
40  load_registry_from_db( ...
41 
42  my $sfa =
43  $registry->get_adaptor('homo sapiens', 'core', 'SimpleFeature');
44 
45  print ref($sfa), "\n";
46 
47  my $sf_aref =
48  $sfa->fetch_all;
49 
50  print scalar @$sf_aref, "\n";
51 
52 =head1 DESCRIPTION
53 
54 Simple Feature Adaptor - database access for simple features
55 
56 =head1 METHODS
57 
58 =cut
59 
60 package Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor;
61 use vars qw(@ISA);
62 use strict;
63 
66 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
67 
69 
70 
71 =head2 store
72 
73  Arg [1] : list of Bio::EnsEMBL::SimpleFeatures @sf
74  the simple features to store in the database
75  Example : $simple_feature_adaptor->store(@simple_feats);
76  Description: Stores a list of simple feature objects in the database
77  Returntype : none
78  Exceptions : thrown if @sf is not defined, if any of the features do not
79  have an attached slice.
80  or if any elements of @sf are not Bio::EnsEMBL::SimpleFeatures
81  Caller : general
82  Status : Stable
83 
84 =cut
85 
86 sub store{
87  my ($self,@sf) = @_;
88 
89  if( scalar(@sf) == 0 ) {
90  throw("Must call store with list of SimpleFeatures");
91  }
92 
93  my $sth = $self->prepare
94  ("INSERT INTO simple_feature (seq_region_id, seq_region_start, " .
95  "seq_region_end, seq_region_strand, " .
96  "display_label, analysis_id, score) " .
97  "VALUES (?,?,?,?,?,?,?)");
98 
99  my $db = $self->db();
100  my $analysis_adaptor = $db->get_AnalysisAdaptor();
101 
102  FEATURE: foreach my $sf ( @sf ) {
103 
104  if( !ref $sf || !$sf->isa("Bio::EnsEMBL::SimpleFeature") ) {
105  throw("SimpleFeature must be an Ensembl SimpleFeature, " .
106  "not a [".ref($sf)."]");
107  }
108 
109  if($sf->is_stored($db)) {
110  warning("SimpleFeature [".$sf->dbID."] is already stored" .
111  " in this database.");
112  next FEATURE;
113  }
114 
115  if(!defined($sf->analysis)) {
116  throw("An analysis must be attached to the features to be stored.");
117  }
118 
119  #store the analysis if it has not been stored yet
120  if(!$sf->analysis->is_stored($db)) {
121  $analysis_adaptor->store($sf->analysis());
122  }
123 
124  my $original = $sf;
125  my $seq_region_id;
126  ($sf, $seq_region_id) = $self->_pre_store($sf);
127 
128  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
129  $sth->bind_param(2,$sf->start,SQL_INTEGER);
130  $sth->bind_param(3,$sf->end,SQL_INTEGER);
131  $sth->bind_param(4,$sf->strand,SQL_TINYINT);
132  $sth->bind_param(5,$sf->display_label,SQL_VARCHAR);
133  $sth->bind_param(6,$sf->analysis->dbID,SQL_INTEGER);
134  $sth->bind_param(7,$sf->score,SQL_DOUBLE);
135 
136  $sth->execute();
137 
138  $original->dbID($self->last_insert_id('simple_feature_id', undef, 'simple_feature'));
139  $original->adaptor($self);
140  }
141 }
142 
143 
144 =head2 _tables
145 
146  Arg [1] : none
147  Example : none
148  Description: PROTECTED implementation of superclass abstract method
149  returns the names, aliases of the tables to use for queries
150  Returntype : list of listrefs of strings
151  Exceptions : none
152  Caller : internal
153  Status : Stable
154 
155 =cut
156 
157 sub _tables {
158  my $self = shift;
159 
160  return ['simple_feature', 'sf'];
161 }
162 
163 
164 =head2 _columns
165 
166  Arg [1] : none
167  Example : none
168  Description: PROTECTED implementation of superclass abstract method
169  returns a list of columns to use for queries
170  Returntype : list of strings
171  Exceptions : none
172  Caller : internal
173  Status : Stable
174 
175 =cut
176 
177 sub _columns {
178  my $self = shift;
179 
180  return qw( sf.simple_feature_id
181  sf.seq_region_id sf.seq_region_start sf.seq_region_end
182  sf.seq_region_strand sf.display_label sf.analysis_id sf.score );
183 }
184 
185 
186 =head2 _objs_from_sth
187 
188  Arg [1] : hash reference $hashref
189  Example : none
190  Description: PROTECTED implementation of superclass abstract method.
191  creates SimpleFeatures from an executed DBI statement handle.
192  Returntype : list reference to Bio::EnsEMBL::SimpleFeature objects
193  Exceptions : none
194  Caller : internal
195  Status : Stable
196 
197 =cut
198 
199 sub _objs_from_sth {
200  my ($self, $sth, $mapper, $dest_slice) = @_;
201 
202  #
203  # This code is ugly because an attempt has been made to remove as many
204  # function calls as possible for speed purposes. Thus many caches and
205  # a fair bit of gymnastics is used.
206  #
207 
208  my $sa = $self->db()->get_SliceAdaptor();
209  my $aa = $self->db()->get_AnalysisAdaptor();
210 
211  my @features;
212  my %analysis_hash;
213  my %slice_hash;
214  my %sr_name_hash;
215  my %sr_cs_hash;
216 
217  my(
218  $simple_feature_id, $seq_region_id, $seq_region_start,
219  $seq_region_end, $seq_region_strand, $display_label,
220  $analysis_id, $score);
221 
222  $sth->bind_columns(\(
223  $simple_feature_id, $seq_region_id, $seq_region_start,
224  $seq_region_end, $seq_region_strand, $display_label,
225  $analysis_id, $score));
226 
227  my $dest_slice_start;
228  my $dest_slice_end;
229  my $dest_slice_strand;
230  my $dest_slice_length;
231  my $dest_slice_cs;
232  my $dest_slice_sr_name;
233  my $dest_slice_sr_id;
234  my $asma;
235 
236  if ($dest_slice) {
237  $dest_slice_start = $dest_slice->start();
238  $dest_slice_end = $dest_slice->end();
239  $dest_slice_strand = $dest_slice->strand();
240  $dest_slice_length = $dest_slice->length();
241  $dest_slice_cs = $dest_slice->coord_system();
242  $dest_slice_sr_name = $dest_slice->seq_region_name();
243  $dest_slice_sr_id = $dest_slice->get_seq_region_id();
244  $asma = $self->db->get_AssemblyMapperAdaptor();
245  }
246 
247  FEATURE: while($sth->fetch()) {
248 
249  #get the analysis object
250  my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
251  $analysis_hash{$analysis_id} = $analysis;
252 
253  #need to get the internal_seq_region, if present
254  $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
255  my $slice = $slice_hash{"ID:".$seq_region_id};
256 
257  if (!$slice) {
258  $slice = $sa->fetch_by_seq_region_id($seq_region_id);
259  $slice_hash{"ID:".$seq_region_id} = $slice;
260  $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
261  $sr_cs_hash{$seq_region_id} = $slice->coord_system();
262  }
263 
264  #obtain a mapper if none was defined, but a dest_seq_region was
265  if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
266  $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
267  }
268 
269  my $sr_name = $sr_name_hash{$seq_region_id};
270  my $sr_cs = $sr_cs_hash{$seq_region_id};
271 
272  #
273  # remap the feature coordinates to another coord system
274  # if a mapper was provided
275  #
276 
277  if ($mapper) {
278 
279  if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
280  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
281  $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
282 
283  } else {
284  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
285  $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
286  }
287 
288  #skip features that map to gaps or coord system boundaries
289  next FEATURE if (!defined($seq_region_id));
290 
291  #get a slice in the coord system we just mapped to
292  $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
293  }
294 
295  #
296  # If a destination slice was provided convert the coords.
297  #
298  if (defined($dest_slice)) {
299  my $seq_region_len = $dest_slice->seq_region_length();
300 
301  if ( $dest_slice_strand == 1 ) {
302  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
303  $seq_region_end = $seq_region_end - $dest_slice_start + 1;
304 
305  if ( $dest_slice->is_circular ) {
306  # Handle circular chromosomes.
307 
308  if ( $seq_region_start > $seq_region_end ) {
309  # Looking at a feature overlapping the chromosome origin.
310 
311  if ( $seq_region_end > $dest_slice_start ) {
312  # Looking at the region in the beginning of the chromosome
313  $seq_region_start -= $seq_region_len;
314  }
315  if ( $seq_region_end < 0 ) {
316  $seq_region_end += $seq_region_len;
317  }
318  } else {
319  if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
320  # Looking at the region overlapping the chromosome
321  # origin and a feature which is at the beginning of the
322  # chromosome.
323  $seq_region_start += $seq_region_len;
324  $seq_region_end += $seq_region_len;
325  }
326  }
327  }
328  } else {
329 
330  my $start = $dest_slice_end - $seq_region_end + 1;
331  my $end = $dest_slice_end - $seq_region_start + 1;
332 
333  if ($dest_slice->is_circular()) {
334 
335  if ($dest_slice_start > $dest_slice_end) {
336  # slice spans origin or replication
337 
338  if ($seq_region_start >= $dest_slice_start) {
339  $end += $seq_region_len;
340  $start += $seq_region_len if $seq_region_end > $dest_slice_start;
341 
342  } elsif ($seq_region_start <= $dest_slice_end) {
343  # do nothing
344  } elsif ($seq_region_end >= $dest_slice_start) {
345  $start += $seq_region_len;
346  $end += $seq_region_len;
347 
348  } elsif ($seq_region_end <= $dest_slice_end) {
349  $end += $seq_region_len if $end < 0;
350 
351  } elsif ($seq_region_start > $seq_region_end) {
352  $end += $seq_region_len;
353  }
354 
355  } else {
356 
357  if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
358  # do nothing
359  } elsif ($seq_region_start > $seq_region_end) {
360  if ($seq_region_start <= $dest_slice_end) {
361  $start -= $seq_region_len;
362  } elsif ($seq_region_end >= $dest_slice_start) {
363  $end += $seq_region_len;
364  }
365  }
366  }
367  }
368 
369  $seq_region_start = $start;
370  $seq_region_end = $end;
371  $seq_region_strand *= -1;
372 
373  } ## end else [ if ( $dest_slice_strand...)]
374 
375  # Throw away features off the end of the requested slice or on
376  # different seq_region.
377  if ($seq_region_end < 1
378  || $seq_region_start > $dest_slice_length
379  || ($dest_slice_sr_id != $seq_region_id)) {
380  next FEATURE;
381  }
382  $slice = $dest_slice;
383  }
384 
385  push( @features,
386  $self->_create_feature_fast(
387  'Bio::EnsEMBL::SimpleFeature', {
388  'start' => $seq_region_start,
389  'end' => $seq_region_end,
390  'strand' => $seq_region_strand,
391  'slice' => $slice,
392  'analysis' => $analysis,
393  'adaptor' => $self,
394  'dbID' => $simple_feature_id,
395  'display_label' => $display_label,
396  'score' => $score
397  } ) );
398 
399  }
400 
401  return \@features;
402 }
403 
404 
405 =head2 list_dbIDs
406 
407  Arg [1] : none
408  Example : @feature_ids = @{$simple_feature_adaptor->list_dbIDs()};
409  Description: Gets an array of internal ids for all simple features in the current db
410  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
411  Returntype : list of ints
412  Exceptions : none
413  Caller : ?
414  Status : Stable
415 
416 =cut
417 
418 sub list_dbIDs {
419  my ($self, $ordered) = @_;
420 
421  return $self->_list_dbIDs("simple_feature", undef, $ordered);
422 }
423 
424 1;
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Definition: BaseFeatureAdaptor.pm:24
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
Bio::EnsEMBL::DBSQL::SimpleFeatureAdaptor
Definition: SimpleFeatureAdaptor.pm:31
Bio::EnsEMBL::SimpleFeature
Definition: SimpleFeature.pm:31
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68