ensembl-hive  2.8.1
RepeatFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 
35 =head1 SYNOPSIS
36 
37  $rfa = $database_adaptor->get_RepeatFeatureAdaptor();
38 
39  my $repeat = $rfa->fetch_by_dbID(1234);
40  my @repeats = @{ $rfa->fetch_all_by_Slice($slice) };
41 
42 =head1 DESCRIPTION
43 
44 This is an adaptor for the retrieval and storage of RepeatFeature
45 objects from the database. Most of the implementation is in the
46 superclass BaseFeatureAdaptor.
47 
48 =head1 METHODS
49 
50 =cut
51 
52 package Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor;
53 
54 use strict;
57 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
58 use Bio::EnsEMBL::Utils::Scalar qw/wrap_array/;
59 
60 use vars qw(@ISA);
61 
63 
64 
65 =head2 fetch_all_by_Slice
66 
67  Arg [1] : Bio::EnsEMBL::Slice $slice
68  Arg [2] : (optional) string $logic_name
69  Limits RepeatFeatures obtained to those having an Analysis with
70  of the specified logic_name. If no logic name is specified
71  Repeats of all analysis types are retrieved.
72  Arg [3] : (optional) string/array $repeat_type
73  Limits RepeatFeatures obtained to those of specified
74  repeat_type
75  Example : @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, 'Type II Transposons')};
76  @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, ['Type II Transposons', 'RNA repeats'])};
77  Description: Retrieves repeat features overlapping the area designated by
78  the provided slice argument. Returned features will be in
79  in the same coordinate system as the provided slice and will
80  have coordinates relative to the slice start.
81  Returntype : reference to a list of Bio::EnsEMBL::RepeatFeatures.
82  Exceptions : throw on bad argument
83  Caller : Slice::get_all_RepeatFeatures
84  Status : Stable
85 
86 =cut
87 
88 sub fetch_all_by_Slice {
89  my $self = shift;
90  my $slice = shift;
91  my $logic_name = shift;
92  my $repeat_type = shift;
93 
94  my $constraint = '';
95 
96  # MySQL was optimising the query the incorrect way when joining to
97  # the repeat_consensus table on type
98  $self->_straight_join(1);
99 
100  if($repeat_type) {
101  my $rta = wrap_array($repeat_type);
102  if(scalar(@{$rta}) > 1) {
103  $constraint .= sprintf('rc.repeat_type IN (%s)', join(q{,}, map {"'${_}'"} @{$rta}));
104  }
105  else {
106  $constraint .= "rc.repeat_type = '${repeat_type}'";
107  }
108  }
109 
110  my $result =
111  $self->fetch_all_by_Slice_constraint($slice,$constraint,$logic_name);
112 
113 
114  $self->_straight_join(0);
115 
116  return $result;
117 }
118 
119 
120 # _tablename
121 #
122 # Arg [1] : none
123 # Example : none
124 # Description: PROTECTED Implementation of abstract superclass method to
125 # provide the name of the tables to query
126 # Returntype : string
127 # Exceptions : none
128 # Caller : internal
129 
130 
131 sub _tables {
132  my $self = shift;
133 
134  return (['repeat_feature', 'r'], ['repeat_consensus', 'rc']);
135 }
136 
137 
138 # _columns
139 #
140 # Arg [1] : none
141 # Example : none
142 # Description: PROTECTED Implementation of abstract superclass method to
143 # provide the name of the columns to query
144 # Returntype : list of strings
145 # Exceptions : none
146 # Caller : internal
147 
148 sub _columns {
149  my $self = shift;
150 
151  return qw (r.repeat_feature_id
152  r.seq_region_id
153  r.seq_region_start
154  r.seq_region_end
155  r.seq_region_strand
156  r.repeat_consensus_id
157  r.repeat_start
158  r.repeat_end
159  r.analysis_id
160  r.score
161  rc.repeat_name
162  rc.repeat_class
163  rc.repeat_type
164  rc.repeat_consensus);
165 }
166 
167 
168 # _default_where_clause
169 # Arg [1] : none
170 # Example : none
171 # Description: Overrides superclass method to provide an additional
172 # table joining constraint before the SQL query is performed.
173 # Returntype : string
174 # Exceptions : none
175 # Caller : generic_fetch
176 #
177 
178 sub _default_where_clause {
179  my $self = shift;
180 
181  return 'r.repeat_consensus_id = rc.repeat_consensus_id';
182 }
183 
184 
185 
186 # Description: PROTECTED implementation of abstract superclass method.
187 # responsible for the creation of RepeatFeatures from a
188 # hashref generated from an SQL query
189 
190 sub _objs_from_sth {
191  my ($self, $sth, $mapper, $dest_slice) = @_;
192 
193  #
194  # This code is ugly because an attempt has been made to remove as many
195  # function calls as possible for speed purposes. Thus many caches and
196  # a fair bit of gymnastics is used.
197  #
198 
199  my $rca = $self->db()->get_RepeatConsensusAdaptor();
200  my $sa = $self->db()->get_SliceAdaptor();
201  my $aa = $self->db()->get_AnalysisAdaptor();
202 
203  my @features;
204  my %rc_hash;
205  my %analysis_hash;
206  my %slice_hash;
207  my %sr_name_hash;
208  my %sr_cs_hash;
209 
210  my($repeat_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
211  $seq_region_strand, $repeat_consensus_id, $repeat_start, $repeat_end,
212  $analysis_id, $score, $repeat_name, $repeat_class, $repeat_type,
213  $repeat_consensus);
214 
215  $sth->bind_columns( \$repeat_feature_id, \$seq_region_id, \$seq_region_start,
216  \$seq_region_end, \$seq_region_strand,
217  \$repeat_consensus_id, \$repeat_start,\$repeat_end,
218  \$analysis_id, \$score, \$repeat_name, \$repeat_class,
219  \$repeat_type, \$repeat_consensus );
220 
221  my $dest_slice_start;
222  my $dest_slice_end;
223  my $dest_slice_strand;
224  my $dest_slice_length;
225  my $dest_slice_cs;
226  my $dest_slice_sr_name;
227  my $dest_slice_sr_id;
228  my $asma;
229 
230  if ($dest_slice) {
231  $dest_slice_start = $dest_slice->start();
232  $dest_slice_end = $dest_slice->end();
233  $dest_slice_strand = $dest_slice->strand();
234  $dest_slice_length = $dest_slice->length();
235  $dest_slice_cs = $dest_slice->coord_system();
236  $dest_slice_sr_name = $dest_slice->seq_region_name();
237  $dest_slice_sr_id = $dest_slice->get_seq_region_id();
238  $asma = $self->db->get_AssemblyMapperAdaptor();
239  }
240 
241  FEATURE: while($sth->fetch()) {
242  #create a repeat consensus object
243 
244  my $rc = $rc_hash{$repeat_consensus_id} ||=
246  ({'dbID' => $repeat_consensus_id,
247  'adaptor' => $rca,
248  'name' => $repeat_name,
249  'repeat_class' => $repeat_class,
250  'repeat_type' => $repeat_type,
251  'repeat_consensus' => $repeat_consensus,
252  'length' => length($repeat_consensus)});
253 
254  #get the analysis object
255  my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
256  $analysis_hash{$analysis_id} = $analysis;
257 
258  #need to get the internal_seq_region, if present
259  $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
260  my $slice = $slice_hash{"ID:".$seq_region_id};
261 
262  if (!$slice) {
263  $slice = $sa->fetch_by_seq_region_id($seq_region_id);
264  $slice_hash{"ID:".$seq_region_id} = $slice;
265  $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
266  $sr_cs_hash{$seq_region_id} = $slice->coord_system();
267  }
268 
269  #obtain a mapper if none was defined, but a dest_seq_region was
270  if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
271  $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
272  }
273 
274  my $sr_name = $sr_name_hash{$seq_region_id};
275  my $sr_cs = $sr_cs_hash{$seq_region_id};
276 
277  #
278  # remap the feature coordinates to another coord system
279  # if a mapper was provided
280  #
281 
282  if ($mapper) {
283 
284  if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
285  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
286  $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
287 
288  } else {
289  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
290  $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
291  }
292 
293  #skip features that map to gaps or coord system boundaries
294  next FEATURE if (!defined($seq_region_id));
295 
296  #get a slice in the coord system we just mapped to
297  $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
298  }
299 
300  #
301  # If a destination slice was provided convert the coords.
302  #
303  if (defined($dest_slice)) {
304  my $seq_region_len = $dest_slice->seq_region_length();
305 
306  if ( $dest_slice_strand == 1 ) {
307  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
308  $seq_region_end = $seq_region_end - $dest_slice_start + 1;
309 
310  if ( $dest_slice->is_circular ) {
311  # Handle circular chromosomes.
312 
313  if ( $seq_region_start > $seq_region_end ) {
314  # Looking at a feature overlapping the chromosome origin.
315 
316  if ( $seq_region_end > $dest_slice_start ) {
317  # Looking at the region in the beginning of the chromosome
318  $seq_region_start -= $seq_region_len;
319  }
320  if ( $seq_region_end < 0 ) {
321  $seq_region_end += $seq_region_len;
322  }
323  } else {
324  if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
325  # Looking at the region overlapping the chromosome
326  # origin and a feature which is at the beginning of the
327  # chromosome.
328  $seq_region_start += $seq_region_len;
329  $seq_region_end += $seq_region_len;
330  }
331  }
332  }
333  } else {
334 
335  my $start = $dest_slice_end - $seq_region_end + 1;
336  my $end = $dest_slice_end - $seq_region_start + 1;
337 
338  if ($dest_slice->is_circular()) {
339 
340  if ($dest_slice_start > $dest_slice_end) {
341  # slice spans origin or replication
342 
343  if ($seq_region_start >= $dest_slice_start) {
344  $end += $seq_region_len;
345  $start += $seq_region_len if $seq_region_end > $dest_slice_start;
346 
347  } elsif ($seq_region_start <= $dest_slice_end) {
348  # do nothing
349  } elsif ($seq_region_end >= $dest_slice_start) {
350  $start += $seq_region_len;
351  $end += $seq_region_len;
352 
353  } elsif ($seq_region_end <= $dest_slice_end) {
354  $end += $seq_region_len if $end < 0;
355 
356  } elsif ($seq_region_start > $seq_region_end) {
357  $end += $seq_region_len;
358  }
359 
360  } else {
361 
362  if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
363  # do nothing
364  } elsif ($seq_region_start > $seq_region_end) {
365  if ($seq_region_start <= $dest_slice_end) {
366  $start -= $seq_region_len;
367  } elsif ($seq_region_end >= $dest_slice_start) {
368  $end += $seq_region_len;
369  }
370  }
371  }
372  }
373 
374  $seq_region_start = $start;
375  $seq_region_end = $end;
376  $seq_region_strand *= -1;
377 
378  } ## end else [ if ( $dest_slice_strand...)]
379 
380  # Throw away features off the end of the requested slice or on
381  # different seq_region.
382  if ($seq_region_end < 1
383  || $seq_region_start > $dest_slice_length
384  || ($dest_slice_sr_id != $seq_region_id)) {
385  next FEATURE;
386  }
387  $slice = $dest_slice;
388  }
389 
390  # Finally, create the new RepeatFeature.
391  push( @features,
392  $self->_create_feature_fast( 'Bio::EnsEMBL::RepeatFeature', {
393  'dbID' => $repeat_feature_id,
394  'analysis' => $analysis,
395  'start' => $seq_region_start,
396  'end' => $seq_region_end,
397  'strand' => $seq_region_strand,
398  'score' => $score,
399  'hstart' => $repeat_start,
400  'hend' => $repeat_end,
401  'repeat_consensus' => $rc,
402  'adaptor' => $self,
403  'slice' => $slice
404  } ) );
405 
406  }
407 
408  return \@features;
409 }
410 
411 
412 =head2 store
413 
414  Arg [1] : list of Bio::EnsEMBL::RepeatFeatures $repeat_feature_id
415  the list of repeat features to store in the database
416  Example : $repeat_feature_adaptor->store(@repeat_features);
417  Description: stores a repeat feature in the database
418  Returntype : none
419  Exceptions : if the repeat features do not have attached sequences
420  or if repeat_consensus are not present
421  Caller : general
422  Status : Stable
423 
424 =cut
425 
426 sub store {
427  my( $self, @repeats ) = @_;
428 
429  my $db = $self->db();
430  my $rca = $db->get_RepeatConsensusAdaptor();
431  my $sa = $db->get_SliceAdaptor();
432  my ($cons, $db_id);
433 
434  my $sth = $self->prepare(qq{
435  INSERT into repeat_feature( repeat_feature_id
436  , seq_region_id
437  , seq_region_start
438  , seq_region_end
439  , seq_region_strand
440  , repeat_consensus_id
441  , repeat_start
442  , repeat_end
443  , score
444  , analysis_id )
445  VALUES(NULL, ?,?,?,?,?,?,?,?,?)
446  });
447 
448  FEATURE: foreach my $rf (@repeats) {
449  if(!ref($rf) || !$rf->isa('Bio::EnsEMBL::RepeatFeature')) {
450  throw('Expected RepeatFeature argument not [' . ref($rf) .'].');
451  }
452 
453  if($rf->is_stored($db)) {
454  warning("RepeatFeature [".$rf->dbID."] is already stored in this DB.");
455  next FEATURE;
456  }
457 
458  my $cons = $rf->repeat_consensus();
459  throw("Must have a RepeatConsensus attached") if(!defined($cons));
460 
461  # for tandem repeats - simply store consensus and repeat
462  # one pair per hit. don't need to check consensi stored
463  # already. consensus has name and class set to 'trf'
464 
465  if ($cons->repeat_class eq 'trf') {
466 
467  # Look for matches already stored
468  my @match = @{$rca->fetch_all_by_class_seq('trf', $cons->repeat_consensus)};
469  if (@match) {
470  $cons->dbID($match[0]->dbID());
471  }
472  else {
473  $rca->store($cons);
474  }
475 
476  } elsif ($cons->repeat_class eq 'Simple_repeat') {
477 
478  my $rcon = $cons->name;
479  $rcon =~ s/\((\S+)\)n/$1/; # get repeat element
480  $cons->repeat_consensus($rcon);
481 
482  # Look for matches already stored
483  my $match = $rca->fetch_by_name_class($cons->name, 'Simple_repeat');
484  if ($match) {
485  $cons->dbID($match->dbID());
486  }
487  else {
488  $rca->store($cons);
489  }
490 
491  } else {
492 
493  # for other repeats - need to see if a consensus is stored already
494  if(!$cons->dbID) {
495  my $match = ($rca->fetch_by_name($cons->name));
496 
497  if($match) {
498  #set the consensus dbID to be the same as the database one
499  $cons->dbID($match->dbID());
500  } else {
501  # if we don't match a consensus already stored create a fake one
502  # and set consensus to 'N' as null seq not allowed
503  # FIXME: not happy with this, but ho hum ...
504  warning("Can't find " . $cons->name . "\n");
505  $cons->repeat_consensus("N");
506  $rca->store($cons);
507  }
508  }
509 
510  #if (@match > 1) {
511  #multiple consensi were matched
512  # $self->warn(@match . " consensi for " . $cons->name . "\n");
513  #}
514  }
515 
516  my $slice = $rf->slice();
517  if(!ref($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice'))) {
518  throw("RepeatFeature cannot be stored without an associated slice.");
519  }
520 
521  #store the analysis if it has not been stored yet
522  if(!$rf->analysis->is_stored($db)) {
523  $db->get_AnalysisAdaptor->store($rf->analysis());
524  }
525 
526  my $original = $rf;
527  my $seq_region_id;
528  ($rf, $seq_region_id) = $self->_pre_store($rf);
529 
530  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
531  $sth->bind_param(2,$rf->start,SQL_INTEGER);
532  $sth->bind_param(3,$rf->end,SQL_INTEGER);
533  $sth->bind_param(4,$rf->strand,SQL_TINYINT);
534  $sth->bind_param(5,$rf->repeat_consensus->dbID,SQL_INTEGER);
535  $sth->bind_param(6,$rf->hstart,SQL_INTEGER);
536  $sth->bind_param(7,$rf->hend,SQL_INTEGER);
537  $sth->bind_param(8,$rf->score,SQL_DOUBLE);
538  $sth->bind_param(9,$rf->analysis->dbID,SQL_INTEGER);
539 
540  $sth->execute();
541 
542  my $db_id = $self->last_insert_id('repeat_feature_id', undef, 'repeat_feature')
543  or throw("Didn't get an insertid from the INSERT statement");
544 
545  $original->dbID($db_id);
546  $original->adaptor($self);
547  }
548 }
549 
550 
551 =head2 list_dbIDs
552 
553  Arg [1] : none
554  Example : @feature_ids = @{$repeat_feature_adaptor->list_dbIDs()};
555  Description: Gets an array of internal ids for all repeat features in the current db
556  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
557  Returntype : list of ints
558  Exceptions : none
559  Caller : ?
560  Status : Stable
561 
562 =cut
563 
564 sub list_dbIDs {
565  my ($self, $ordered) = @_;
566 
567  return $self->_list_dbIDs("repeat_feature", undef, $ordered);
568 }
569 
570 1;
571 
572 
573 
574 
575 
map
public map()
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Definition: BaseFeatureAdaptor.pm:24
Bio::EnsEMBL::DBSQL::BaseAdaptor::fetch_by_dbID
public Bio::EnsEMBL::Feature fetch_by_dbID()
Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor
Definition: RepeatFeatureAdaptor.pm:24
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
Bio::EnsEMBL::RepeatFeature
Definition: RepeatFeature.pm:45
Bio::EnsEMBL::Analysis
Definition: Analysis.pm:36
Bio::EnsEMBL::Storable::new_fast
public Instance new_fast()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::RepeatConsensus
Definition: RepeatConsensus.pm:5