3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 $rfa = $database_adaptor->get_RepeatFeatureAdaptor();
40 my @repeats = @{ $rfa->fetch_all_by_Slice($slice) };
44 This is an adaptor
for the retrieval and storage of
RepeatFeature
45 objects from the database. Most of the implementation is in the
52 package Bio::EnsEMBL::DBSQL::RepeatFeatureAdaptor;
65 =head2 fetch_all_by_Slice
68 Arg [2] : (optional)
string $logic_name
69 Limits RepeatFeatures obtained to those having an
Analysis with
70 of the specified logic_name. If no logic name is specified
71 Repeats of all analysis types are retrieved.
72 Arg [3] : (optional)
string/array $repeat_type
73 Limits RepeatFeatures obtained to those of specified
75 Example : @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef,
'Type II Transposons')};
76 @rfeats = @{$rfa->fetch_all_by_Slice($slice, undef, [
'Type II Transposons',
'RNA repeats'])};
77 Description: Retrieves repeat features overlapping the area designated by
78 the provided slice argument. Returned features will be in
79 in the same coordinate system as the provided slice and will
80 have coordinates relative to the slice start.
81 Returntype : reference to a list of Bio::EnsEMBL::RepeatFeatures.
82 Exceptions :
throw on bad argument
83 Caller : Slice::get_all_RepeatFeatures
88 sub fetch_all_by_Slice {
91 my $logic_name = shift;
92 my $repeat_type = shift;
96 # MySQL was optimising the query the incorrect way when joining to
97 # the repeat_consensus table on type
98 $self->_straight_join(1);
101 my $rta = wrap_array($repeat_type);
102 if(scalar(@{$rta}) > 1) {
103 $constraint .= sprintf(
'rc.repeat_type IN (%s)', join(q{,},
map {
"'${_}'"} @{$rta}));
106 $constraint .=
"rc.repeat_type = '${repeat_type}'";
111 $self->fetch_all_by_Slice_constraint($slice,$constraint,$logic_name);
114 $self->_straight_join(0);
124 # Description: PROTECTED Implementation of abstract superclass method to
125 # provide the name of the tables to query
126 # Returntype : string
134 return ([
'repeat_feature',
'r'], [
'repeat_consensus',
'rc']);
142 # Description: PROTECTED Implementation of abstract superclass method to
143 # provide the name of the columns to query
144 # Returntype : list of strings
151 return qw (r.repeat_feature_id
156 r.repeat_consensus_id
164 rc.repeat_consensus);
168 # _default_where_clause
171 # Description: Overrides superclass method to provide an additional
172 # table joining constraint before the SQL query is performed.
173 # Returntype : string
175 # Caller : generic_fetch
178 sub _default_where_clause {
181 return 'r.repeat_consensus_id = rc.repeat_consensus_id';
186 # Description: PROTECTED implementation of abstract superclass method.
187 # responsible for the creation of RepeatFeatures from a
188 # hashref generated from an SQL query
191 my ($self, $sth, $mapper, $dest_slice) = @_;
194 # This code is ugly because an attempt has been made to remove as many
195 # function calls as possible for speed purposes. Thus many caches and
196 # a fair bit of gymnastics is used.
199 my $rca = $self->db()->get_RepeatConsensusAdaptor();
200 my $sa = $self->db()->get_SliceAdaptor();
201 my $aa = $self->db()->get_AnalysisAdaptor();
210 my($repeat_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
211 $seq_region_strand, $repeat_consensus_id, $repeat_start, $repeat_end,
212 $analysis_id, $score, $repeat_name, $repeat_class, $repeat_type,
215 $sth->bind_columns( \$repeat_feature_id, \$seq_region_id, \$seq_region_start,
216 \$seq_region_end, \$seq_region_strand,
217 \$repeat_consensus_id, \$repeat_start,\$repeat_end,
218 \$analysis_id, \$score, \$repeat_name, \$repeat_class,
219 \$repeat_type, \$repeat_consensus );
221 my $dest_slice_start;
223 my $dest_slice_strand;
224 my $dest_slice_length;
226 my $dest_slice_sr_name;
227 my $dest_slice_sr_id;
231 $dest_slice_start = $dest_slice->start();
232 $dest_slice_end = $dest_slice->end();
233 $dest_slice_strand = $dest_slice->strand();
234 $dest_slice_length = $dest_slice->length();
235 $dest_slice_cs = $dest_slice->coord_system();
236 $dest_slice_sr_name = $dest_slice->seq_region_name();
237 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
238 $asma = $self->db->get_AssemblyMapperAdaptor();
241 FEATURE:
while($sth->fetch()) {
242 #create a repeat consensus object
244 my $rc = $rc_hash{$repeat_consensus_id} ||=
246 ({
'dbID' => $repeat_consensus_id,
248 'name' => $repeat_name,
249 'repeat_class' => $repeat_class,
250 'repeat_type' => $repeat_type,
251 'repeat_consensus' => $repeat_consensus,
252 'length' => length($repeat_consensus)});
254 #get the analysis object
255 my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
256 $analysis_hash{$analysis_id} = $analysis;
258 #need to get the internal_seq_region, if present
259 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
260 my $slice = $slice_hash{
"ID:".$seq_region_id};
263 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
264 $slice_hash{
"ID:".$seq_region_id} = $slice;
265 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
266 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
269 #obtain a mapper if none was defined, but a dest_seq_region was
270 if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
271 $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
274 my $sr_name = $sr_name_hash{$seq_region_id};
275 my $sr_cs = $sr_cs_hash{$seq_region_id};
278 # remap the feature coordinates to another coord system
279 # if a mapper was provided
284 if (defined $dest_slice && $mapper->isa(
'Bio::EnsEMBL::ChainedAssemblyMapper') ) {
285 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
286 $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
289 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
290 $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
293 #skip features that map to gaps or coord system boundaries
294 next FEATURE
if (!defined($seq_region_id));
296 #get a slice in the coord system we just mapped to
297 $slice = $slice_hash{
"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
301 # If a destination slice was provided convert the coords.
303 if (defined($dest_slice)) {
304 my $seq_region_len = $dest_slice->seq_region_length();
306 if ( $dest_slice_strand == 1 ) {
307 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
308 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
310 if ( $dest_slice->is_circular ) {
311 # Handle circular chromosomes.
313 if ( $seq_region_start > $seq_region_end ) {
314 # Looking at a feature overlapping the chromosome origin.
316 if ( $seq_region_end > $dest_slice_start ) {
317 # Looking at the region in the beginning of the chromosome
318 $seq_region_start -= $seq_region_len;
320 if ( $seq_region_end < 0 ) {
321 $seq_region_end += $seq_region_len;
324 if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
325 # Looking at the region overlapping the chromosome
326 # origin and a feature which is at the beginning of the
328 $seq_region_start += $seq_region_len;
329 $seq_region_end += $seq_region_len;
335 my $start = $dest_slice_end - $seq_region_end + 1;
336 my $end = $dest_slice_end - $seq_region_start + 1;
338 if ($dest_slice->is_circular()) {
340 if ($dest_slice_start > $dest_slice_end) {
341 # slice spans origin or replication
343 if ($seq_region_start >= $dest_slice_start) {
344 $end += $seq_region_len;
345 $start += $seq_region_len
if $seq_region_end > $dest_slice_start;
347 } elsif ($seq_region_start <= $dest_slice_end) {
349 } elsif ($seq_region_end >= $dest_slice_start) {
350 $start += $seq_region_len;
351 $end += $seq_region_len;
353 } elsif ($seq_region_end <= $dest_slice_end) {
354 $end += $seq_region_len
if $end < 0;
356 } elsif ($seq_region_start > $seq_region_end) {
357 $end += $seq_region_len;
362 if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
364 } elsif ($seq_region_start > $seq_region_end) {
365 if ($seq_region_start <= $dest_slice_end) {
366 $start -= $seq_region_len;
367 } elsif ($seq_region_end >= $dest_slice_start) {
368 $end += $seq_region_len;
374 $seq_region_start = $start;
375 $seq_region_end = $end;
376 $seq_region_strand *= -1;
378 } ## end
else [
if ( $dest_slice_strand...)]
380 # Throw away features off the end of the requested slice or on
381 # different seq_region.
382 if ($seq_region_end < 1
383 || $seq_region_start > $dest_slice_length
384 || ($dest_slice_sr_id != $seq_region_id)) {
387 $slice = $dest_slice;
390 # Finally, create the new RepeatFeature.
392 $self->_create_feature_fast(
'Bio::EnsEMBL::RepeatFeature', {
393 'dbID' => $repeat_feature_id,
394 'analysis' => $analysis,
395 'start' => $seq_region_start,
396 'end' => $seq_region_end,
397 'strand' => $seq_region_strand,
399 'hstart' => $repeat_start,
400 'hend' => $repeat_end,
401 'repeat_consensus' => $rc,
414 Arg [1] : list of Bio::EnsEMBL::RepeatFeatures $repeat_feature_id
415 the list of repeat features to store in the database
416 Example : $repeat_feature_adaptor->store(@repeat_features);
417 Description: stores a repeat feature in the database
419 Exceptions :
if the repeat features
do not have attached sequences
420 or
if repeat_consensus are not present
427 my( $self, @repeats ) = @_;
429 my $db = $self->db();
430 my $rca = $db->get_RepeatConsensusAdaptor();
431 my $sa = $db->get_SliceAdaptor();
434 my $sth = $self->prepare(qq{
435 INSERT into repeat_feature( repeat_feature_id
440 , repeat_consensus_id
445 VALUES(NULL, ?,?,?,?,?,?,?,?,?)
448 FEATURE:
foreach my $rf (@repeats) {
449 if(!ref($rf) || !$rf->isa(
'Bio::EnsEMBL::RepeatFeature')) {
450 throw(
'Expected RepeatFeature argument not [' . ref($rf) .
'].');
453 if($rf->is_stored($db)) {
454 warning(
"RepeatFeature [".$rf->dbID.
"] is already stored in this DB.");
458 my $cons = $rf->repeat_consensus();
459 throw(
"Must have a RepeatConsensus attached")
if(!defined($cons));
461 # for tandem repeats - simply store consensus and repeat
462 # one pair per hit. don't need to check consensi stored
463 # already. consensus has name and class set to 'trf'
465 if ($cons->repeat_class eq
'trf') {
467 # Look for matches already stored
468 my @match = @{$rca->fetch_all_by_class_seq(
'trf', $cons->repeat_consensus)};
470 $cons->dbID($match[0]->dbID());
476 } elsif ($cons->repeat_class eq
'Simple_repeat') {
478 my $rcon = $cons->name;
479 $rcon =~ s/\((\S+)\)n/$1/; # get repeat element
480 $cons->repeat_consensus($rcon);
482 # Look for matches already stored
483 my $match = $rca->fetch_by_name_class($cons->name,
'Simple_repeat');
485 $cons->dbID($match->dbID());
493 # for other repeats - need to see if a consensus is stored already
495 my $match = ($rca->fetch_by_name($cons->name));
498 #set the consensus dbID to be the same as the database one
499 $cons->dbID($match->dbID());
501 # if we don't match a consensus already stored create a fake one
502 # and set consensus to 'N' as null seq not allowed
503 # FIXME: not happy with this, but ho hum ...
504 warning(
"Can't find " . $cons->name .
"\n");
505 $cons->repeat_consensus(
"N");
511 #multiple consensi were matched
512 # $self->warn(@match . " consensi for " . $cons->name . "\n");
516 my $slice = $rf->slice();
517 if(!ref($slice) || !($slice->isa(
"Bio::EnsEMBL::Slice") or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
518 throw(
"RepeatFeature cannot be stored without an associated slice.");
521 #store the analysis if it has not been stored yet
522 if(!$rf->analysis->is_stored($db)) {
523 $db->get_AnalysisAdaptor->store($rf->analysis());
528 ($rf, $seq_region_id) = $self->_pre_store($rf);
530 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
531 $sth->bind_param(2,$rf->start,SQL_INTEGER);
532 $sth->bind_param(3,$rf->end,SQL_INTEGER);
533 $sth->bind_param(4,$rf->strand,SQL_TINYINT);
534 $sth->bind_param(5,$rf->repeat_consensus->dbID,SQL_INTEGER);
535 $sth->bind_param(6,$rf->hstart,SQL_INTEGER);
536 $sth->bind_param(7,$rf->hend,SQL_INTEGER);
537 $sth->bind_param(8,$rf->score,SQL_DOUBLE);
538 $sth->bind_param(9,$rf->analysis->dbID,SQL_INTEGER);
542 my $db_id = $self->last_insert_id(
'repeat_feature_id', undef,
'repeat_feature')
543 or
throw(
"Didn't get an insertid from the INSERT statement");
545 $original->dbID($db_id);
546 $original->adaptor($self);
554 Example : @feature_ids = @{$repeat_feature_adaptor->list_dbIDs()};
555 Description: Gets an array of
internal ids
for all repeat features in the current db
556 Arg[1] : <optional>
int. not 0
for the ids to be sorted by the seq_region.
557 Returntype : list of ints
565 my ($self, $ordered) = @_;
567 return $self->_list_dbIDs(
"repeat_feature", undef, $ordered);