3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 Questions may also be sent to the Ensembl help desk at
37 $mfa = $database_adaptor->get_MiscFeatureAdaptor();
39 # retrieve a misc feature by its dbID
42 # retrieve all misc features in a given region
43 my @misc_feats = @{ $mfa->fetch_all_by_Slice($slice) };
45 # retrieve all misc features in a given region with a given set code
47 @{ $mfa->fetch_all_by_Slice_and_set_code( $slice,
'cloneset') };
49 # store some misc features in the database
50 $mfa->store(@misc_features);
54 This is an adaptor
for the retrieval and storage of MiscFeatures.
55 Misc Features are extremely
generic features that can be added with
56 minimal effort to the database. Currently misc features are used to
57 describe the locations of clone sets and tiling path information,
58 but arbitrary features can be stored. Misc features are grouped
59 into sets and can be fetched according to their grouping
using the
60 fetch_all_by_Slice_and_set_code and fetch_all_by_set_code methods.
61 MiscFeatures may belong to more than one set.
67 package Bio::EnsEMBL::DBSQL::MiscFeatureAdaptor;
82 =head2 fetch_all_by_Slice_and_set_code
85 A slice representing the region to fetch from
86 Arg [2...] :
string $set_code
87 The code of the set to retrieve features from
88 Example : @feats = @{$mfa->fetch_all_by_Slice_and_set_code($slice,
'cloneset')};
89 Description: Retrieves a set of MiscFeatures which have a particular set code
90 and which lie in a particular region. All features with the
91 provided set code and which overlap the given slice are returned.
92 Returntype : listref of Bio::EnsEMBL::MiscFeatures
93 Exceptions :
throw if set_code is not provided
94 warning
if no set
for provided set code exists
100 sub fetch_all_by_Slice_and_set_code {
104 throw(
'Set code argument is required.') unless @_;
106 my $msa = $self->db->get_MiscSetAdaptor();
109 foreach my $set_code (@_) {
110 my $set = $msa->fetch_by_code($set_code);
112 $max_len = $set->longest_feature
if $set->longest_feature > $max_len;
113 push @sets, $set->dbID;
115 warning(
"No misc_set with code [$set_code] exists");
120 $constraint =
" mfms.misc_set_id in ( @{[join ',',@sets]} ) ";
121 } elsif( @sets == 1 ) {
122 $constraint =
" mfms.misc_set_id = $sets[0] ";
127 $self->_max_feature_length($max_len);
129 my $results = $self->fetch_all_by_Slice_constraint($slice, $constraint);
131 $self->_max_feature_length(undef);
138 =head2 fetch_all_by_attribute_type_value
140 Arg [1] :
string $attrib_type_code
141 The code of the attribute type to fetch features
for
142 Arg [2] : (optional)
string $attrib_value
143 The value of the attribute to fetch features
for
145 #get all misc features that have an embl
accession
146 @feats = @{$mfa->fetch_all_by_attribute_type_value(
'embl_acc')};
147 #get the misc feature with synonym 'AL014121'
148 ($feat)=@{$mfa->fetch_all_by_attribute_type_value(
'synonym',
'AL014121')};
149 Description: Retrieves MiscFeatures which have a particular attribute.
150 If the attribute value argument is also provided only
151 features which have the attribute AND a particular value
152 are returned. The features are returned in their native
153 coordinate system (i.e. the coordinate system that they
155 Returntype : listref of Bio::EnsEMBL::MiscFeatures
156 Exceptions :
throw if attrib_type code arg is not provided
162 sub fetch_all_by_attribute_type_value {
164 my $attrib_type_code = shift;
165 my $attrib_value = shift;
167 throw(
"Attrib type code argument is required.")
168 if ( !$attrib_type_code );
170 # Need to do 2 queries so that all of the ids come back with the
171 # features. The problem with adding attrib constraints to filter the
172 # misc_features which come back is that not all of the attributes will
183 WHERE ma.attrib_type_id = at.attrib_type_id
185 AND ma.misc_feature_id = mf.misc_feature_id
186 AND mf.seq_region_id = sr.seq_region_id
187 AND sr.coord_system_id = cs.coord_system_id
188 AND cs.species_id = ?);
191 $sql .=
" AND ma.value = ?";
194 my $sth = $self->prepare($sql);
196 $sth->bind_param( 1, $attrib_type_code, SQL_VARCHAR );
197 $sth->bind_param( 2, $self->species_id(), SQL_INTEGER );
199 $sth->bind_param( 3, $attrib_value, SQL_VARCHAR );
204 my @ids =
map { $_->[0] } @{ $sth->fetchall_arrayref() };
208 # Construct constraints from the list of ids. Split ids into groups
209 # of 1000 to ensure that the query is not too big.
212 my @subset = splice( @ids, 0, 1000 );
213 if ( @subset == 1 ) {
214 push @constraints,
"mf.misc_feature_id = $subset[0]";
216 my $id_str = join(
',', @subset );
217 push @constraints,
"mf.misc_feature_id in ($id_str)";
222 foreach my $constraint (@constraints) {
223 push @results, @{ $self->generic_fetch($constraint) };
227 } ## end sub fetch_all_by_attribute_type_value
230 =head2 fetch_by_attribute_set_value
232 Arg [1] :
string $attrib_type_code
233 The code of the attribute type to fetch features
for
234 Arg [2] : (optional)
string $attrib_value
235 The value of the attribute to fetch features
for
236 Arg [3] : (optional)
string $misc_set
237 The name of the set to which the feature belongs
239 $feat = $mfa->fetch_by_attribute_set_value(
'clone',
'RP11-411G9',
'tilepath');
240 # Get the clone belonging to the tilepath
241 Description: Retrieves MiscFeatures which have a particular attribute.
242 If the attribute value argument is also provided only
243 features which have the attribute AND a particular value
244 are returned. The features are returned in their native
245 coordinate system (i.e. the coordinate system that they
247 Returntype : Bio::EnsEMBL::MiscFeature
248 Exceptions :
throw if attrib_type code arg is not provided
254 sub fetch_by_attribute_set_value {
256 my $attrib_type_code = shift;
257 my $attrib_value = shift;
258 my $misc_set = shift;
260 throw(
"Attrib type code argument is required.")
261 if ( !$attrib_type_code );
263 # Need to do 2 queries so that all of the ids come back with the
264 # features. The problem with adding attrib constraints to filter the
265 # misc_features which come back is that not all of the attributes will
274 misc_feature_misc_set mfs,
278 WHERE ma.attrib_type_id = at.attrib_type_id
280 AND ma.misc_feature_id = mf.misc_feature_id
281 AND mf.misc_feature_id = mfs.misc_feature_id
282 AND mfs.misc_set_id = ms.misc_set_id
283 AND mf.seq_region_id = sr.seq_region_id
284 AND sr.coord_system_id = cs.coord_system_id
287 AND cs.species_id = ?);
289 my $sth = $self->prepare($sql);
291 $sth->bind_param( 1, $attrib_type_code, SQL_VARCHAR );
292 $sth->bind_param( 2, $attrib_value, SQL_VARCHAR );
293 $sth->bind_param( 3, $misc_set, SQL_VARCHAR );
294 $sth->bind_param( 4, $self->species_id(), SQL_INTEGER );
298 my ($id) = $sth->fetchrow_array();
306 my $constraint =
"mf.misc_feature_id = $id";
308 my ($result) = @{$self->generic_fetch($constraint)};
311 } ## end sub fetch_by_attribute_set_value
318 # Description: PROTECTED Implementation of abstract superclass method to
319 # provide the name of the tables to query
320 # Returntype : string
328 return ([
'misc_feature',
'mf'],
329 [
'misc_feature_misc_set',
'mfms'],
330 [
'misc_attrib',
'ma'],
331 [
'attrib_type',
'at']);
339 # Description: PROTECTED Implementation of abstract superclass method to
340 # provide the name of the columns to query
341 # Returntype : list of strings
348 #warning _objs_from_sth implementation depends on ordering
349 return qw (mf.misc_feature_id
363 # _default_where_clause
367 # Description: Overrides superclass method to provide an additional
368 # table joining constraint before the SQL query is performed.
369 # Returntype : string
371 # Caller : generic_fetch
373 sub _default_where_clause {
384 [
'misc_feature_misc_set',
'mf.misc_feature_id = mfms.misc_feature_id'],
385 [
'misc_attrib',
'mf.misc_feature_id = ma.misc_feature_id'],
386 [
'attrib_type',
'ma.attrib_type_id = at.attrib_type_id']);
393 return " ORDER BY mf.misc_feature_id";
399 # Arg [1] : StatementHandle $sth
401 # Description: PROTECTED implementation of abstract superclass method.
402 # responsible for the creation of MiscFeatures from the
403 # rows returned by an executed SQL statement handle
404 # Returntype : listref of Bio::EnsEMBL::MiscFeatures
409 my ($self, $sth, $mapper, $dest_slice) = @_;
412 # This code is ugly because an attempt has been made to remove as many
413 # function calls as possible for speed purposes. Thus many caches and
414 # a fair bit of gymnastics is used.
417 my $sa = $self->db()->get_SliceAdaptor();
418 my $msa = $self->db()->get_MiscSetAdaptor();
427 $misc_feature_id, $seq_region_id, $seq_region_start,
428 $seq_region_end, $seq_region_strand,
429 $attrib_value, $attrib_type_code, $misc_set_id,
430 $attrib_type_name, $attrib_type_description );
432 $sth->bind_columns( \$misc_feature_id, \$seq_region_id, \$seq_region_start,
433 \$seq_region_end, \$seq_region_strand,
434 \$attrib_value, \$attrib_type_code,\$misc_set_id,
435 \$attrib_type_name, \$attrib_type_description );
437 my $dest_slice_start;
439 my $dest_slice_strand;
440 my $dest_slice_length;
442 my $dest_slice_sr_name;
443 my $dest_slice_sr_id;
447 $dest_slice_start = $dest_slice->start();
448 $dest_slice_end = $dest_slice->end();
449 $dest_slice_strand = $dest_slice->strand();
450 $dest_slice_length = $dest_slice->length();
451 $dest_slice_cs = $dest_slice->coord_system();
452 $dest_slice_sr_name = $dest_slice->seq_region_name();
453 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
454 $asma = $self->db->get_AssemblyMapperAdaptor();
464 FEATURE:
while($sth->fetch()) {
465 #if this feature is not being used, skip all rows related to it
466 next
if($throw_away == $misc_feature_id);
468 if ($current == $misc_feature_id) {
469 #still working on building up attributes and sets for current feature
471 #if there is a misc_set, add it to the current feature
473 my $misc_set = $ms_hash{$misc_set_id} ||=
474 $msa->fetch_by_dbID($misc_set_id);
475 if ( ! exists $feat_misc_sets->{$misc_set->{
'code'}} ) {
476 $feat->add_MiscSet( $misc_set );
477 $feat_misc_sets->{$misc_set->{
'code'}} = $misc_set;
481 #if there is a new attribute add it to the current feature
482 if ($attrib_value && $attrib_type_code && !$seen_attribs->{
"$attrib_type_code:$attrib_value"}) {
484 ( -CODE => $attrib_type_code,
485 -NAME => $attrib_type_name,
486 -DESC => $attrib_type_description,
487 -VALUE => $attrib_value
491 $feat_attribs ||= [];
492 push @$feat_attribs, $attrib;
493 $seen_attribs->{
"$attrib_type_code:$attrib_value"} = 1;
498 #start working on a new feature, discard references to last one
501 $feat_misc_sets = {};
505 $current = $misc_feature_id;
506 #need to get the internal_seq_region, if present
507 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
508 my $slice = $slice_hash{
"ID:".$seq_region_id};
511 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
512 $slice_hash{
"ID:".$seq_region_id} = $slice;
513 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
514 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
517 if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
518 $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
521 my $sr_name = $sr_name_hash{$seq_region_id};
522 my $sr_cs = $sr_cs_hash{$seq_region_id};
525 # remap the feature coordinates to another coord system
526 # if a mapper was provided
530 if (defined $dest_slice && $mapper->isa(
'Bio::EnsEMBL::ChainedAssemblyMapper') ) {
531 ($seq_region_id, $seq_region_start,$seq_region_end, $seq_region_strand) =
532 $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
535 ($seq_region_id, $seq_region_start,$seq_region_end, $seq_region_strand) =
536 $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,$seq_region_strand, $sr_cs);
539 #skip features that map to gaps or coord system boundaries
540 if(!defined($seq_region_id)) {
541 $throw_away = $misc_feature_id;
545 #get a slice in the coord system we just mapped to
546 $slice = $slice_hash{
"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
550 # If a destination slice was provided convert the coords
553 my $seq_region_len = $dest_slice->seq_region_length();
555 if ($dest_slice_strand == 1) { # Positive strand
556 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
557 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
559 if ($dest_slice->is_circular()) {
560 # Handle circular chromosomes.
562 if ($seq_region_start > $seq_region_end) {
563 # Looking at a feature overlapping the chromosome origin.
565 if ($seq_region_end > $dest_slice_start) {
566 # Looking at the region in the beginning of the chromosome.
567 $seq_region_start -= $seq_region_len;
569 if ($seq_region_end < 0) {
570 $seq_region_end += $seq_region_len;
573 if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
574 # Looking at the region overlapping the chromosome
575 # origin and a feature which is at the beginning of the
577 $seq_region_start += $seq_region_len;
578 $seq_region_end += $seq_region_len;
581 } ## end
if ($dest_slice->is_circular...)
582 }
else { # Negative strand
584 my $start = $dest_slice_end - $seq_region_end + 1;
585 my $end = $dest_slice_end - $seq_region_start + 1;
587 if ($dest_slice->is_circular()) {
589 if ($dest_slice_start > $dest_slice_end) {
590 # slice spans origin of replication
592 if ($seq_region_start >= $dest_slice_start) {
593 $end += $seq_region_len;
594 $start += $seq_region_len
if $seq_region_end > $dest_slice_start;
596 } elsif ($seq_region_start <= $dest_slice_end) {
598 } elsif ($seq_region_end >= $dest_slice_start) {
599 $start += $seq_region_len;
600 $end += $seq_region_len;
602 } elsif ($seq_region_end <= $dest_slice_end) {
603 $end += $seq_region_len
if $end < 0;
605 } elsif ($seq_region_start > $seq_region_end) {
606 $end += $seq_region_len;
610 if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
612 } elsif ($seq_region_start > $seq_region_end) {
613 if ($seq_region_start <= $dest_slice_end) {
614 $start -= $seq_region_len;
615 } elsif ($seq_region_end >= $dest_slice_start) {
616 $end += $seq_region_len;
622 $seq_region_start = $start;
623 $seq_region_end = $end;
624 $seq_region_strand *= -1;
626 } ## end
else [
if ($dest_slice_strand...)]
628 #throw away features off the end of the requested slice
629 if ($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
630 $dest_slice_sr_id != $seq_region_id) {
631 #flag this feature as one to throw away
632 $throw_away = $misc_feature_id;
636 $slice = $dest_slice;
640 if ($attrib_value && $attrib_type_code) {
642 ( -CODE => $attrib_type_code,
643 -NAME => $attrib_type_name,
644 -DESC => $attrib_type_description,
645 -VALUE => $attrib_value
647 $feat_attribs = [$attrib];
648 $seen_attribs->{
"$attrib_type_code:$attrib_value"} = 1;
652 $self->_create_feature_fast(
'Bio::EnsEMBL::MiscFeature', {
653 'start' => $seq_region_start,
654 'end' => $seq_region_end,
655 'strand' => $seq_region_strand,
658 'dbID' => $misc_feature_id,
659 'attributes' => $feat_attribs ||= []
662 push @features, $feat;
665 #get the misc_set object
666 my $misc_set = $ms_hash{$misc_set_id} ||=
667 $msa->fetch_by_dbID($misc_set_id);
668 if ( ! exists $feat_misc_sets->{$misc_set->{
'code'}} ) {
669 $feat->add_MiscSet( $misc_set );
670 $feat_misc_sets->{$misc_set->{
'code'}} = $misc_set;
684 Example : @feature_ids = @{$misc_feature_adaptor->list_dbIDs()};
685 Description: Gets an array of
internal ids
for all misc_features in the
687 Arg[1] : <optional>
int. not 0
for the ids to be sorted by the seq_region.
688 Returntype : listref of ints
696 my ($self,$ordered) = @_;
698 return $self->_list_dbIDs(
"misc_feature",undef,$ordered);
704 Arg [1] : list of Bio::EnsEMBL::MiscFeatures @misc_features
705 Example : $misc_feature_adaptor->store(@misc_features);
706 Description: Stores a list of MiscFeatures in
this database. The stored
707 features will have their
709 Exceptions :
throw on invalid arguments
710 warning
if misc feature is already stored in
this database
711 throw if start/end/strand attribs are not valid
719 my @misc_features = @_;
721 my $db = $self->db();
723 my $feature_sth = $self->prepare(
724 q{INSERT INTO misc_feature (
729 ) VALUES (?, ?, ?, ?)
732 my $insert_ignore = $self->insert_ignore_clause();
733 my $feature_set_sth = $self->prepare(
734 qq{${insert_ignore} INTO misc_feature_misc_set (
740 my $msa = $db->get_MiscSetAdaptor();
741 my $aa = $db->get_AttributeAdaptor();
744 foreach my $mf (@misc_features) {
745 if(!ref($mf) || !$mf->isa(
'Bio::EnsEMBL::MiscFeature')) {
746 throw(
"List of MiscFeature arguments expeceted");
749 if($mf->is_stored($db)) {
750 warning(
"MiscFeature [" .$mf->dbID.
"] is already stored in database.");
754 # do some checking of the start/end and convert to seq_region coords
757 ($mf, $seq_region_id) = $self->_pre_store($mf);
759 # store the actual MiscFeature
760 $feature_sth->bind_param(1,$seq_region_id,SQL_INTEGER);
761 $feature_sth->bind_param(2,$mf->start,SQL_INTEGER);
762 $feature_sth->bind_param(3,$mf->end,SQL_INTEGER);
763 $feature_sth->bind_param(4,$mf->strand,SQL_TINYINT);
764 $feature_sth->execute();
766 my $dbID = $self->last_insert_id(
'misc_feature_id', undef,
'misc_feature');
771 # store all the attributes
772 my $attribs = $mf->get_all_Attributes();
773 $aa->store_on_MiscFeature($mf, $attribs);
775 # store all the sets that have not been stored yet
776 my $sets = $mf->get_all_MiscSets();
777 foreach my $set (@$sets) {
778 $msa->store($set)
if(!$set->is_stored($db));
780 # update the misc_feature_misc_set table to store the set relationship
781 $feature_set_sth->bind_param(1,$dbID,SQL_INTEGER);
782 $feature_set_sth->bind_param(2,$set->dbID,SQL_INTEGER);
784 $feature_set_sth->execute();