3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 my $assembly_exception_feature_adaptor =
38 $database_adaptor->get_AssemblyExceptionFeatureAdaptor();
40 @assembly_exception_features =
45 Assembly Exception
Feature Adaptor - database access
for assembly
52 package Bio::EnsEMBL::DBSQL::AssemblyExceptionFeatureAdaptor;
56 no warnings qw(uninitialized);
66 # set the number of slices you'd like to cache
67 our $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE = 100;
71 Arg [1] : list of args @args
72 Superclass constructor arguments
74 Description: Constructor which just initializes
internal cache structures
77 Caller : implementing subclass constructors
84 my $class = ref($caller) || $caller;
86 my $self = $class->SUPER::new(@_);
88 # initialize an LRU cache for slices
90 tie(%cache,
'Bio::EnsEMBL::Utils::Cache',
91 $ASSEMBLY_EXCEPTION_FEATURE_CACHE_SIZE);
93 $self->{
'_aexc_slice_cache'} = \%cache;
101 Example : my @axfs = @{$axfa->fetch_all()};
102 Description: Retrieves all assembly exception features which are in the
103 database and builds
internal caches of the features.
104 Returntype : reference to list of Bio::EnsEMBL::AssemblyExceptionFeatures
106 Caller : fetch_by_dbID, fetch_by_Slice
114 # this is the "global" cache for all assembly exception features in the db
115 if(defined($self->{
'_aexc_cache'})) {
116 return $self->{
'_aexc_cache'};
120 SELECT ae.assembly_exception_id,
125 ae.exc_seq_region_id,
126 ae.exc_seq_region_start,
127 ae.exc_seq_region_end,
129 FROM assembly_exception ae,
132 WHERE cs.species_id = ?
133 AND sr.coord_system_id = cs.coord_system_id
134 AND sr.seq_region_id = ae.seq_region_id);
136 my $sth = $self->prepare($statement);
138 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
142 my ($ax_id, $sr_id, $sr_start, $sr_end,
143 $x_type, $x_sr_id, $x_sr_start, $x_sr_end, $ori);
145 $sth->bind_columns(\$ax_id, \$sr_id, \$sr_start, \$sr_end,
146 \$x_type, \$x_sr_id, \$x_sr_start, \$x_sr_end, \$ori);
149 my $sa = $self->db()->get_SliceAdaptor();
151 $self->{
'_aexc_dbID_cache'} = {};
153 while($sth->fetch()) {
154 my $slice = $sa->fetch_by_seq_region_id($sr_id);
155 my $x_slice = $sa->fetch_by_seq_region_id($x_sr_id);
157 # each row creates TWO features, each of which has alternate_slice
158 # pointing to the "other" one
163 '-start' => $sr_start,
168 '-alternate_slice' => $x_slice->sub_Slice($x_sr_start, $x_sr_end),
172 $self->{
'_aexc_dbID_cache'}->{$ax_id} = $a;
176 '-start' => $x_sr_start,
180 '-slice' => $x_slice,
181 '-alternate_slice' => $slice->sub_Slice($sr_start, $sr_end),
182 '-type' =>
"$x_type REF" );
187 $self->{
'_aexc_cache'} = \@features;
196 Example : my $axf = $axfa->fetch_by_dbID(3);
197 Description: Retrieves a single assembly exception feature via its
internal
198 identifier. Note that
this only retrieves one of the two
199 assembly exception features which are represented by a single
200 row in the assembly_exception table.
212 if(!exists($self->{
'_aexc_dbID_cache'})) {
213 # force loading of cache
217 return $self->{
'_aexc_dbID_cache'}->{$dbID};
221 =head2 fetch_all_by_Slice
224 Example : my @axfs = @{$axfa->fetch_all_by_Slice($slice)};
225 Description: Retrieves all assembly exception features which overlap the
226 provided slice. The returned features will be in coordinate
228 Returntype : reference to list of Bio::EnsEMBL::AssemblyException features
230 Caller : Feature::get_all_alt_locations, general
235 sub fetch_all_by_Slice {
239 my $key= uc($slice->name());
241 # return features from the slice cache if present
242 if(exists($self->{
'_aexc_slice_cache'}->{$key})) {
243 return $self->{
'_aexc_slice_cache'}->{$key};
246 my $all_features = $self->fetch_all();
248 my $mcc = $self->db()->get_MetaCoordContainer();
249 my $css = $mcc->fetch_all_CoordSystems_by_feature_type(
'assembly_exception');
253 my $ma = $self->db()->get_AssemblyMapperAdaptor();
255 foreach my $cs (@$css) {
257 if($cs->equals($slice->coord_system)) {
260 $mapper = $ma->fetch_by_CoordSystems($cs,$slice->coord_system());
263 push @features, @{ $self->_remap($all_features, $mapper, $slice) };
266 $self->{
'_aexc_slice_cache'}->{$key} = \@features;
273 # Given a list of features checks if they are in the correct coord system
274 # by looking at the first features slice. If they are not then they are
275 # converted and placed on the slice.
277 # Note that this is a re-implementation of a method with the same name in
278 # BaseFeatureAdaptor, and in contrast to the latter which maps features in
279 # place, this method returns a remapped copy of each feature. The reason for
280 # this is to get around conflicts with caching.
283 my ($self, $features, $mapper, $slice) = @_;
285 # check if any remapping is actually needed
286 if(@$features && (!$features->[0]->isa(
'Bio::EnsEMBL::Feature') ||
287 $features->[0]->slice == $slice)) {
290 # remapping has not been done, we have to do our own conversion from
295 my $slice_start = $slice->start();
296 my $slice_end = $slice->end();
297 my $slice_strand = $slice->strand();
298 my $slice_cs = $slice->coord_system();
300 my ($seq_region, $start, $end, $strand, $seq_region_name);
302 my $slice_seq_region = $slice->seq_region_name();
304 foreach my $f (@$features) {
305 # since feats were obtained in contig coords, attached seq is a contig
306 my $fslice = $f->slice();
308 throw(
"Feature does not have attached slice.\n");
310 my $fseq_region = $fslice->seq_region_name();
311 my $fcs = $fslice->coord_system();
312 if(!$slice_cs->equals($fcs)) {
313 # slice of feature in different coord system, mapping required
314 ($seq_region, $start, $end, $strand) =
315 $mapper->fastmap($fseq_region,$f->start(),$f->end(),$f->strand(),$fcs);
316 # undefined start means gap
317 next
if(!defined $start);
319 my $slice_adaptor = $self->db()->get_SliceAdaptor();
320 $seq_region_name = $slice_adaptor->fetch_by_seq_region_id($seq_region)->seq_region_name;
323 $start = $f->start();
325 $strand = $f->strand();
326 $seq_region_name = $f->slice->seq_region_name();
329 # maps to region outside desired area
330 next
if ($start > $slice_end) || ($end < $slice_start) ||
331 ($slice_seq_region ne $seq_region_name);
333 # create new copies of successfully mapped feaatures with shifted start,
335 my ($new_start, $new_end);
336 my $seq_region_len = $slice->seq_region_length();
338 if ($slice_strand == 1) { # Positive strand
340 $new_start = $start - $slice_start + 1;
341 $new_end = $end - $slice_start + 1;
343 if ($slice->is_circular()) {
344 # Handle circular chromosomes.
346 if ($new_start > $new_end) {
347 # Looking at a feature overlapping the chromsome origin.
348 if ($new_end > $slice_start) {
349 # Looking at the region in the beginning of the chromosome.
350 $new_start -= $seq_region_len;
353 $new_end += $seq_region_len;
356 if ( $slice_start > $slice_end && $new_end < 0) {
357 # Looking at the region overlapping the chromosome
358 # origin and a feature which is at the beginning of the
360 $new_start += $seq_region_len;
361 $new_end += $seq_region_len;
364 } ## end
if ($dest_slice->is_circular...)
365 }
else { # Negative strand
367 $new_start = $slice_end - $end + 1;
368 $new_end = $slice_end - $start + 1;
370 if ($slice->is_circular()) {
372 if ($slice_start > $slice_end) {
373 # slice spans origin or replication
375 if ($start >= $slice_start) {
376 $new_end += $seq_region_len;
377 $new_start += $seq_region_len
if $end > $slice_start;
379 } elsif ($start <= $slice_end) {
381 } elsif ($end >= $slice_start) {
382 $new_start += $seq_region_len;
383 $new_end += $seq_region_len;
385 } elsif ($end <= $slice_end) {
386 $new_end += $seq_region_len
if $new_end < 0;
387 } elsif ($start > $end) {
388 $new_end += $seq_region_len;
395 if ($start <= $slice_end and $end >= $slice_start) {
397 } elsif ($start > $end) {
398 if ($start <= $slice_end) {
400 $new_start -= $seq_region_len;
402 } elsif ($end >= $slice_start) {
403 $new_end += $seq_region_len;
412 } ## end
else [
if ($dest_slice_strand...)]
415 '-start' => $new_start,
417 '-strand' => $strand * $slice_strand,
420 '-alternate_slice' => $f->alternate_slice,
423 } # end
foreach assembly exception
430 Arg[1] : Bio::EnsEMBL::AssemblyException $asx
431 Arg[2] : Bio::EnsEMBL::AssemblyException $asx2
435 $asx_seq_region_id = $asx_adaptor->store($asx);
436 Description: This stores a assembly exception feature in the
437 assembly_exception table and returns the assembly_exception_id.
438 Needs 2 features: one pointing to the Assembly_exception, and the
439 other pointing to the region in the reference that is being mapped to
440 Will check that start, end and type are defined, and the alternate
441 slice is present as well.
443 Exceptions:
throw if assembly exception not defined (needs start, end,
444 type and alternate_slice) of
if $asx not a Bio::EnsEMBL::AssemblyException
456 if (! $asx->isa(
'Bio::EnsEMBL::AssemblyExceptionFeature')){
457 throw(
"$asx is not a Ensembl assemlby exception -- not stored");
459 #if already present, return ID in the database
460 my $db = $self->db();
461 if ($asx->is_stored($db)){
464 #do some checkings for the object
465 #at the moment, the orientation is always 1
466 if (! $asx->start || ! $asx->end ){
467 throw(
"Assembly exception does not have coordinates");
469 if ($asx->type !~ /PAR|HAP|PATCH_NOVEL|PATCH_FIX/){
470 throw(
"Only types of assembly exception features valid are PAR, HAP, PATCH_FIX or PATCH_NOVEL");
472 if ( !($asx->alternate_slice->isa(
'Bio::EnsEMBL::Slice')) ){
473 throw(
"Alternate slice should be a Bio::EnsEMBL::Slice");
475 #now check the other Assembly exception feature, the one pointing to the REF
477 if (!$asx2->isa(
'Bio::EnsEMBL::AssemblyExceptionFeature')){
478 throw(
"$asx2 is not a Ensembl assemlby exception -- not stored");
480 if (! $asx2->start || ! $asx2->end ){
481 throw(
"Assembly exception does not have coordinates");
483 if ($asx2->type !~ /HAP REF|PAR REF|PATCH_NOVEL REF|PATCH_FIX REF/){
484 throw(
"$asx2 should have type of assembly exception features HAP REF, PAR REF, PATCH_FIX REF or PATCH_NOVEL REF");
486 if (! ($asx2->alternate_slice->isa(
'Bio::EnsEMBL::Slice')) ){
487 throw(
"Alternate slice should be a Bio::EnsEMBL::Slice");
489 #finally check that both features are pointing to each other slice
490 if ($asx->slice != $asx2->alternate_slice || $asx->alternate_slice != $asx2->slice){
491 throw(
"Slice and alternate slice in both features are not pointing to each other");
495 INSERT INTO assembly_exception( seq_region_id, seq_region_start,
497 exc_type, exc_seq_region_id,
498 exc_seq_region_start, exc_seq_region_end,
500 VALUES (?, ?, ?, ?, ?, ?, ?, 1)
503 my $asx_st = $self->prepare($asx_sql);
505 my $asx_seq_region_id;
506 my $asx2_seq_region_id;
508 my $original2 = $asx2;
509 #check all feature information
510 ($asx, $asx_seq_region_id) = $self->_pre_store($asx);
511 ($asx2, $asx2_seq_region_id) = $self->_pre_store($asx2);
514 $asx_st->bind_param(1, $asx_seq_region_id, SQL_INTEGER);
515 $asx_st->bind_param(2, $asx->start(), SQL_INTEGER);
516 $asx_st->bind_param(3, $asx->end(), SQL_INTEGER);
517 $asx_st->bind_param(4, $asx->type(), SQL_VARCHAR);
518 $asx_st->bind_param(5, $asx2_seq_region_id, SQL_INTEGER);
519 $asx_st->bind_param(6, $asx2->start(), SQL_INTEGER);
520 $asx_st->bind_param(7, $asx2->end(), SQL_INTEGER);
523 $asx_id = $self->last_insert_id(
'assembly_exception_id', undef,
'assembly_exception');
525 #finally, update the dbID and adaptor of the asx and asx2
526 $original->adaptor($self);
527 $original->dbID($asx_id);
528 $original2->adaptor($self);
529 $original2->dbID($asx_id);
530 #and finally update dbID cache with new assembly exception
531 $self->{
'_aexc_dbID_cache'}->{$asx_id} = $original;
532 #and update the other caches as well
533 push @{$self->{
'_aexc_slice_cache'}->{uc($asx->slice->name)}},$original, $original2;
534 push @{$self->{
'_aexc_cache'}}, $original, $original2;
540 # Helper function containing some common feature storing functionality
542 # Given a Feature this will return a copy (or the same feature if no changes
543 # to the feature are needed) of the feature which is relative to the start
544 # of the seq_region it is on. The seq_region_id of the seq_region it is on
547 # This method will also ensure that the database knows which coordinate
548 # systems that this feature is stored in.
549 # Since this adaptor doesn't inherit from BaseFeatureAdaptor, we need to copy
557 if(!ref($feature) || !$feature->isa(
'Bio::EnsEMBL::Feature')) {
558 throw(
'Expected Feature argument.');
561 $self->_check_start_end_strand($feature->start(),$feature->end(),
565 my $db = $self->db();
567 my $slice_adaptor = $db->get_SliceAdaptor();
568 my $slice = $feature->slice();
570 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice')) ) {
571 throw(
'Feature must be attached to Slice to be stored.');
574 # make sure feature coords are relative to start of entire seq_region
576 if($slice->start != 1 || $slice->strand != 1) {
577 #move feature onto a slice of the entire seq_region
578 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
579 $slice->seq_region_name(),
583 $slice->coord_system->version());
585 $feature = $feature->transfer($slice);
588 throw(
'Could not transfer Feature to slice of ' .
589 'entire seq_region prior to storing');
593 # Ensure this type of feature is known to be stored in this coord system.
594 my $cs = $slice->coord_system;
596 my $mcc = $db->get_MetaCoordContainer();
598 $mcc->add_feature_type($cs,
'assembly_exception', $feature->length);
600 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
602 if(!$seq_region_id) {
603 throw(
'Feature is associated with seq_region which is not in this DB.');
606 return ($feature, $seq_region_id);
610 # helper function used to validate start/end/strand and
611 # hstart/hend/hstrand etc.
613 sub _check_start_end_strand {
620 # Make sure that the start, end, strand are valid
622 if(
int($start) != $start) {
623 throw(
"Invalid Feature start [$start]. Must be integer.");
625 if(
int($end) != $end) {
626 throw(
"Invalid Feature end [$end]. Must be integer.");
628 if(
int($strand) != $strand || $strand < -1 || $strand > 1) {
629 throw(
"Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
632 throw(
"Invalid Feature start/end [$start/$end]. Start must be less " .
633 "than or equal to end.");
641 Arg [1] : $asx Bio::EnsEMBL::AssemblyFeatureException
642 Example : $asx_adaptor->remove($asx);
643 Description: This removes a assembly exception feature from the database.
645 Exceptions : thrown
if $asx arg does not implement dbID(), or
if
646 $asx->dbID is not a
true value
652 #again, this method is generic in BaseFeatureAdaptor, but since this class
653 #is not inheriting, need to copy&paste
655 my ($self, $feature) = @_;
657 if(!$feature || !ref($feature) || !$feature->isa(
'Bio::EnsEMBL::AssemblyExceptionFeature')) {
658 throw(
'AssemblyExceptionFeature argument is required');
661 if(!$feature->is_stored($self->db)) {
662 throw(
"This feature is not stored in this database");
665 my $asx_id = $feature->dbID();
666 my $key = uc($feature->slice->name);
667 my $sth = $self->prepare(
"DELETE FROM assembly_exception WHERE assembly_exception_id = ?");
668 $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
672 #and finally update dbID cache
673 delete $self->{
'_aexc_dbID_cache'}->{$asx_id};
674 #and remove from cache feature
676 foreach my $asx (@{$self->{
'_aexc_slice_cache'}->{$key}}){
677 if ($asx->dbID != $asx_id){
678 push @features, $asx;
681 $self->{
'_aexc_slice_cache'}->{$key} = \@features;
683 foreach my $asx (@{$self->{
'_aexc_cache'}}){
684 if ($asx->dbID != $asx_id){
685 push @features, $asx;
688 $self->{
'_aexc_cache'} = \@features;
690 #unset the feature dbID ad adaptor
691 $feature->dbID(undef);
692 $feature->adaptor(undef);