3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
38 Abstract
class - should not be instantiated. Implementation of
39 abstract methods must be performed by subclasses.
43 This is a base adaptor
for feature adaptors. This base
class is simply a way
44 of eliminating code duplication through the implementation of methods
45 common to all feature adaptors.
51 package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
53 use vars qw(@ISA @EXPORT);
65 @EXPORT = (@{$DBI::EXPORT_TAGS{
'sql_types'}});
67 our $SLICE_FEATURE_CACHE_SIZE = 4;
68 our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;
69 our $SILENCE_CACHE_WARNINGS = 0;
73 Arg [1] : list of args @args
74 Superclass constructor arguments
76 Description: Constructor which warns
if caching has been switched off
77 Returntype : Bio::EnsEMBL::BaseFeatureAdaptor
79 Caller : implementing subclass constructors
85 my ($class, @args) = @_;
86 my $self = $class->SUPER::new(@args);
87 if ( defined $self->db->no_cache() && $self->db->no_cache() && ! $SILENCE_CACHE_WARNINGS) {
88 warning(
"You are using the API without caching most recent features. "
89 .
"Performance might be affected." );
94 =head2 start_equals_end
96 Arg [1] : (optional)
boolean $newval
97 Example : $bfa->start_equals_end(1);
98 Description: Getter/Setter
for the start_equals_end flag. If set
99 to
true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
102 Caller : EnsemblGenomes variation DB build
107 sub start_equals_end {
108 my ( $self, $value ) = @_;
110 if ( defined($value) ) {
111 $self->{
'start_equals_end'} = $value;
113 return $self->{
'start_equals_end'};
121 $registry->get_adaptor(
'Mus musculus',
'Core',
124 $registry->get_adaptor(
'Mus musculus',
'Core',
128 $sa->fetch_by_region(
'Chromosome',
'1', 1e8,
131 my $genes = $ga->fetch_all_by_Slice($slice);
135 Description : Empties the feature cache associated with
this
140 Status : At risk (under development)
146 $self->_clear_slice_feature_cache();
147 if(!$self->_no_id_cache()) {
148 $self->_id_cache()->clear_cache();
153 sub _clear_slice_feature_cache {
155 %{$self->{_slice_feature_cache}} = ();
159 =head2 _slice_feature_cache
161 Description : Returns the feature cache
if we are allowed to cache and
162 will build it
if we need to. We will never
return a reference
163 to the hash to avoid unintentional
auto-vivfying caching
170 sub _slice_feature_cache {
172 return if $self->db()->no_cache();
173 if(! exists $self->{_slice_feature_cache}) {
174 tie my %cache,
'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
175 $self->{_slice_feature_cache} = \%cache;
177 return $self->{_slice_feature_cache};
180 =head2 fetch_all_by_Slice
183 the slice from which to obtain features
184 Arg [2] : (optional)
string $logic_name
185 the logic name of the type of features to obtain
186 Example : $fts = $a->fetch_all_by_Slice($slice,
'Swall');
187 Description: Returns a listref of features created from the database
188 which are on the Slice defined by $slice. If $logic_name is
189 defined only features with an analysis of type $logic_name
191 NOTE: only features that are entirely on the slice
's seq_region
192 will be returned (i.e. if they hang off the start/end of a
193 seq_region they will be discarded). Features can extend over the
194 slice boundaries though (in cases where you have a slice that
195 doesn't span the whole seq_region).
196 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
203 sub fetch_all_by_Slice {
204 my ($self, $slice, $logic_name) = @_;
205 #fetch by constraint with empty constraint
206 return $self->fetch_all_by_Slice_constraint($slice,
'', $logic_name);
211 =head2 fetch_Iterator_by_Slice_method
213 Arg [1] : CODE ref of Slice fetch method
214 Arg [2] : ARRAY ref of parameters
for Slice fetch method
215 Arg [3] : Optional int: Slice index in parameters array
216 Arg [4] : Optional int: Slice chunk size. Default=500000
217 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
218 ($feature_adaptor->can(
'fetch_all_by_Slice_Arrays'),
219 \@fetch_method_params,
223 while(my $feature = $slice_iter->next && defined $feature){
227 Description: Creates an Iterator which chunks the query Slice to facilitate
228 large Slice queries which would have previously
run out of memory
230 Exceptions : Throws
if mandatory params not valid
236 #Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method
238 sub fetch_Iterator_by_Slice_method{
239 my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;
241 if(! ( defined $slice_method_ref &&
242 ref($slice_method_ref) eq
'CODE')
244 throw(
'Must pass a valid Slice fetch method CODE ref');
247 if (! ($params_ref &&
248 ref($params_ref) eq
'ARRAY')) {
249 #Don't need to check size here so long as we have valid Slice
250 throw(
'You must pass a method params ARRAYREF');
253 $slice_idx = 0
if(! defined $slice_idx);
254 my $slice = $params_ref->[$slice_idx];
255 $chunk_size ||= 1000000;
259 my $start = 1; #local coord
for sub slice
260 my $end = $slice->length;
261 my $num_overlaps = 0;
266 while (scalar(@feat_cache) == 0 &&
269 my $new_end = ($start + $chunk_size - 1);
271 if ($new_end >= $end) {
272 # this is our last chunk
277 #Chunk by sub slicing
278 my $sub_slice = $slice->sub_Slice($start, $new_end);
279 $params_ref->[$slice_idx] = $sub_slice;
280 @feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
282 #Remove & count overlapping features
283 splice(@feat_cache, 0, $num_overlaps)
if($num_overlaps);
286 if (scalar(@feat_cache) > 0) {
288 my $feat_end = $feat_cache[$#feat_cache]->seq_region_end;
289 my $slice_end = $sub_slice->end;
291 for ($i = $#feat_cache; $i >=0; $i--) {
292 $feat_end = $feat_cache[$i]->seq_region_end;
294 if ($feat_end > $slice_end) {
303 # update the start coordinate
304 $start = $new_end + 1;
307 #this maybe returning from an undef cache
308 #Need to sub this out even more?
309 return shift @feat_cache;
316 =head2 fetch_Iterator_by_Slice
319 Arg [2] : Optional string: logic name of analysis
320 Arg [3] : Optional int: Chunk size to iterate over. Default is 500000
321 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);
323 while(my $feature = $slice_iter->next && defined $feature){
327 Description: Creates an Iterator which chunks the query Slice to facilitate
328 large Slice queries which would have previously
run out of memory
336 sub fetch_Iterator_by_Slice{
337 my ($self, $slice, $logic_name, $chunk_size) = @_;
339 my $method_ref = $self->can(
'fetch_all_by_Slice');
341 return $self->fetch_Iterator_by_Slice_method($method_ref, [$slice, $logic_name], 0, $chunk_size);
345 =head2 fetch_all_by_Slice_and_score
348 the slice from which to obtain features
349 Arg [2] : (optional)
float $score
350 lower bound of the the score of the features retrieved
351 Arg [3] : (optional)
string $logic_name
352 the logic name of the type of features to obtain
353 Example : $fts = $a->fetch_all_by_Slice_and_score($slice,90,
'Swall');
354 Description: Returns a list of features created from the database which are
355 are on the Slice defined by $slice and which have a score
356 greater than $score. If $logic_name is defined,
357 only features with an analysis of type $logic_name will be
359 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
366 sub fetch_all_by_Slice_and_score {
367 my ( $self, $slice, $score, $logic_name ) = @_;
370 if ( defined($score) ) {
371 # Get the synonym of the primary_table
372 my @tabs = $self->_tables();
373 my $syn = $tabs[0]->[1];
375 $constraint = sprintf(
"%s.score > %s",
377 $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
381 $self->fetch_all_by_Slice_constraint( $slice, $constraint,
386 =head2 fetch_all_by_Slice_constraint
389 the slice from which to obtain features
390 Arg [2] : (optional)
string $constraint
391 An SQL query constraint (i.e. part of the WHERE clause)
392 Arg [3] : (optional)
string $logic_name
393 the logic name of the type of features to obtain
394 Example : $fs = $a->fetch_all_by_Slice_constraint($slc,
'perc_ident > 5');
395 Description: Returns a listref of features created from the database which
396 are on the Slice defined by $slice and fulfill the SQL
397 constraint defined by $constraint. If logic name is defined,
398 only features with an analysis of type $logic_name will be
400 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
401 Exceptions : thrown
if $slice is not defined
407 sub fetch_all_by_Slice_constraint {
408 my ( $self, $slice, $constraint, $logic_name ) = @_;
412 #Pull out here as we need to remember them & reset accordingly
413 my $bind_params = $self->bind_param_generic_fetch();
416 || !( $slice->isa(
'Bio::EnsEMBL::Slice')
417 or $slice->isa(
'Bio::EnsEMBL::LRGSlice') ) )
419 throw(
"Bio::EnsEMBL::Slice argument expected.");
424 $self->_logic_name_to_constraint( $constraint, $logic_name );
426 # If the logic name was invalid, undef was returned
427 if ( !defined($constraint) ) {
return [] }
432 # Will only use feature_cache if hasn't been no_cache attribute set
434 !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
437 #strain test and add to constraint if so to stop caching.
438 if ( $slice->isa(
'Bio::EnsEMBL::Variation::StrainSlice') ) {
440 $self->dbc()->db_handle()->quote( $slice->strain_name() );
442 if ( $constraint ne
"" ) {
443 $constraint .=
" AND $string = $string ";
445 $constraint .=
" $string = $string ";
449 # Check the cache and return the cached results if we have already
450 # done this query. The cache key is the made up from the slice
451 # name, the constraint, and the bound parameters (if there are any).
452 $key = uc( join(
':', $slice->name(), $constraint ) );
454 if ( defined($bind_params) ) {
456 . join(
':',
map { $_->[0] .
'/' . $_->[1] } @{$bind_params} );
459 $cache = $self->_slice_feature_cache();
460 if ( exists( $cache->{$key} ) ) {
461 # Clear the bound parameters and return the cached data.
462 $self->{
'_bind_param_generic_fetch'} = ();
463 #IMPORTANT: NEVER EVER RETURN A COPY OF THE DATA STRUCTURE.
464 # This will hold arrays of values. Since we've been doing
465 # this for so long people are expecting multiple calls
466 # to fetch_by_SliceXXXXX() methods to return the same
468 return $cache->{$key};
470 } ## end
if ( !( defined( $self...)))
473 my $proj_ref = $self->_get_and_filter_Slice_projections($slice);
474 my $bounds = $self->_generate_feature_bounds($slice);
476 # fetch features for the primary slice AND all symlinked slices
477 foreach my $seg (@{$proj_ref}) {
479 $self->_bind_param_generic_fetch($bind_params);
480 my $offset = $seg->from_start();
481 my $seg_slice = $seg->to_Slice();
483 $self->_slice_fetch( $seg_slice, $constraint );
485 # If this was a symlinked slice offset the feature coordinates as
487 if ( $seg_slice->name() ne $slice->name() ) {
490 foreach my $f ( @{$features} ) {
491 if ( $offset != 1 ) {
492 $f->{
'start'} += $offset - 1;
493 $f->{
'end'} += $offset - 1;
496 # discard boundary crossing features from symlinked regions
497 foreach my $bound (@{$bounds}) {
498 if ( $f->{
'start'} < $bound && $f->{
'end'} >= $bound ) {
503 $f->{
'slice'} = $slice;
507 push( @result, @{$features} );
509 } ## end
foreach my $seg (@proj)
511 # Will only use feature_cache when set attribute no_cache in DBAdaptor
512 if ( defined($key) ) {
513 $cache->{$key} = \@result;
517 } ## end sub fetch_all_by_Slice_constraint
520 =head2 fetch_all_by_logic_name
522 Arg [1] :
string $logic_name
523 the logic name of the type of features to obtain
524 Example : $fs = $a->fetch_all_by_logic_name(
'foobar');
525 Description: Returns a listref of features created from the database.
526 only features with an analysis of type $logic_name will
527 be returned. If the logic name is invalid (not in the
528 analysis table), a reference to an empty list will be
530 Returntype : listref of Bio::EnsEMBL::SeqFeatures
531 Exceptions : thrown
if no $logic_name
537 sub fetch_all_by_logic_name {
538 my ( $self, $logic_name ) = @_;
540 if ( !defined($logic_name) ) {
541 throw(
"Need a logic_name");
544 my $constraint = $self->_logic_name_to_constraint(
'', $logic_name );
546 if ( !defined($constraint) ) {
547 warning(
"Invalid logic name: $logic_name");
551 return $self->generic_fetch($constraint);
554 =head2 fetch_all_by_stable_id_list
556 Arg [1] :
string $logic_name
557 the logic name of the type of features to obtain
559 the slice from which to obtain features
560 Example : $fs = $a->fetch_all_by_stable_id_list([
"ENSG00001",
"ENSG00002", ...]);
561 Description: Returns a listref of features identified by their stable IDs.
562 This method only fetches features of the same type as the calling
564 Results are constrained to a slice
if the slice is provided.
566 Exceptions : thrown
if no stable ID list is provided.
572 # Adapted from BaseAdaptor->uncached_fetch_all_by_dbID_list
573 sub fetch_all_by_stable_id_list {
574 my ( $self, $id_list_ref, $slice ) = @_;
576 return $self->_uncached_fetch_all_by_id_list($id_list_ref,$slice,
"stable_id");
579 # Method that creates an object. Called by the _objs_from_sth() method
580 # in the sub-classes (the various feature adaptors). Overridden by the
581 # feature collection classes.
583 sub _create_feature {
584 my ( $self, $feature_type, $args ) = @_;
585 return $feature_type->
new( %{$args} );
588 # This is the same as the above, but calls the new_fast() constructor of
591 sub _create_feature_fast {
592 my ( $self, $feature_type, $args ) = @_;
593 return $feature_type->
new_fast($args);
596 =head2 count_by_Slice_constraint
599 Arg [2] : String Custom SQL constraint
600 Arg [3] : String Logic name to search by
601 Description : Finds all features with at least partial overlap to the given
602 slice and sums them up.
603 Explanation of workings with projections:
605 |-------------------------Constraint Slice---------------------------------|
608 | |--Segment 2, on desired Coordinate System--| |
609 | | |---Segment 3----|
610 #Feature 1# #Feature 2# #Feature 3# |
611 | #####################Feature 4#################### |
614 Feature 1 is overlapping the original constraint. Counted in Segment 1
615 Feature 2,3 and 4 are counted when inspecting Segment 2
616 Feature 5 is counted in Segment 1
621 sub count_by_Slice_constraint {
622 my ($self, $slice, $constraint, $logic_name) = @_;
624 assert_ref($slice,
'Bio::EnsEMBL::Slice',
'slice');
628 #Remember the bind params
629 my $bind_params = $self->bind_param_generic_fetch();
632 my @tables = $self->_tables;
633 my ($table_name, $table_synonym) = @{ $tables[0] };
635 # Basic constraints limit the feature hits to the starting Slice constraint
637 $constraint = $self->_logic_name_to_constraint( $constraint, $logic_name );
639 return $count
if ! defined $constraint;
642 my $sa = $slice->adaptor();
643 my $projections = $self->_get_and_filter_Slice_projections($slice);
645 my $segment_count = @{$projections};
646 #Manual loop to support look-ahead/behind
647 for (my $i = 0; $i < $segment_count; $i++) {
648 my $seg = $projections->[$i];
649 my $seg_slice = $seg->to_Slice();
650 $self->_bind_param_generic_fetch($bind_params);
652 # We cannot filter boundary crossing features in code, so we constrain the
653 # SQL. We detect when we are not on the original query Slice and manually
654 # filter by the segment Slice's start and end. If we are on "earlier" (5')
655 # projection segments, we only limit by the *end* and when we are on the later
656 # projection segments, we filter by the *start*.
658 # This is the same as fetch_all_by_Slice_constraint()'s in-memory filtering
659 # except we need to alter projected features in that code with an offset
660 my $local_constraint = $constraint;
661 if ( $seg_slice->name() ne $slice->name() ) {
662 my ($limit_start, $limit_end) = (1,1); #fully constrained by
default, flags are reset every iteration
665 } elsif ($i == ($segment_count - 1)) {
666 $limit_end = 0; #don
't check end as we are on the final projection
668 $local_constraint .= ' AND
' if $local_constraint;
672 # Restrict features to the end of this projection segment when after the named segment
673 push(@conditionals, sprintf('%1$s.seq_region_end <= %2$d
', $table_synonym, $seg_slice->end));
676 #Do not cross the start boundary on this projection segment
677 push(@conditionals, sprintf('%1$s.seq_region_start >= %2$d
', $table_synonym, $seg_slice->start));
680 $local_constraint .= join(q{ AND }, @conditionals);
683 my $count_array = $self->_get_by_Slice($seg_slice, $local_constraint, 'count
');
684 $count += $_ for @$count_array;
690 =head2 _get_and_filter_Slice_projections
692 Arg [1] : Bio::EnsEMBL::Slice
693 Description : Delegates onto SliceAdaptor::fetch_normalized_slice_projection()
695 Returntype : ArrayRef Bio::EnsEMBL::ProjectionSegment; Returns an array
696 of projected segments
699 sub _get_and_filter_Slice_projections {
700 my ($self, $slice) = @_;
701 my $sa = $slice->adaptor();
702 my $filter_projections = 1;
703 return $sa->fetch_normalized_slice_projection($slice, $filter_projections);
706 =head2 _generate_feature_bounds
708 Arg [1] : Bio::EnsEMBL::Slice
709 Description : Performs a projection of Slice and records the bounds
710 of that projection. This can be used later on to restrict
711 Features which overlap into unwanted areas such as
712 regions which exist on another HAP/PAR region.
714 Bounds are defined as projection_start - slice_start + 1.
715 Example : my $bounds = $self->_generate_feature_bounds($slice);
716 Returntype : ArrayRef Integer; Returns the location of the bounds.
719 sub _generate_feature_bounds {
720 my ($self, $slice) = @_;
721 my $sa = $slice->adaptor();
722 # construct list of Hap/PAR boundaries for entire seq region
725 my $sr_id = $slice->get_seq_region_id();
726 my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
727 if ( $slice->strand() == -1 ) {
728 $ent_slice = $ent_slice->invert();
731 my @ent_proj = @{ $sa->fetch_normalized_slice_projection($ent_slice) };
732 shift(@ent_proj); # skip first; 1st does not have bounds normally; may change if we ever have a patch a pos 1
734 @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;
740 Arg [0] : Bio::EnsEMBL::Slice to find all the features within
741 Arg [1] : SQL constraint string
742 Arg [2] : Type of query to run. Default behaviour is to select, but
743 'count
' is also valid
744 Description: Abstracted logic from _slice_fetch
745 Returntype : Listref of Bio::EnsEMBL::Feature, or integers for counting mode
749 my ($self, $slice, $orig_constraint, $query_type) = @_;
751 # features can be scattered across multiple coordinate systems
752 my @tables = $self->_tables;
753 my ($table_name, $table_synonym) = @{ $tables[0] };
755 my @feature_coord_systems;
757 my $meta_values = $self->db->get_MetaContainer->list_value_by_key( $table_name."build.level");
758 if ( @$meta_values and $slice->is_toplevel() ) {
759 push @feature_coord_systems, $slice->coord_system();
761 @feature_coord_systems = @{ $self->db->get_MetaCoordContainer->fetch_all_CoordSystems_by_feature_type($table_name)};
764 my $assembly_mapper_adaptor = $self->db->get_AssemblyMapperAdaptor();
765 my @pan_coord_features;
767 COORD_SYSTEM: foreach my $coord_system (@feature_coord_systems) {
768 my @query_accumulator;
769 # Build up a combination of query constraints that will quickly establish the result set
770 my $constraint = $orig_constraint;
771 if ( $coord_system->equals( $slice->coord_system ) ) {
772 my $max_len = $self->_max_feature_length
773 || $self->db->get_MetaCoordContainer
774 ->fetch_max_length_by_CoordSystem_feature_type( $coord_system,$table_name );
777 if ( $slice->adaptor ) {
778 $seq_region_id = $slice->adaptor->get_seq_region_id($slice);
780 $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
783 my @seq_region_ids = ($seq_region_id);
785 my $ext_seq_region_id = $self->get_seq_region_id_external($seq_region_id);
787 if ( $ext_seq_region_id == $seq_region_id ) { last }
789 push( @seq_region_ids, $ext_seq_region_id );
790 $seq_region_id = $ext_seq_region_id;
793 $constraint .= " AND " if ($constraint);
795 $constraint .= ${table_synonym}.".seq_region_id IN (". join( ',
', @seq_region_ids ) . ") AND ";
797 #faster query for 1bp slices where SNP data is not compressed
798 if ( $self->start_equals_end && $slice->start == $slice->end ) {
799 $constraint .= " AND ".$table_synonym.".seq_region_start = ".$slice->end .
800 " AND ".$table_synonym.".seq_region_end = ".$slice->start;
803 if ( !$slice->is_circular() ) {
804 # Deal with the default case of a non-circular chromosome.
805 $constraint .= $table_synonym.".seq_region_start <= ".$slice->end." AND "
806 .$table_synonym.".seq_region_end >= ".$slice->start;
809 my $min_start = $slice->start - $max_len;
810 $constraint .= " AND ".$table_synonym.".seq_region_start >= ".$min_start;
814 # Deal with the case of a circular chromosome.
815 if ( $slice->start > $slice->end ) {
816 $constraint .= " ( ".$table_synonym.".seq_region_start >= ".$slice->start
817 . " OR ".$table_synonym.".seq_region_start <= ".$slice->end
818 . " OR ".$table_synonym.".seq_region_end >= ".$slice->start
819 . " OR ".$table_synonym.".seq_region_end <= ".$slice->end
820 . " OR ".$table_synonym.".seq_region_start > ".$table_synonym.".seq_region_end)";
822 $constraint .= " ((".$table_synonym.".seq_region_start <= ".$slice->end
823 . " AND ".$table_synonym.".seq_region_end >= ".$slice->start.") "
824 . "OR (".$table_synonym.".seq_region_start > ".$table_synonym.".seq_region_end"
825 . " AND (".$table_synonym.".seq_region_start <= ".$slice->end
826 . " OR ".$table_synonym.".seq_region_end >= ".$slice->start.")))";
830 push @query_accumulator, [$constraint,undef,$slice]; # $mapper intentionally absent here.
833 #coordinate systems do not match
834 $mapper = $assembly_mapper_adaptor->fetch_by_CoordSystems( $slice->coord_system(), $coord_system );
835 next unless defined $mapper;
837 # Get list of coordinates and corresponding internal ids for
838 # regions the slice spans
839 my @coords = $mapper->map( $slice->seq_region_name, $slice->start, $slice->end,
840 $slice->strand, $slice->coord_system );
844 next COORD_SYSTEM if ( !@coords );
846 my @ids = map { $_->id() } @coords;
847 #coords are now id rather than name
850 && $slice->coord_system->name() ne 'lrg
') {
851 $constraint = $orig_constraint;
852 my $id_str = join( ',
', @ids );
853 $constraint .= " AND " if ($constraint);
854 $constraint .= $table_synonym.".seq_region_id IN ($id_str)";
856 push @query_accumulator, [$constraint,$mapper,$slice];
859 $self->_max_feature_length()
860 || $self->db->get_MetaCoordContainer
861 ->fetch_max_length_by_CoordSystem_feature_type($coord_system, $table_name)
864 my $length = @coords;
865 for ( my $i = 0; $i < $length; $i++ ) {
866 $constraint = $orig_constraint;
867 $constraint .= " AND " if ($constraint);
868 $constraint .= $table_synonym.".seq_region_id = "
870 . $table_synonym.".seq_region_start <= "
871 . $coords[$i]->end() . " AND "
872 . $table_synonym.".seq_region_end >= "
873 . $coords[$i]->start();
876 my $min_start = $coords[$i]->start() - $max_len;
877 $constraint .= " AND ".$table_synonym.".seq_region_start >= ".$min_start;
880 push @query_accumulator, [$constraint,$mapper,$slice];
881 } # end multi-query cycle
884 } # end else (coord sytems not matching)
886 #Record the bind params if we have to do multiple queries
887 my $bind_params = $self->bind_param_generic_fetch();
889 foreach my $query (@query_accumulator) {
890 my ($local_constraint,$local_mapper,$local_slice) = @$query;
891 $self->_bind_param_generic_fetch($bind_params);
892 if ($query_type and $query_type eq 'count
') {
893 push @pan_coord_features, $self->generic_count($local_constraint);
896 my $features = $self->generic_fetch( $local_constraint, $local_mapper, $local_slice );
897 $features = $self->_remap( $features, $local_mapper, $local_slice );
898 push @pan_coord_features, @$features;
903 $self->{_bind_param_generic_fetch} = undef;
904 return \@pan_coord_features;
907 # helper function used by fetch_all_by_Slice_constraint method
910 my ( $self, $slice, $orig_constraint ) = @_;
912 my $features = $self->_get_by_Slice($slice,$orig_constraint);
915 } ## end sub _slice_fetch
918 #for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
919 sub get_seq_region_id_external {
920 my ( $self, $sr_id ) = @_;
921 my $cs_a = $self->db()->get_CoordSystemAdaptor();
922 return ( exists( $cs_a->{'_internal_seq_region_mapping
'}->{$sr_id} )
923 ? $cs_a->{'_internal_seq_region_mapping
'}->{$sr_id}
927 #for a given seq_region_id and coord_system, gets the one used in the internal (core) database
928 sub get_seq_region_id_internal{
929 my ( $self, $sr_id ) = @_;
930 my $cs_a = $self->db()->get_CoordSystemAdaptor();
931 return ( exists $cs_a->{'_external_seq_region_mapping
'}->{$sr_id}
932 ? $cs_a->{'_external_seq_region_mapping
'}->{$sr_id}
937 # Helper function containing some common feature storing functionality
939 # Given a Feature this will return a copy (or the same feature if no changes
940 # to the feature are needed) of the feature which is relative to the start
941 # of the seq_region it is on. The seq_region_id of the seq_region it is on
944 # This method will also ensure that the database knows which coordinate
945 # systems that this feature is stored in.
953 throw('Expected Feature argument.
');
955 my $slice = $feature->slice();
957 $self->_check_start_end_strand($feature->start(),$feature->end(),
958 $feature->strand(), $slice);
961 my $db = $self->db();
963 my $slice_adaptor = $db->get_SliceAdaptor();
966 throw('Feature must be attached to Slice to be stored.
');
969 # make sure feature coords are relative to start of entire seq_region
971 if($slice->start != 1 || $slice->strand != 1) {
972 #move feature onto a slice of the entire seq_region
973 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
974 $slice->seq_region_name(),
978 $slice->coord_system->version());
980 $feature = $feature->transfer($slice);
983 throw('Could not transfer Feature to slice of
' .
984 'entire seq_region prior to storing
');
988 # Ensure this type of feature is known to be stored in this coord system.
989 my $cs = $slice->coord_system;
991 my ($tab) = $self->_tables();
992 my $tabname = $tab->[0];
994 my $mcc = $db->get_MetaCoordContainer();
996 $mcc->add_feature_type($cs, $tabname, $feature->length);
998 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
1000 if(!$seq_region_id) {
1001 throw('Feature is associated with seq_region which is not in
this DB.
');
1004 return ($feature, $seq_region_id);
1008 # The same function as _pre_store
1009 # This one is used to store user uploaded features in XXX_userdata db
1011 sub _pre_store_userdata {
1013 my $feature = shift;
1015 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature
')) {
1016 throw('Expected Feature argument.
');
1019 my $slice = $feature->slice();
1020 my $slice_adaptor = $slice->adaptor;
1022 $self->_check_start_end_strand($feature->start(),$feature->end(),
1023 $feature->strand(), $slice);
1027 throw('Feature must be attached to Slice to be stored.
');
1030 # make sure feature coords are relative to start of entire seq_region
1032 if($slice->start != 1 || $slice->strand != 1) {
1033 #move feature onto a slice of the entire seq_region
1034 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
1035 $slice->seq_region_name(),
1039 $slice->coord_system->version());
1041 $feature = $feature->transfer($slice);
1044 throw('Could not transfer Feature to slice of
' .
1045 'entire seq_region prior to storing
');
1049 # Ensure this type of feature is known to be stored in this coord system.
1050 my $cs = $slice->coord_system;
1052 my ($tab) = $self->_tables();
1053 my $tabname = $tab->[0];
1056 my $mcc = $db->get_MetaCoordContainer();
1058 $mcc->add_feature_type($cs, $tabname, $feature->length);
1060 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
1062 if(!$seq_region_id) {
1063 throw('Feature is associated with seq_region which is not in
this DB.
');
1066 return ($feature, $seq_region_id);
1071 # helper function used to validate start/end/strand and
1072 # hstart/hend/hstrand etc.
1074 sub _check_start_end_strand {
1075 my ($self, $start, $end, $strand, $slice) = @_;
1078 # Make sure that the start, end, strand are valid
1080 if(int($start) != $start) {
1081 throw("Invalid Feature start [$start]. Must be integer.");
1083 if(int($end) != $end) {
1084 throw("Invalid Feature end [$end]. Must be integer.");
1086 if(int($strand) != $strand || $strand < -1 || $strand > 1) {
1087 throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
1089 if($end < $start && ! (defined $slice && $slice->is_circular())) {
1090 throw("Invalid Feature start/end [$start/$end]. Start must be less " .
1091 "than or equal to end.");
1099 # Given a list of features checks if they are in the correct coord system
1100 # by looking at the first features slice. If they are not then they are
1101 # converted and placed on the slice.
1104 my ( $self, $features, $mapper, $slice ) = @_;
1106 #check if any remapping is actually needed
1107 if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature
') ||
1108 $features->[0]->slice == $slice)) {
1112 #remapping has not been done, we have to do our own conversion from
1117 my $slice_start = $slice->start();
1118 my $slice_end = $slice->end();
1119 my $slice_strand = $slice->strand();
1120 my $slice_cs = $slice->coord_system();
1122 my ($seq_region_id, $start, $end, $strand);
1124 my $slice_seq_region_id = $slice->get_seq_region_id();
1126 foreach my $f (@$features) {
1127 #since feats were obtained in contig coords, attached seq is a contig
1128 my $fslice = $f->slice();
1130 throw("Feature does not have attached slice.\n");
1132 my $fseq_region_id = $fslice->get_seq_region_id();
1133 my $fcs = $fslice->coord_system();
1135 if(!$slice_cs->equals($fcs)) {
1136 #slice of feature in different coord system, mapping required
1137 # $seq_region comes out as an integer not a string
1138 ($seq_region_id, $start, $end, $strand) =
1139 $mapper->fastmap($fslice->seq_region_name(),$f->start(),$f->end(),$f->strand(),$fcs);
1141 # undefined start means gap
1142 next if(!defined $start);
1144 $start = $f->start();
1146 $strand = $f->strand();
1147 $seq_region_id = $fseq_region_id;
1150 # maps to region outside desired area
1151 next if ($start > $slice_end) || ($end < $slice_start) ||
1152 ($slice_seq_region_id != $seq_region_id);
1154 #shift the feature start, end and strand in one call
1155 if($slice_strand == -1) {
1156 $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
1158 $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
1170 # get seq region boundary (start|end) for a feature
1171 # the method attempts to retrieve the boundary directly from the db
1172 # return undef if cannot determine the associated feature table
1174 sub _seq_region_boundary_from_db {
1175 my ($self, $feature, $boundary) = @_;
1178 throw('Expected Feature argument.
');
1181 throw "Undefined boundary"
1182 unless defined $boundary;
1184 $boundary eq 'start' or $boundary eq 'end
'
1185 or throw "Wrong boundary: select start|end";
1187 $boundary = 'seq_region_
' . $boundary;
1189 my $sql_helper = $self->dbc->sql_helper;
1190 throw "Unable to get SqlHelper instance" unless defined $sql_helper;
1192 my $feature_table = ($self->_tables)[0]->[0];
1193 warning (sprintf "Unable to get %s for %s instance\nCould not find associated feature table, returning undef", $boundary, ref $feature)
1194 and return undef unless defined $feature_table;
1196 my $db_id = $feature->dbID;
1197 my $attrib_id = $feature_table . '_id
';
1198 my $query = "SELECT ${boundary} from ${feature_table} WHERE ${attrib_id} = ${db_id}";
1200 return $sql_helper->execute_single_result(-SQL => $query);
1205 Arg [1] : list of Bio::EnsEMBL::SeqFeature
1206 Example : $adaptor->store(@feats);
1207 Description: ABSTRACT Subclasses are responsible for implementing this
1208 method. It should take a list of features and store them in
1211 Exceptions : thrown method is not implemented by subclass
1221 throw("Abstract method store not defined by implementing subclass\n");
1227 Arg [1] : A feature $feature
1228 Example : $feature_adaptor->remove($feature);
1229 Description: This removes a feature from the database. The table the
1230 feature is removed from is defined by the abstract method
1231 _tablename, and the primary key of the table is assumed
1232 to be _tablename() . '_id
'. The feature argument must
1233 be an object implementing the dbID method, and for the
1234 feature to be removed from the database a dbID value must
1237 Exceptions : thrown if $feature arg does not implement dbID(), or if
1238 $feature->dbID is not a true value
1246 my ($self, $feature) = @_;
1249 throw('Feature argument is required
');
1252 if(!$feature->is_stored($self->db)) {
1253 throw("This feature is not stored in this database");
1256 my @tabs = $self->_tables;
1257 my ($table) = @{$tabs[0]};
1259 my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
1260 $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
1263 #unset the feature dbID ad adaptor
1264 $feature->dbID(undef);
1265 $feature->adaptor(undef);
1271 =head2 remove_by_Slice
1273 Arg [1] : Bio::Ensembl::Slice $slice
1274 Example : $feature_adaptor->remove_by_Slice($slice);
1275 Description: This removes features from the database which lie on a region
1276 represented by the passed in slice. Only features which are
1277 fully contained by the slice are deleted; features which overlap
1278 the edge of the slice are not removed.
1279 The table the features are removed from is defined by
1280 the abstract method_tablename.
1282 Exceptions : thrown if no slice is supplied
1288 sub remove_by_Slice {
1289 my ($self, $slice) = @_;
1292 throw("Slice argument is required");
1295 my @tabs = $self->_tables;
1296 my ($table_name) = @{$tabs[0]};
1298 my $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
1299 my $start = $slice->start();
1300 my $end = $slice->end();
1303 # Delete only features fully on the slice, not overlapping ones
1305 my $sth = $self->prepare("DELETE FROM $table_name " .
1306 "WHERE seq_region_id = ? " .
1307 "AND seq_region_start >= ? " .
1308 "AND seq_region_end <= ?");
1310 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
1311 $sth->bind_param(2,$start,SQL_INTEGER);
1312 $sth->bind_param(3,$end,SQL_INTEGER);
1319 # Internal function. Allows the max feature length which is normally
1320 # retrieved from the meta_coord table to be overridden. This allows
1321 # for some significant optimizations to be put in when it is known
1322 # that requested features will not be over a certain size.
1324 sub _max_feature_length {
1326 return $self->{'_max_feature_length
'} = shift if(@_);
1327 return $self->{'_max_feature_length
'};
1332 # Lists all seq_region_ids that a particular feature type is found on.
1333 # Useful e.g. for finding out which seq_regions have genes.
1334 # Returns a listref of seq_region_ids.
1336 sub _list_seq_region_ids {
1337 my ($self, $table) = @_;
1347 WHERE sr.seq_region_id = a.seq_region_id
1348 AND sr.coord_system_id = cs.coord_system_id
1349 AND cs.species_id = ?);
1351 my $sth = $self->prepare($sql);
1353 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1357 while (my ($id) = $sth->fetchrow) {
1367 sub remove_by_analysis_id {
1368 my ($self, $analysis_id) = @_;
1370 $analysis_id or throw("Must call with analysis id");
1372 my @tabs = $self->_tables;
1373 my ($tablename) = @{$tabs[0]};
1375 my $sql = "DELETE FROM $tablename WHERE analysis_id = $analysis_id";
1376 # warn "SQL : $sql";
1378 my $sth = $self->prepare($sql);
1383 sub remove_by_feature_id {
1384 my ($self, $features_list) = @_;
1386 my @feats = @$features_list or throw("Must call store with features");
1388 my @tabs = $self->_tables;
1389 my ($tablename) = @{$tabs[0]};
1391 my $sql = sprintf "DELETE FROM $tablename WHERE ${tablename}_id IN (%s)", join ',
', @feats;
1392 # warn "SQL : $sql";
1394 my $sth = $self->prepare($sql);
1399 =head2 fetch_nearest_by_Feature
1401 Arg [1] : Reference Feature to start the search from
1402 Description: Searches iteratively outward from the starting feature until a nearby Feature is found
1403 If you require more than one result or more control of which features are returned, see
1404 fetch_all_nearest_by_Feature and fetch_all_by_outward_search. fetch_nearest_by_Feature
1405 is a convenience method.
1406 ReturnType : Bio::EnsEMBL::Feature
1409 sub fetch_nearest_by_Feature {
1411 my $feature = shift;
1412 my @hits = @{ $self->fetch_all_by_outward_search(-FEATURE => $feature, -MAX_RANGE => 1000000, -RANGE => 1000, -LIMIT => 1) };
1413 return $hits[0] if (scalar @hits > 0);
1418 =head2 fetch_all_by_outward_search
1420 Arguments the same as fetch_all_nearest_by_Feature
1421 Arg [0] : -MAX_RANGE : Set an upper limit on the search range, defaults to 10000 bp
1422 Arg [1] : -FEATURE ,Bio::EnsEMBL::Feature : 'Source
' Feature to anchor the search for nearest Features
1423 Arg [2] : -SAME_STRAND, Boolean (optional) : Respect the strand of the source Feature with ref, only
1424 returning Features on the same strand.
1425 Arg [3] : -OPPOSITE_STRAND, Boolean (optional) : Find features on the opposite strand of the same
1426 Arg [4] : -DOWNSTREAM/-UPSTREAM, (optional) : Search ONLY downstream or upstream from the source Feature.
1427 Can be omitted for searches in both directions.
1428 Arg [5] : -RANGE, Int : The size of the space to search for Features. Defaults to 1000 as a sensible starting point
1429 Arg [6] : -NOT_OVERLAPPING, Boolean (optional) : Do not return Features that overlap the source Feature
1430 Arg [7] : -FIVE_PRIME, Boolean (optional) : Determine range to a Feature by the 5' end, respecting strand
1431 Arg [8] : -THREE_PRIME, Boolean (optional): Determine range to a Feature by the 3
' end, respecting strand
1432 Arg [9] : -LIMIT, Int : The maximum number of Features to return, defaulting to one. Equally near features are all returned
1434 Description: Searches for features within the suggested -RANGE, and if it finds none, expands the search area
1435 until it satisfies -LIMIT or hits -MAX_RANGE. Useful if you don't know how far away the features
1436 might be, or if dealing with areas of high feature density. In the case of Variation Features, it is
1437 conceivable that a 2000 bp window might contain very many features, resulting in a slow and bloated
1438 response, thus the ability to explore outward in smaller sections can be useful.
1439 Returntype : Listref of [$feature,$distance]
1442 sub fetch_all_by_outward_search {
1444 my ($ref_feature, $respect_strand, $opposite_strand, $downstream, $upstream, $start_search_range,
1445 $limit,$not_overlapping,$five_prime,$three_prime, $max_range) =
1446 rearrange([qw(FEATURE SAME_STRAND OPPOSITE_STRAND DOWNSTREAM UPSTREAM RANGE LIMIT NOT_OVERLAPPING FIVE_PRIME THREE_PRIME MAX_RANGE)], @_);
1449 $start_search_range ||= 1000;
1450 my $current_search_range = $start_search_range;
1451 $max_range ||= 10000;
1453 while (scalar @results < $limit && $current_search_range < $max_range) {
1454 $current_search_range = $start_search_range * $factor;
1456 $self->fetch_all_nearest_by_Feature(-RANGE => $current_search_range,
1457 -FEATURE => $ref_feature,
1458 -SAME_STRAND => $respect_strand,
1459 -OPPOSITE_STRAND => $opposite_strand,
1460 -DOWNSTREAM => $downstream,
1461 -UPSTREAM => $upstream,
1463 -NOT_OVERLAPPING => $not_overlapping,
1464 -FIVE_PRIME => $five_prime,
1465 -THREE_PRIME => $three_prime,
1473 =head2 fetch_all_nearest_by_Feature
1475 Arg [1] : -FEATURE ,
Bio::EnsEMBL::Feature :
'Source' Feature to anchor the search
for nearest Features
1476 Arg [2] : -SAME_STRAND, Boolean (optional): Respect the strand of the source Feature with ref, only
1477 returning Features on the same strand
1478 Arg [3] : -OPPOSITE_STRAND, Boolean (optional) : Find features on the opposite strand of the same
1479 Arg [4] : -DOWNSTREAM/-UPSTREAM, (optional) : Search ONLY downstream or upstream from the source Feature.
1480 Can be omitted for searches in both directions.
1481 Arg [5] : -RANGE, Int : The size of the space to search for Features. Defaults to 1000 as a sensible starting point
1482 Arg [6] : -NOT_OVERLAPPING, Boolean (optional) : Do not return Features that overlap the source Feature
1483 Arg [7] : -FIVE_PRIME, Boolean (optional) : Determine range to a Feature by its 5
' end, respecting strand
1484 Arg [8] : -THREE_PRIME, Boolean (optional): Determine range to a Feature by its 3' end, respecting strand
1485 Arg [9] : -LIMIT, Int : The maximum number of Features to return, defaulting to one. Equally near features are all returned
1486 Example : #To fetch the gene(s) with the nearest 5
' end:
1487 $genes = $gene_adaptor->fetch_all_nearest_by_Feature(-FEATURE => $feat, -FIVE_PRIME => 1);
1489 Description: Gets the nearest Features to a given 'source
' Feature. The Feature returned and the format of the result
1490 are non-obvious, please read on.
1492 When looking beyond the boundaries of the source Feature, the distance is measured to the nearest end
1493 of that Feature to the nearby Feature's nearest end.
1494 If Features overlap the source Feature, then they are given a distance of zero but ordered by
1495 their proximity to the centre of the Feature.
1497 Features are found and prioritised within 1000 base pairs unless a -RANGE is given to the method. Any overlap with
1498 the search region is included, and the results can be restricted to upstream, downstream, forward strand or reverse
1500 The -FIVE_PRIME and -THREE_PRIME options allow searching for specific ends of nearby features, but still needs
1501 a -DOWN/UPSTREAM value and/or -NOT_OVERLAPPING to fulfil its most common application.
1504 Returntype : Listref containing an Arrayref of
Bio::
EnsEMBL::Feature objects and the distance
1505 [ [$feature, $distance] ... ]
1509 sub fetch_all_nearest_by_Feature{
1511 my ($ref_feature, $respect_strand, $opposite_strand, $downstream, $upstream, $search_range,$limit,$not_overlapping,$five_prime,$three_prime) =
1512 rearrange([qw(FEATURE SAME_STRAND OPPOSITE_STRAND DOWNSTREAM UPSTREAM RANGE LIMIT NOT_OVERLAPPING FIVE_PRIME THREE_PRIME)], @_);
1513 if ( !defined($search_range)) {
1514 $search_range ||= 1000;
1518 unless (defined($ref_feature) && $ref_feature->isa(
'Bio::EnsEMBL::Feature')) {
1519 throw (
'fetch_all_nearest_by_Feature method requires a valid Ensembl Feature object to operate');
1521 throw (
'Do not specify both -upstream and -downstream. This is the same as no arguments')
if ($upstream && $downstream);
1522 throw (
'Do not use both -SAME_STRAND and -OPPOSITE_STRAND in one query')
if ($opposite_strand && $respect_strand);
1523 my ($region_start,$region_end);
1525 # Define search box. Features overlapping the boundaries are found and considered
1526 my $slice = $ref_feature->feature_Slice;
1528 my $five_prime_expansion = 0;
1529 my $three_prime_expansion = 0;
1531 $five_prime_expansion = $search_range;
1532 $three_prime_expansion = $slice->length * -1;
1533 # shrinks the slice to exclude the features that do not exist beyond the end of the
1534 # considered feature
1536 elsif ($downstream) {
1537 $three_prime_expansion = $search_range;
1538 $five_prime_expansion = $slice->length * -1;
1541 $five_prime_expansion = $search_range;
1542 $three_prime_expansion = $search_range;
1545 $slice = $slice->expand( $five_prime_expansion, $three_prime_expansion);
1546 my $sa = $self->db->get_SliceAdaptor;
1547 my @candidates; # Features in the search region
1548 @candidates = @{$self->fetch_all_by_Slice($slice)};
1549 if ($respect_strand) { @candidates = grep {$_->strand == $ref_feature->strand} @candidates }
1550 if ($opposite_strand) { @candidates = grep {$_->strand != $ref_feature->strand} @candidates }
1551 # When user chooses UPSTREAM/DOWNSTREAM && FIVE_PRIME/THREE_PRIME, discount any features with
1552 # their other end in the overlap region. fetch_all_by_Slice cannot filter these out.
1553 # fetch_all_by_Slice_constraint could in principle with the correct constraint.
1554 if (($upstream && $ref_feature->strand == 1) || ($downstream && $ref_feature->strand == -1)) {
1556 @candidates = grep { ($_->strand == 1) ? $_->seq_region_start < $ref_feature->start : $_->seq_region_end < $ref_feature->start} @candidates;
1557 } elsif ($three_prime) {
1558 @candidates = grep { ($_->strand == 1) ? $_->seq_region_end < $ref_feature->start : $_->seq_region_start < $ref_feature->start} @candidates;
1561 if (($downstream && $ref_feature->strand == 1)|| ($upstream && $ref_feature->strand == -1)) {
1563 @candidates = grep { ($_->strand == 1) ? $_->seq_region_start > $ref_feature->end : $_->seq_region_end > $ref_feature->end } @candidates;
1564 } elsif ($three_prime) {
1565 @candidates = grep { ($_->strand == 1) ? $_->seq_region_end > $ref_feature->end : $_->seq_region_start > $ref_feature->end } @candidates;
1568 # Then sort and prioritise the candidates
1569 my $finalists; # = [[feature, distance, centre-weighted distance, length, dbID],..]
1570 $finalists = $self->select_nearest($ref_feature,\@candidates,$limit,$not_overlapping,$five_prime,$three_prime);
1571 $finalists = [
map { [ splice @$_,0,2 ]} @$finalists ]; # Remove the ugly bits from the sight of users.
1576 =head2 select_nearest
1578 Arg [1] : Bio::Ensembl::Feature, a Feature to find the nearest neighbouring feature to.
1579 Arg [2] : Listref of Features to be considered
for nearness.
1580 Arg [3] : Integer, limited number of Features to
return. Equally near features are all returned in spite of
this limit
1581 Arg [4] : Boolean, Overlapping prohibition. Overlapped Features are forgotten
1582 Arg [5] : Boolean, use the 5
' ends of the nearby features for distance calculation
1583 Arg [6] : Boolean, use the 3' ends of the nearby features
for distance calculation
1584 Example : $feature_list = $feature_adaptor->select_nearest($ref_feature,\@candidates,$limit,$not_overlapping)
1585 Description: Take a list of possible features, and determine which is nearest. Nearness is a
1586 tricky concept. Beware of
using the distance between Features, as it may not be the number you think
1588 Returntype : listref of Features ordered by proximity
1589 Caller : BaseFeatureAdaptor->fetch_all_nearest_by_Feature
1593 sub select_nearest {
1595 my $ref_feature = shift;
1596 my $candidates = shift;
1598 my $not_overlapping = shift;
1599 my $five_prime = shift;
1600 my $three_prime = shift;
1602 # Convert circular coordinates to linear ones for distance calculation
1603 my $ref_start = ($ref_feature->start < $ref_feature->end) ? $ref_feature->start : $ref_feature->start - $ref_feature->length; # Not ->end, in
case circular
1604 my $ref_end = $ref_feature->end;
1605 my $ref_midpoint = $self->_compute_midpoint($ref_feature);
1607 my $position_matrix = [];
1608 my $shortest_distance;
1609 my $adjusted_distance; # for when features overlap and we need another metric to order by
1611 foreach my $neighbour (@$candidates) {
1612 # rearrange coordinates into forward-stranded orientation, plus flatten circular features.
1613 my $neigh_start = ($neighbour->seq_region_start < $neighbour->seq_region_end)
1614 ? $neighbour->seq_region_start : $neighbour->seq_region_start - $neighbour->length; # Not ->end, in
case it is circular
1615 my $neigh_midpoint = $self->_compute_midpoint($neighbour);
1616 my $neigh_end = $neighbour->seq_region_end;
1618 # discard overlaps early if not required.
1619 next
if ( $not_overlapping
1621 ( $neigh_start >= $ref_start && $neigh_end <= $ref_end )
1622 || ( $neigh_end <= $ref_end && $neigh_end >= $ref_start )
1625 my @args = ($ref_start,$ref_midpoint,$ref_end,$neigh_start,$neigh_midpoint,$neigh_end,$neighbour->strand);
1626 if ($five_prime || $three_prime) {
1627 my $five = $five_prime ? 1 : 0; # Choose 5
' or 3' end
1628 ($shortest_distance,$adjusted_distance) = $self->_compute_prime_distance($five,@args);
1629 next unless ($shortest_distance || $adjusted_distance);
1632 ($shortest_distance,$adjusted_distance) = $self->_compute_nearest_end(@args);
1634 push @$position_matrix,[ $neighbour, $shortest_distance, $adjusted_distance, $neighbour->length, $neighbour->display_id] unless ($not_overlapping && $shortest_distance == 0);
1637 # Order by distance, then centre-to-centre distance, then smallest feature first, then an arbitrary ID.
1638 # $position_matrix looks like this:
1639 # [ [ $feature, closest measure of distance, size, dbID ] ]
1640 # Not proud of this line. It re-emits the original array in sorted order to favour better measures of order.
1641 my @ordered_matrix =
map { [$_->[0],$_->[1],$_->[2],$_->[3],$_->[4]] }
1642 sort { abs($a->[1]) <=> abs($b->[1]) || abs($a->[2]) <=> abs($b->[2]) || $a->[3] <=> $b->[3] || $a->[4] <=> $b->[4]} @$position_matrix;
1643 # Knock off unwanted hits. Equal distances are included, so more can be returned than asked for.
1644 @ordered_matrix = $self->_discard_excess_features_from_matrix(\@ordered_matrix,$limit);
1645 return \@ordered_matrix;
1648 =head2 _compute_nearest_end
1650 Arg [1] : Reference feature start
1651 Arg [2] : Reference feature mid-point
1652 Arg [3] : Reference feature end
1653 Arg [4] : Considered feature start
1654 Arg [5] : Considered feature mid-point
1655 Arg [6] : Considered feature end
1656 Example : $distance = $feature_adaptor->_compute_nearest_end($ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end)
1657 Description: For a given feature, calculate the smallest legitimate distance to a reference feature
1658 Calculate by mid-points to accommodate overlaps
1659 Returntype : Integer distance in base pairs
1660 Caller : BaseFeatureAdaptor->select_nearest()
1663 sub _compute_nearest_end {
1664 my ($self,$ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end) = @_;
1665 my ($effective_distance,$weighted_distance);
1666 if ($ref_start > $f_end ) {
1667 # upstream, no overlap
1668 $effective_distance = $f_end - $ref_start; # negative result
for upstream
1669 } elsif ( $ref_end < $f_start ) {
1671 $effective_distance = $f_start - $ref_end; # positive result
for downstream
1674 $effective_distance = 0;
1675 $weighted_distance = $f_midpoint - $ref_midpoint;
1677 return $effective_distance, $weighted_distance;
1680 =head2 _compute_prime_distance
1682 Arg [1] : Reference feature start
1683 Arg [2] : Reference feature mid-point
1684 Arg [3] : Reference feature end
1685 Arg [4] : Considered feature start
1686 Arg [5] : Considered feature mid-point
1687 Arg [6] : Considered feature end
1688 Arg [7] : Considered feature strand
1689 Example : $distance,$weighted_centre_distance = $feature_adaptor->_compute_prime_distance($ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end,$f_strand)
1690 Description: Calculate the smallest distance to the 5
' end of the considered feature
1691 Returntype : Integer distance in base pairs or a string warning that the result doesn't mean anything.
1692 Nearest 5
' and 3' features shouldn
't reside inside the reference Feature
1693 Caller : BaseFeatureAdaptor->select_nearest()
1696 sub _compute_prime_distance {
1697 my ($self,$five_prime,$ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end,$f_strand) = @_;
1700 my $effective_distance;
1701 my $weighted_distance;
1704 $f_coord = ($f_strand == 1) ? $f_start : $f_end;
1705 } else { # three prime end required
1706 $f_coord = ($f_strand ==1 ) ? $f_end : $f_start;
1709 if ($ref_start > $f_coord) {
1710 # n' end of feature upstream of reference
1711 $effective_distance = $f_coord - $ref_start;
1712 if ($f_end < $ref_start) {
1713 $weighted_distance = $f_coord - $ref_midpoint;
1715 } elsif ($ref_end < $f_coord && $f_start > $ref_end) {
1716 # n' end of feature downstream of reference
1717 $effective_distance = $ref_end - $f_coord;
1718 if ($f_start > $ref_end) {
1719 $weighted_distance = $f_coord - $ref_midpoint;
1721 } elsif ($ref_start < $f_coord && $ref_end > $f_coord) {
1722 # total overlap condition
1723 $effective_distance = 0;
1724 $weighted_distance = $f_coord - $ref_midpoint;
1726 # If feature's end falls outside of the reference feature, compute distance to reference midpoint.
1727 return $effective_distance, $weighted_distance;
1730 =head2 _compute_midpoint
1733 Example : $middle = $feature_adaptor->_compute_midpoint($feature);
1734 Description: Calculate the mid-point of a Feature. Used
for comparing Features that overlap each other
1735 and determining a canonical distance between two Features
for the majority of use cases.
1736 Returntype : Integer coordinate rounded down.
1737 Caller : BaseFeatureAdaptor->select_nearest()
1740 sub _compute_midpoint {
1742 my $feature = shift;
1744 my $end = $feature->seq_region_end;
1746 # consider circular slice
1747 if ($start > $end) {
1748 $midpoint = int($end + ($start - $end) / 2);
1750 $midpoint = int($start + ($end - $start) / 2);
1755 sub _discard_excess_features_from_matrix {
1758 my @ordered_matrix = @$list;
1760 return @ordered_matrix
if $#ordered_matrix == 0 || !defined($limit) || $limit > scalar @ordered_matrix;
1761 # cut off excess elements
1762 my @spares = splice @ordered_matrix, $limit, scalar @ordered_matrix - $limit;
1763 # Check nearest distance of the last member of ordered_matrix against spares and include those that match
1764 # This prevents first past the post.
1765 if (scalar @ordered_matrix > 0) {
1766 my $threshold_distance = $ordered_matrix[-1]->[1];
1768 while ($i <= $#spares && $spares[$i]->[1] == $threshold_distance) {
1769 # printf "Considering option %s, %s\n",$spares[$i]->[0]->stable_id,$spares[$i]->[1];
1770 push @ordered_matrix, $spares[$i];
1775 return @ordered_matrix;