3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
38 Abstract
class - should not be instantiated. Implementation of
39 abstract methods must be performed by subclasses.
43 This is a base adaptor
for feature adaptors. This base
class is simply a way
44 of eliminating code duplication through the implementation of methods
45 common to all feature adaptors.
51 package Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
52 use vars qw(@ISA @EXPORT);
64 @EXPORT = (@{$DBI::EXPORT_TAGS{
'sql_types'}});
66 our $SLICE_FEATURE_CACHE_SIZE = 4;
67 our $MAX_SPLIT_QUERY_SEQ_REGIONS = 3;
68 our $SILENCE_CACHE_WARNINGS = 0;
72 Arg [1] : list of args @args
73 Superclass constructor arguments
75 Description: Constructor which warns
if caching has been switched off
76 Returntype : Bio::EnsEMBL::BaseFeatureAdaptor
78 Caller : implementing subclass constructors
84 my ($class, @args) = @_;
85 my $self = $class->SUPER::new(@args);
86 if ( defined $self->db->no_cache() && $self->db->no_cache() && ! $SILENCE_CACHE_WARNINGS) {
87 warning(
"You are using the API without caching most recent features. "
88 .
"Performance might be affected." );
93 =head2 start_equals_end
95 Arg [1] : (optional)
boolean $newval
96 Example : $bfa->start_equals_end(1);
97 Description: Getter/Setter
for the start_equals_end flag. If set
98 to
true sub _slice_fetch will use a simplified sql to retrieve 1bp slices.
101 Caller : EnsemblGenomes variation DB build
106 sub start_equals_end {
107 my ( $self, $value ) = @_;
109 if ( defined($value) ) {
110 $self->{
'start_equals_end'} = $value;
112 return $self->{
'start_equals_end'};
120 $registry->get_adaptor(
'Mus musculus',
'Core',
123 $registry->get_adaptor(
'Mus musculus',
'Core',
127 $sa->fetch_by_region(
'Chromosome',
'1', 1e8,
130 my $genes = $ga->fetch_all_by_Slice($slice);
134 Description : Empties the feature cache associated with
this
139 Status : At risk (under development)
145 $self->_clear_slice_feature_cache();
146 if(!$self->_no_id_cache()) {
147 $self->_id_cache()->clear_cache();
152 sub _clear_slice_feature_cache {
154 %{$self->{_slice_feature_cache}} = ();
158 =head2 _slice_feature_cache
160 Description : Returns the feature cache
if we are allowed to cache and
161 will build it
if we need to. We will never
return a reference
162 to the hash to avoid unintentional
auto-vivfying caching
169 sub _slice_feature_cache {
171 return if $self->db()->no_cache();
172 if(! exists $self->{_slice_feature_cache}) {
173 tie my %cache,
'Bio::EnsEMBL::Utils::Cache', $SLICE_FEATURE_CACHE_SIZE;
174 $self->{_slice_feature_cache} = \%cache;
176 return $self->{_slice_feature_cache};
179 =head2 fetch_all_by_Slice
182 the slice from which to obtain features
183 Arg [2] : (optional)
string $logic_name
184 the logic name of the type of features to obtain
185 Example : $fts = $a->fetch_all_by_Slice($slice,
'Swall');
186 Description: Returns a listref of features created from the database
187 which are on the Slice defined by $slice. If $logic_name is
188 defined only features with an analysis of type $logic_name
190 NOTE: only features that are entirely on the slice
's seq_region
191 will be returned (i.e. if they hang off the start/end of a
192 seq_region they will be discarded). Features can extend over the
193 slice boundaries though (in cases where you have a slice that
194 doesn't span the whole seq_region).
195 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
202 sub fetch_all_by_Slice {
203 my ($self, $slice, $logic_name) = @_;
204 #fetch by constraint with empty constraint
205 return $self->fetch_all_by_Slice_constraint($slice,
'', $logic_name);
210 =head2 fetch_Iterator_by_Slice_method
212 Arg [1] : CODE ref of Slice fetch method
213 Arg [2] : ARRAY ref of parameters
for Slice fetch method
214 Arg [3] : Optional int: Slice index in parameters array
215 Arg [4] : Optional int: Slice chunk size. Default=500000
216 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice_method
217 ($feature_adaptor->can(
'fetch_all_by_Slice_Arrays'),
218 \@fetch_method_params,
222 while(my $feature = $slice_iter->next && defined $feature){
226 Description: Creates an Iterator which chunks the query Slice to facilitate
227 large Slice queries which would have previously
run out of memory
229 Exceptions : Throws
if mandatory params not valid
235 #Does not support Collections. See Funcgen ResultFeatureAdaptor::fetch_collection_Iterator_by_Slice_method
237 sub fetch_Iterator_by_Slice_method{
238 my ($self, $slice_method_ref, $params_ref, $slice_idx, $chunk_size) = @_;
240 if(! ( defined $slice_method_ref &&
241 ref($slice_method_ref) eq
'CODE')
243 throw(
'Must pass a valid Slice fetch method CODE ref');
246 if (! ($params_ref &&
247 ref($params_ref) eq
'ARRAY')) {
248 #Don't need to check size here so long as we have valid Slice
249 throw(
'You must pass a method params ARRAYREF');
252 $slice_idx = 0
if(! defined $slice_idx);
253 my $slice = $params_ref->[$slice_idx];
254 $chunk_size ||= 1000000;
258 my $start = 1; #local coord
for sub slice
259 my $end = $slice->length;
260 my $num_overlaps = 0;
265 while (scalar(@feat_cache) == 0 &&
268 my $new_end = ($start + $chunk_size - 1);
270 if ($new_end >= $end) {
271 # this is our last chunk
276 #Chunk by sub slicing
277 my $sub_slice = $slice->sub_Slice($start, $new_end);
278 $params_ref->[$slice_idx] = $sub_slice;
279 @feat_cache = @{ $slice_method_ref->($self, @$params_ref)};
281 #Remove & count overlapping features
282 splice(@feat_cache, 0, $num_overlaps)
if($num_overlaps);
285 if (scalar(@feat_cache) > 0) {
287 my $feat_end = $feat_cache[$#feat_cache]->seq_region_end;
288 my $slice_end = $sub_slice->end;
290 for ($i = $#feat_cache; $i >=0; $i--) {
291 $feat_end = $feat_cache[$i]->seq_region_end;
293 if ($feat_end > $slice_end) {
302 # update the start coordinate
303 $start = $new_end + 1;
306 #this maybe returning from an undef cache
307 #Need to sub this out even more?
308 return shift @feat_cache;
315 =head2 fetch_Iterator_by_Slice
318 Arg [2] : Optional string: logic name of analysis
319 Arg [3] : Optional int: Chunk size to iterate over. Default is 500000
320 Example : my $slice_iter = $feature_adaptor->fetch_Iterator_by_Slice($slice);
322 while(my $feature = $slice_iter->next && defined $feature){
326 Description: Creates an Iterator which chunks the query Slice to facilitate
327 large Slice queries which would have previously
run out of memory
335 sub fetch_Iterator_by_Slice{
336 my ($self, $slice, $logic_name, $chunk_size) = @_;
338 my $method_ref = $self->can(
'fetch_all_by_Slice');
340 return $self->fetch_Iterator_by_Slice_method($method_ref, [$slice, $logic_name], 0, $chunk_size);
344 =head2 fetch_all_by_Slice_and_score
347 the slice from which to obtain features
348 Arg [2] : (optional)
float $score
349 lower bound of the the score of the features retrieved
350 Arg [3] : (optional)
string $logic_name
351 the logic name of the type of features to obtain
352 Example : $fts = $a->fetch_all_by_Slice_and_score($slice,90,
'Swall');
353 Description: Returns a list of features created from the database which are
354 are on the Slice defined by $slice and which have a score
355 greater than $score. If $logic_name is defined,
356 only features with an analysis of type $logic_name will be
358 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
365 sub fetch_all_by_Slice_and_score {
366 my ( $self, $slice, $score, $logic_name ) = @_;
369 if ( defined($score) ) {
370 # Get the synonym of the primary_table
371 my @tabs = $self->_tables();
372 my $syn = $tabs[0]->[1];
374 $constraint = sprintf(
"%s.score > %s",
376 $self->dbc()->db_handle()->quote( $score, SQL_FLOAT ) );
380 $self->fetch_all_by_Slice_constraint( $slice, $constraint,
385 =head2 fetch_all_by_Slice_constraint
388 the slice from which to obtain features
389 Arg [2] : (optional)
string $constraint
390 An SQL query constraint (i.e. part of the WHERE clause)
391 Arg [3] : (optional)
string $logic_name
392 the logic name of the type of features to obtain
393 Example : $fs = $a->fetch_all_by_Slice_constraint($slc,
'perc_ident > 5');
394 Description: Returns a listref of features created from the database which
395 are on the Slice defined by $slice and fulfill the SQL
396 constraint defined by $constraint. If logic name is defined,
397 only features with an analysis of type $logic_name will be
399 Returntype : listref of Bio::EnsEMBL::SeqFeatures in Slice coordinates
400 Exceptions : thrown
if $slice is not defined
406 sub fetch_all_by_Slice_constraint {
407 my ( $self, $slice, $constraint, $logic_name ) = @_;
411 #Pull out here as we need to remember them & reset accordingly
412 my $bind_params = $self->bind_param_generic_fetch();
415 || !( $slice->isa(
'Bio::EnsEMBL::Slice')
416 or $slice->isa(
'Bio::EnsEMBL::LRGSlice') ) )
418 throw(
"Bio::EnsEMBL::Slice argument expected.");
423 $self->_logic_name_to_constraint( $constraint, $logic_name );
425 # If the logic name was invalid, undef was returned
426 if ( !defined($constraint) ) {
return [] }
431 # Will only use feature_cache if hasn't been no_cache attribute set
433 !( defined( $self->db()->no_cache() ) && $self->db()->no_cache() ) )
436 #strain test and add to constraint if so to stop caching.
437 if ( $slice->isa(
'Bio::EnsEMBL::Variation::StrainSlice') ) {
439 $self->dbc()->db_handle()->quote( $slice->strain_name() );
441 if ( $constraint ne
"" ) {
442 $constraint .=
" AND $string = $string ";
444 $constraint .=
" $string = $string ";
448 # Check the cache and return the cached results if we have already
449 # done this query. The cache key is the made up from the slice
450 # name, the constraint, and the bound parameters (if there are any).
451 $key = uc( join(
':', $slice->name(), $constraint ) );
453 if ( defined($bind_params) ) {
455 . join(
':',
map { $_->[0] .
'/' . $_->[1] } @{$bind_params} );
458 $cache = $self->_slice_feature_cache();
459 if ( exists( $cache->{$key} ) ) {
460 # Clear the bound parameters and return the cached data.
461 $self->{
'_bind_param_generic_fetch'} = ();
462 #IMPORTANT: NEVER EVER RETURN A COPY OF THE DATA STRUCTURE.
463 # This will hold arrays of values. Since we've been doing
464 # this for so long people are expecting multiple calls
465 # to fetch_by_SliceXXXXX() methods to return the same
467 return $cache->{$key};
469 } ## end
if ( !( defined( $self...)))
472 my $proj_ref = $self->_get_and_filter_Slice_projections($slice);
473 my $bounds = $self->_generate_feature_bounds($slice);
475 # fetch features for the primary slice AND all symlinked slices
476 foreach my $seg (@{$proj_ref}) {
478 $self->_bind_param_generic_fetch($bind_params);
479 my $offset = $seg->from_start();
480 my $seg_slice = $seg->to_Slice();
482 $self->_slice_fetch( $seg_slice, $constraint );
484 # If this was a symlinked slice offset the feature coordinates as
486 if ( $seg_slice->name() ne $slice->name() ) {
489 foreach my $f ( @{$features} ) {
490 if ( $offset != 1 ) {
491 $f->{
'start'} += $offset - 1;
492 $f->{
'end'} += $offset - 1;
495 # discard boundary crossing features from symlinked regions
496 foreach my $bound (@{$bounds}) {
497 if ( $f->{
'start'} < $bound && $f->{
'end'} >= $bound ) {
502 $f->{
'slice'} = $slice;
506 push( @result, @{$features} );
508 } ## end
foreach my $seg (@proj)
510 # Will only use feature_cache when set attribute no_cache in DBAdaptor
511 if ( defined($key) ) {
512 $cache->{$key} = \@result;
516 } ## end sub fetch_all_by_Slice_constraint
519 =head2 fetch_all_by_logic_name
521 Arg [1] :
string $logic_name
522 the logic name of the type of features to obtain
523 Example : $fs = $a->fetch_all_by_logic_name(
'foobar');
524 Description: Returns a listref of features created from the database.
525 only features with an analysis of type $logic_name will
526 be returned. If the logic name is invalid (not in the
527 analysis table), a reference to an empty list will be
529 Returntype : listref of Bio::EnsEMBL::SeqFeatures
530 Exceptions : thrown
if no $logic_name
536 sub fetch_all_by_logic_name {
537 my ( $self, $logic_name ) = @_;
539 if ( !defined($logic_name) ) {
540 throw(
"Need a logic_name");
543 my $constraint = $self->_logic_name_to_constraint(
'', $logic_name );
545 if ( !defined($constraint) ) {
546 warning(
"Invalid logic name: $logic_name");
550 return $self->generic_fetch($constraint);
553 =head2 fetch_all_by_stable_id_list
555 Arg [1] :
string $logic_name
556 the logic name of the type of features to obtain
558 the slice from which to obtain features
559 Example : $fs = $a->fetch_all_by_stable_id_list([
"ENSG00001",
"ENSG00002", ...]);
560 Description: Returns a listref of features identified by their stable IDs.
561 This method only fetches features of the same type as the calling
563 Results are constrained to a slice
if the slice is provided.
565 Exceptions : thrown
if no stable ID list is provided.
571 # Adapted from BaseAdaptor->uncached_fetch_all_by_dbID_list
572 sub fetch_all_by_stable_id_list {
573 my ( $self, $id_list_ref, $slice ) = @_;
575 return $self->_uncached_fetch_all_by_id_list($id_list_ref,$slice,
"stable_id");
578 # Method that creates an object. Called by the _objs_from_sth() method
579 # in the sub-classes (the various feature adaptors). Overridden by the
580 # feature collection classes.
582 sub _create_feature {
583 my ( $self, $feature_type, $args ) = @_;
584 return $feature_type->
new( %{$args} );
587 # This is the same as the above, but calls the new_fast() constructor of
590 sub _create_feature_fast {
591 my ( $self, $feature_type, $args ) = @_;
592 return $feature_type->
new_fast($args);
595 =head2 count_by_Slice_constraint
598 Arg [2] : String Custom SQL constraint
599 Arg [3] : String Logic name to search by
600 Description : Finds all features with at least partial overlap to the given
601 slice and sums them up.
602 Explanation of workings with projections:
604 |-------------------------Constraint Slice---------------------------------|
607 | |--Segment 2, on desired Coordinate System--| |
608 | | |---Segment 3----|
609 #Feature 1# #Feature 2# #Feature 3# |
610 | #####################Feature 4#################### |
613 Feature 1 is overlapping the original constraint. Counted in Segment 1
614 Feature 2,3 and 4 are counted when inspecting Segment 2
615 Feature 5 is counted in Segment 1
620 sub count_by_Slice_constraint {
621 my ($self, $slice, $constraint, $logic_name) = @_;
623 assert_ref($slice,
'Bio::EnsEMBL::Slice',
'slice');
627 #Remember the bind params
628 my $bind_params = $self->bind_param_generic_fetch();
631 my @tables = $self->_tables;
632 my ($table_name, $table_synonym) = @{ $tables[0] };
634 # Basic constraints limit the feature hits to the starting Slice constraint
636 $constraint = $self->_logic_name_to_constraint( $constraint, $logic_name );
638 return $count
if ! defined $constraint;
641 my $sa = $slice->adaptor();
642 my $projections = $self->_get_and_filter_Slice_projections($slice);
644 my $segment_count = @{$projections};
645 #Manual loop to support look-ahead/behind
646 for (my $i = 0; $i < $segment_count; $i++) {
647 my $seg = $projections->[$i];
648 my $seg_slice = $seg->to_Slice();
649 $self->_bind_param_generic_fetch($bind_params);
651 # We cannot filter boundary crossing features in code, so we constrain the
652 # SQL. We detect when we are not on the original query Slice and manually
653 # filter by the segment Slice's start and end. If we are on "earlier" (5')
654 # projection segments, we only limit by the *end* and when we are on the later
655 # projection segments, we filter by the *start*.
657 # This is the same as fetch_all_by_Slice_constraint()'s in-memory filtering
658 # except we need to alter projected features in that code with an offset
659 my $local_constraint = $constraint;
660 if ( $seg_slice->name() ne $slice->name() ) {
661 my ($limit_start, $limit_end) = (1,1); #fully constrained by
default, flags are reset every iteration
664 } elsif ($i == ($segment_count - 1)) {
665 $limit_end = 0; #don
't check end as we are on the final projection
667 $local_constraint .= ' AND
' if $local_constraint;
671 # Restrict features to the end of this projection segment when after the named segment
672 push(@conditionals, sprintf('%1$s.seq_region_end <= %2$d
', $table_synonym, $seg_slice->end));
675 #Do not cross the start boundary on this projection segment
676 push(@conditionals, sprintf('%1$s.seq_region_start >= %2$d
', $table_synonym, $seg_slice->start));
679 $local_constraint .= join(q{ AND }, @conditionals);
682 my $count_array = $self->_get_by_Slice($seg_slice, $local_constraint, 'count
');
683 $count += $_ for @$count_array;
689 =head2 _get_and_filter_Slice_projections
691 Arg [1] : Bio::EnsEMBL::Slice
692 Description : Delegates onto SliceAdaptor::fetch_normalized_slice_projection()
694 Returntype : ArrayRef Bio::EnsEMBL::ProjectionSegment; Returns an array
695 of projected segments
698 sub _get_and_filter_Slice_projections {
699 my ($self, $slice) = @_;
700 my $sa = $slice->adaptor();
701 my $filter_projections = 1;
702 return $sa->fetch_normalized_slice_projection($slice, $filter_projections);
705 =head2 _generate_feature_bounds
707 Arg [1] : Bio::EnsEMBL::Slice
708 Description : Performs a projection of Slice and records the bounds
709 of that projection. This can be used later on to restrict
710 Features which overlap into unwanted areas such as
711 regions which exist on another HAP/PAR region.
713 Bounds are defined as projection_start - slice_start + 1.
714 Example : my $bounds = $self->_generate_feature_bounds($slice);
715 Returntype : ArrayRef Integer; Returns the location of the bounds.
718 sub _generate_feature_bounds {
719 my ($self, $slice) = @_;
720 my $sa = $slice->adaptor();
721 # construct list of Hap/PAR boundaries for entire seq region
724 my $sr_id = $slice->get_seq_region_id();
725 my $ent_slice = $sa->fetch_by_seq_region_id($sr_id);
726 if ( $slice->strand() == -1 ) {
727 $ent_slice = $ent_slice->invert();
730 my @ent_proj = @{ $sa->fetch_normalized_slice_projection($ent_slice) };
731 shift(@ent_proj); # skip first; 1st does not have bounds normally; may change if we ever have a patch a pos 1
733 @bounds = map { $_->from_start() - $slice->start() + 1 } @ent_proj;
739 Arg [0] : Bio::EnsEMBL::Slice to find all the features within
740 Arg [1] : SQL constraint string
741 Arg [2] : Type of query to run. Default behaviour is to select, but
742 'count
' is also valid
743 Description: Abstracted logic from _slice_fetch
744 Returntype : Listref of Bio::EnsEMBL::Feature, or integers for counting mode
748 my ($self, $slice, $orig_constraint, $query_type) = @_;
750 # features can be scattered across multiple coordinate systems
751 my @tables = $self->_tables;
752 my ($table_name, $table_synonym) = @{ $tables[0] };
754 my @feature_coord_systems;
756 my $meta_values = $self->db->get_MetaContainer->list_value_by_key( $table_name."build.level");
757 if ( @$meta_values and $slice->is_toplevel() ) {
758 push @feature_coord_systems, $slice->coord_system();
760 @feature_coord_systems = @{ $self->db->get_MetaCoordContainer->fetch_all_CoordSystems_by_feature_type($table_name)};
763 my $assembly_mapper_adaptor = $self->db->get_AssemblyMapperAdaptor();
764 my @pan_coord_features;
766 COORD_SYSTEM: foreach my $coord_system (@feature_coord_systems) {
767 my @query_accumulator;
768 # Build up a combination of query constraints that will quickly establish the result set
769 my $constraint = $orig_constraint;
770 if ( $coord_system->equals( $slice->coord_system ) ) {
771 my $max_len = $self->_max_feature_length
772 || $self->db->get_MetaCoordContainer
773 ->fetch_max_length_by_CoordSystem_feature_type( $coord_system,$table_name );
776 if ( $slice->adaptor ) {
777 $seq_region_id = $slice->adaptor->get_seq_region_id($slice);
779 $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
782 my @seq_region_ids = ($seq_region_id);
784 my $ext_seq_region_id = $self->get_seq_region_id_external($seq_region_id);
786 if ( $ext_seq_region_id == $seq_region_id ) { last }
788 push( @seq_region_ids, $ext_seq_region_id );
789 $seq_region_id = $ext_seq_region_id;
792 $constraint .= " AND " if ($constraint);
794 $constraint .= ${table_synonym}.".seq_region_id IN (". join( ',
', @seq_region_ids ) . ") AND ";
796 #faster query for 1bp slices where SNP data is not compressed
797 if ( $self->start_equals_end && $slice->start == $slice->end ) {
798 $constraint .= " AND ".$table_synonym.".seq_region_start = ".$slice->end .
799 " AND ".$table_synonym.".seq_region_end = ".$slice->start;
802 if ( !$slice->is_circular() ) {
803 # Deal with the default case of a non-circular chromosome.
804 $constraint .= $table_synonym.".seq_region_start <= ".$slice->end." AND "
805 .$table_synonym.".seq_region_end >= ".$slice->start;
808 my $min_start = $slice->start - $max_len;
809 $constraint .= " AND ".$table_synonym.".seq_region_start >= ".$min_start;
813 # Deal with the case of a circular chromosome.
814 if ( $slice->start > $slice->end ) {
815 $constraint .= " ( ".$table_synonym.".seq_region_start >= ".$slice->start
816 . " OR ".$table_synonym.".seq_region_start <= ".$slice->end
817 . " OR ".$table_synonym.".seq_region_end >= ".$slice->start
818 . " OR ".$table_synonym.".seq_region_end <= ".$slice->end
819 . " OR ".$table_synonym.".seq_region_start > ".$table_synonym.".seq_region_end)";
821 $constraint .= " ((".$table_synonym.".seq_region_start <= ".$slice->end
822 . " AND ".$table_synonym.".seq_region_end >= ".$slice->start.") "
823 . "OR (".$table_synonym.".seq_region_start > ".$table_synonym.".seq_region_end"
824 . " AND (".$table_synonym.".seq_region_start <= ".$slice->end
825 . " OR ".$table_synonym.".seq_region_end >= ".$slice->start.")))";
829 push @query_accumulator, [$constraint,undef,$slice]; # $mapper intentionally absent here.
832 #coordinate systems do not match
833 $mapper = $assembly_mapper_adaptor->fetch_by_CoordSystems( $slice->coord_system(), $coord_system );
834 next unless defined $mapper;
836 # Get list of coordinates and corresponding internal ids for
837 # regions the slice spans
838 my @coords = $mapper->map( $slice->seq_region_name, $slice->start, $slice->end,
839 $slice->strand, $slice->coord_system );
843 next COORD_SYSTEM if ( !@coords );
845 my @ids = map { $_->id() } @coords;
846 #coords are now id rather than name
849 && $slice->coord_system->name() ne 'lrg
') {
850 $constraint = $orig_constraint;
851 my $id_str = join( ',
', @ids );
852 $constraint .= " AND " if ($constraint);
853 $constraint .= $table_synonym.".seq_region_id IN ($id_str)";
855 push @query_accumulator, [$constraint,$mapper,$slice];
858 $self->_max_feature_length()
859 || $self->db->get_MetaCoordContainer
860 ->fetch_max_length_by_CoordSystem_feature_type($coord_system, $table_name)
863 my $length = @coords;
864 for ( my $i = 0; $i < $length; $i++ ) {
865 $constraint = $orig_constraint;
866 $constraint .= " AND " if ($constraint);
867 $constraint .= $table_synonym.".seq_region_id = "
869 . $table_synonym.".seq_region_start <= "
870 . $coords[$i]->end() . " AND "
871 . $table_synonym.".seq_region_end >= "
872 . $coords[$i]->start();
875 my $min_start = $coords[$i]->start() - $max_len;
876 $constraint .= " AND ".$table_synonym.".seq_region_start >= ".$min_start;
879 push @query_accumulator, [$constraint,$mapper,$slice];
880 } # end multi-query cycle
883 } # end else (coord sytems not matching)
885 #Record the bind params if we have to do multiple queries
886 my $bind_params = $self->bind_param_generic_fetch();
888 foreach my $query (@query_accumulator) {
889 my ($local_constraint,$local_mapper,$local_slice) = @$query;
890 $self->_bind_param_generic_fetch($bind_params);
891 if ($query_type and $query_type eq 'count
') {
892 push @pan_coord_features, $self->generic_count($local_constraint);
895 my $features = $self->generic_fetch( $local_constraint, $local_mapper, $local_slice );
896 $features = $self->_remap( $features, $local_mapper, $local_slice );
897 push @pan_coord_features, @$features;
902 $self->{_bind_param_generic_fetch} = undef;
903 return \@pan_coord_features;
906 # helper function used by fetch_all_by_Slice_constraint method
909 my ( $self, $slice, $orig_constraint ) = @_;
911 my $features = $self->_get_by_Slice($slice,$orig_constraint);
914 } ## end sub _slice_fetch
917 #for a given seq_region_id, gets the one used in an external database, if present, otherwise, returns the internal one
918 sub get_seq_region_id_external {
919 my ( $self, $sr_id ) = @_;
920 my $cs_a = $self->db()->get_CoordSystemAdaptor();
921 return ( exists( $cs_a->{'_internal_seq_region_mapping
'}->{$sr_id} )
922 ? $cs_a->{'_internal_seq_region_mapping
'}->{$sr_id}
926 #for a given seq_region_id and coord_system, gets the one used in the internal (core) database
927 sub get_seq_region_id_internal{
928 my ( $self, $sr_id ) = @_;
929 my $cs_a = $self->db()->get_CoordSystemAdaptor();
930 return ( exists $cs_a->{'_external_seq_region_mapping
'}->{$sr_id}
931 ? $cs_a->{'_external_seq_region_mapping
'}->{$sr_id}
936 # Helper function containing some common feature storing functionality
938 # Given a Feature this will return a copy (or the same feature if no changes
939 # to the feature are needed) of the feature which is relative to the start
940 # of the seq_region it is on. The seq_region_id of the seq_region it is on
943 # This method will also ensure that the database knows which coordinate
944 # systems that this feature is stored in.
952 throw('Expected Feature argument.
');
954 my $slice = $feature->slice();
956 $self->_check_start_end_strand($feature->start(),$feature->end(),
957 $feature->strand(), $slice);
960 my $db = $self->db();
962 my $slice_adaptor = $db->get_SliceAdaptor();
965 throw('Feature must be attached to Slice to be stored.
');
968 # make sure feature coords are relative to start of entire seq_region
970 if($slice->start != 1 || $slice->strand != 1) {
971 #move feature onto a slice of the entire seq_region
972 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
973 $slice->seq_region_name(),
977 $slice->coord_system->version());
979 $feature = $feature->transfer($slice);
982 throw('Could not transfer Feature to slice of
' .
983 'entire seq_region prior to storing
');
987 # Ensure this type of feature is known to be stored in this coord system.
988 my $cs = $slice->coord_system;
990 my ($tab) = $self->_tables();
991 my $tabname = $tab->[0];
993 my $mcc = $db->get_MetaCoordContainer();
995 $mcc->add_feature_type($cs, $tabname, $feature->length);
997 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
999 if(!$seq_region_id) {
1000 throw('Feature is associated with seq_region which is not in
this DB.
');
1003 return ($feature, $seq_region_id);
1007 # The same function as _pre_store
1008 # This one is used to store user uploaded features in XXX_userdata db
1010 sub _pre_store_userdata {
1012 my $feature = shift;
1014 if(!ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature
')) {
1015 throw('Expected Feature argument.
');
1018 my $slice = $feature->slice();
1019 my $slice_adaptor = $slice->adaptor;
1021 $self->_check_start_end_strand($feature->start(),$feature->end(),
1022 $feature->strand(), $slice);
1026 throw('Feature must be attached to Slice to be stored.
');
1029 # make sure feature coords are relative to start of entire seq_region
1031 if($slice->start != 1 || $slice->strand != 1) {
1032 #move feature onto a slice of the entire seq_region
1033 $slice = $slice_adaptor->fetch_by_region($slice->coord_system->name(),
1034 $slice->seq_region_name(),
1038 $slice->coord_system->version());
1040 $feature = $feature->transfer($slice);
1043 throw('Could not transfer Feature to slice of
' .
1044 'entire seq_region prior to storing
');
1048 # Ensure this type of feature is known to be stored in this coord system.
1049 my $cs = $slice->coord_system;
1051 my ($tab) = $self->_tables();
1052 my $tabname = $tab->[0];
1055 my $mcc = $db->get_MetaCoordContainer();
1057 $mcc->add_feature_type($cs, $tabname, $feature->length);
1059 my $seq_region_id = $slice_adaptor->get_seq_region_id($slice);
1061 if(!$seq_region_id) {
1062 throw('Feature is associated with seq_region which is not in
this DB.
');
1065 return ($feature, $seq_region_id);
1070 # helper function used to validate start/end/strand and
1071 # hstart/hend/hstrand etc.
1073 sub _check_start_end_strand {
1074 my ($self, $start, $end, $strand, $slice) = @_;
1077 # Make sure that the start, end, strand are valid
1079 if(int($start) != $start) {
1080 throw("Invalid Feature start [$start]. Must be integer.");
1082 if(int($end) != $end) {
1083 throw("Invalid Feature end [$end]. Must be integer.");
1085 if(int($strand) != $strand || $strand < -1 || $strand > 1) {
1086 throw("Invalid Feature strand [$strand]. Must be -1, 0 or 1.");
1088 if($end < $start && ! (defined $slice && $slice->is_circular())) {
1089 throw("Invalid Feature start/end [$start/$end]. Start must be less " .
1090 "than or equal to end.");
1098 # Given a list of features checks if they are in the correct coord system
1099 # by looking at the first features slice. If they are not then they are
1100 # converted and placed on the slice.
1103 my ( $self, $features, $mapper, $slice ) = @_;
1105 #check if any remapping is actually needed
1106 if(@$features && (!$features->[0]->isa('Bio::EnsEMBL::Feature
') ||
1107 $features->[0]->slice == $slice)) {
1111 #remapping has not been done, we have to do our own conversion from
1116 my $slice_start = $slice->start();
1117 my $slice_end = $slice->end();
1118 my $slice_strand = $slice->strand();
1119 my $slice_cs = $slice->coord_system();
1121 my ($seq_region_id, $start, $end, $strand);
1123 my $slice_seq_region_id = $slice->get_seq_region_id();
1125 foreach my $f (@$features) {
1126 #since feats were obtained in contig coords, attached seq is a contig
1127 my $fslice = $f->slice();
1129 throw("Feature does not have attached slice.\n");
1131 my $fseq_region_id = $fslice->get_seq_region_id();
1132 my $fcs = $fslice->coord_system();
1134 if(!$slice_cs->equals($fcs)) {
1135 #slice of feature in different coord system, mapping required
1136 # $seq_region comes out as an integer not a string
1137 ($seq_region_id, $start, $end, $strand) =
1138 $mapper->fastmap($fslice->seq_region_name(),$f->start(),$f->end(),$f->strand(),$fcs);
1140 # undefined start means gap
1141 next if(!defined $start);
1143 $start = $f->start();
1145 $strand = $f->strand();
1146 $seq_region_id = $fseq_region_id;
1149 # maps to region outside desired area
1150 next if ($start > $slice_end) || ($end < $slice_start) ||
1151 ($slice_seq_region_id != $seq_region_id);
1153 #shift the feature start, end and strand in one call
1154 if($slice_strand == -1) {
1155 $f->move( $slice_end - $end + 1, $slice_end - $start + 1, $strand * -1 );
1157 $f->move( $start - $slice_start + 1, $end - $slice_start + 1, $strand );
1169 # get seq region boundary (start|end) for a feature
1170 # the method attempts to retrieve the boundary directly from the db
1171 # return undef if cannot determine the associated feature table
1173 sub _seq_region_boundary_from_db {
1174 my ($self, $feature, $boundary) = @_;
1177 throw('Expected Feature argument.
');
1180 throw "Undefined boundary"
1181 unless defined $boundary;
1183 $boundary eq 'start' or $boundary eq 'end
'
1184 or throw "Wrong boundary: select start|end";
1186 $boundary = 'seq_region_
' . $boundary;
1188 my $sql_helper = $self->dbc->sql_helper;
1189 throw "Unable to get SqlHelper instance" unless defined $sql_helper;
1191 my $feature_table = ($self->_tables)[0]->[0];
1192 warning (sprintf "Unable to get %s for %s instance\nCould not find associated feature table, returning undef", $boundary, ref $feature)
1193 and return undef unless defined $feature_table;
1195 my $db_id = $feature->dbID;
1196 my $attrib_id = $feature_table . '_id
';
1197 my $query = "SELECT ${boundary} from ${feature_table} WHERE ${attrib_id} = ${db_id}";
1199 return $sql_helper->execute_single_result(-SQL => $query);
1204 Arg [1] : list of Bio::EnsEMBL::SeqFeature
1205 Example : $adaptor->store(@feats);
1206 Description: ABSTRACT Subclasses are responsible for implementing this
1207 method. It should take a list of features and store them in
1210 Exceptions : thrown method is not implemented by subclass
1220 throw("Abstract method store not defined by implementing subclass\n");
1226 Arg [1] : A feature $feature
1227 Example : $feature_adaptor->remove($feature);
1228 Description: This removes a feature from the database. The table the
1229 feature is removed from is defined by the abstract method
1230 _tablename, and the primary key of the table is assumed
1231 to be _tablename() . '_id
'. The feature argument must
1232 be an object implementing the dbID method, and for the
1233 feature to be removed from the database a dbID value must
1236 Exceptions : thrown if $feature arg does not implement dbID(), or if
1237 $feature->dbID is not a true value
1245 my ($self, $feature) = @_;
1248 throw('Feature argument is required
');
1251 if(!$feature->is_stored($self->db)) {
1252 throw("This feature is not stored in this database");
1255 my @tabs = $self->_tables;
1256 my ($table) = @{$tabs[0]};
1258 my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
1259 $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
1262 #unset the feature dbID ad adaptor
1263 $feature->dbID(undef);
1264 $feature->adaptor(undef);
1270 =head2 remove_by_Slice
1272 Arg [1] : Bio::Ensembl::Slice $slice
1273 Example : $feature_adaptor->remove_by_Slice($slice);
1274 Description: This removes features from the database which lie on a region
1275 represented by the passed in slice. Only features which are
1276 fully contained by the slice are deleted; features which overlap
1277 the edge of the slice are not removed.
1278 The table the features are removed from is defined by
1279 the abstract method_tablename.
1281 Exceptions : thrown if no slice is supplied
1287 sub remove_by_Slice {
1288 my ($self, $slice) = @_;
1291 throw("Slice argument is required");
1294 my @tabs = $self->_tables;
1295 my ($table_name) = @{$tabs[0]};
1297 my $seq_region_id = $self->db->get_SliceAdaptor->get_seq_region_id($slice);
1298 my $start = $slice->start();
1299 my $end = $slice->end();
1302 # Delete only features fully on the slice, not overlapping ones
1304 my $sth = $self->prepare("DELETE FROM $table_name " .
1305 "WHERE seq_region_id = ? " .
1306 "AND seq_region_start >= ? " .
1307 "AND seq_region_end <= ?");
1309 $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
1310 $sth->bind_param(2,$start,SQL_INTEGER);
1311 $sth->bind_param(3,$end,SQL_INTEGER);
1318 # Internal function. Allows the max feature length which is normally
1319 # retrieved from the meta_coord table to be overridden. This allows
1320 # for some significant optimizations to be put in when it is known
1321 # that requested features will not be over a certain size.
1323 sub _max_feature_length {
1325 return $self->{'_max_feature_length
'} = shift if(@_);
1326 return $self->{'_max_feature_length
'};
1331 # Lists all seq_region_ids that a particular feature type is found on.
1332 # Useful e.g. for finding out which seq_regions have genes.
1333 # Returns a listref of seq_region_ids.
1335 sub _list_seq_region_ids {
1336 my ($self, $table) = @_;
1346 WHERE sr.seq_region_id = a.seq_region_id
1347 AND sr.coord_system_id = cs.coord_system_id
1348 AND cs.species_id = ?);
1350 my $sth = $self->prepare($sql);
1352 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1356 while (my ($id) = $sth->fetchrow) {
1366 sub remove_by_analysis_id {
1367 my ($self, $analysis_id) = @_;
1369 $analysis_id or throw("Must call with analysis id");
1371 my @tabs = $self->_tables;
1372 my ($tablename) = @{$tabs[0]};
1374 my $sql = "DELETE FROM $tablename WHERE analysis_id = $analysis_id";
1375 # warn "SQL : $sql";
1377 my $sth = $self->prepare($sql);
1382 sub remove_by_feature_id {
1383 my ($self, $features_list) = @_;
1385 my @feats = @$features_list or throw("Must call store with features");
1387 my @tabs = $self->_tables;
1388 my ($tablename) = @{$tabs[0]};
1390 my $sql = sprintf "DELETE FROM $tablename WHERE ${tablename}_id IN (%s)", join ',
', @feats;
1391 # warn "SQL : $sql";
1393 my $sth = $self->prepare($sql);
1398 =head2 fetch_nearest_by_Feature
1400 Arg [1] : Reference Feature to start the search from
1401 Description: Searches iteratively outward from the starting feature until a nearby Feature is found
1402 If you require more than one result or more control of which features are returned, see
1403 fetch_all_nearest_by_Feature and fetch_all_by_outward_search. fetch_nearest_by_Feature
1404 is a convenience method.
1405 ReturnType : Bio::EnsEMBL::Feature
1408 sub fetch_nearest_by_Feature {
1410 my $feature = shift;
1411 my @hits = @{ $self->fetch_all_by_outward_search(-FEATURE => $feature, -MAX_RANGE => 1000000, -RANGE => 1000, -LIMIT => 1) };
1412 return $hits[0] if (scalar @hits > 0);
1417 =head2 fetch_all_by_outward_search
1419 Arguments the same as fetch_all_nearest_by_Feature
1420 Arg [0] : -MAX_RANGE : Set an upper limit on the search range, defaults to 10000 bp
1421 Arg [1] : -FEATURE ,Bio::EnsEMBL::Feature : 'Source
' Feature to anchor the search for nearest Features
1422 Arg [2] : -SAME_STRAND, Boolean (optional) : Respect the strand of the source Feature with ref, only
1423 returning Features on the same strand.
1424 Arg [3] : -OPPOSITE_STRAND, Boolean (optional) : Find features on the opposite strand of the same
1425 Arg [4] : -DOWNSTREAM/-UPSTREAM, (optional) : Search ONLY downstream or upstream from the source Feature.
1426 Can be omitted for searches in both directions.
1427 Arg [5] : -RANGE, Int : The size of the space to search for Features. Defaults to 1000 as a sensible starting point
1428 Arg [6] : -NOT_OVERLAPPING, Boolean (optional) : Do not return Features that overlap the source Feature
1429 Arg [7] : -FIVE_PRIME, Boolean (optional) : Determine range to a Feature by the 5' end, respecting strand
1430 Arg [8] : -THREE_PRIME, Boolean (optional): Determine range to a Feature by the 3
' end, respecting strand
1431 Arg [9] : -LIMIT, Int : The maximum number of Features to return, defaulting to one. Equally near features are all returned
1433 Description: Searches for features within the suggested -RANGE, and if it finds none, expands the search area
1434 until it satisfies -LIMIT or hits -MAX_RANGE. Useful if you don't know how far away the features
1435 might be, or if dealing with areas of high feature density. In the case of Variation Features, it is
1436 conceivable that a 2000 bp window might contain very many features, resulting in a slow and bloated
1437 response, thus the ability to explore outward in smaller sections can be useful.
1438 Returntype : Listref of [$feature,$distance]
1441 sub fetch_all_by_outward_search {
1443 my ($ref_feature, $respect_strand, $opposite_strand, $downstream, $upstream, $start_search_range,
1444 $limit,$not_overlapping,$five_prime,$three_prime, $max_range) =
1445 rearrange([qw(FEATURE SAME_STRAND OPPOSITE_STRAND DOWNSTREAM UPSTREAM RANGE LIMIT NOT_OVERLAPPING FIVE_PRIME THREE_PRIME MAX_RANGE)], @_);
1448 $start_search_range ||= 1000;
1449 my $current_search_range = $start_search_range;
1450 $max_range ||= 10000;
1452 while (scalar @results < $limit && $current_search_range < $max_range) {
1453 $current_search_range = $start_search_range * $factor;
1455 $self->fetch_all_nearest_by_Feature(-RANGE => $current_search_range,
1456 -FEATURE => $ref_feature,
1457 -SAME_STRAND => $respect_strand,
1458 -OPPOSITE_STRAND => $opposite_strand,
1459 -DOWNSTREAM => $downstream,
1460 -UPSTREAM => $upstream,
1462 -NOT_OVERLAPPING => $not_overlapping,
1463 -FIVE_PRIME => $five_prime,
1464 -THREE_PRIME => $three_prime,
1472 =head2 fetch_all_nearest_by_Feature
1474 Arg [1] : -FEATURE ,
Bio::EnsEMBL::Feature :
'Source' Feature to anchor the search
for nearest Features
1475 Arg [2] : -SAME_STRAND, Boolean (optional): Respect the strand of the source Feature with ref, only
1476 returning Features on the same strand
1477 Arg [3] : -OPPOSITE_STRAND, Boolean (optional) : Find features on the opposite strand of the same
1478 Arg [4] : -DOWNSTREAM/-UPSTREAM, (optional) : Search ONLY downstream or upstream from the source Feature.
1479 Can be omitted for searches in both directions.
1480 Arg [5] : -RANGE, Int : The size of the space to search for Features. Defaults to 1000 as a sensible starting point
1481 Arg [6] : -NOT_OVERLAPPING, Boolean (optional) : Do not return Features that overlap the source Feature
1482 Arg [7] : -FIVE_PRIME, Boolean (optional) : Determine range to a Feature by its 5
' end, respecting strand
1483 Arg [8] : -THREE_PRIME, Boolean (optional): Determine range to a Feature by its 3' end, respecting strand
1484 Arg [9] : -LIMIT, Int : The maximum number of Features to return, defaulting to one. Equally near features are all returned
1485 Example : #To fetch the gene(s) with the nearest 5
' end:
1486 $genes = $gene_adaptor->fetch_all_nearest_by_Feature(-FEATURE => $feat, -FIVE_PRIME => 1);
1488 Description: Gets the nearest Features to a given 'source
' Feature. The Feature returned and the format of the result
1489 are non-obvious, please read on.
1491 When looking beyond the boundaries of the source Feature, the distance is measured to the nearest end
1492 of that Feature to the nearby Feature's nearest end.
1493 If Features overlap the source Feature, then they are given a distance of zero but ordered by
1494 their proximity to the centre of the Feature.
1496 Features are found and prioritised within 1000 base pairs unless a -RANGE is given to the method. Any overlap with
1497 the search region is included, and the results can be restricted to upstream, downstream, forward strand or reverse
1499 The -FIVE_PRIME and -THREE_PRIME options allow searching for specific ends of nearby features, but still needs
1500 a -DOWN/UPSTREAM value and/or -NOT_OVERLAPPING to fulfil its most common application.
1503 Returntype : Listref containing an Arrayref of
Bio::
EnsEMBL::Feature objects and the distance
1504 [ [$feature, $distance] ... ]
1508 sub fetch_all_nearest_by_Feature{
1510 my ($ref_feature, $respect_strand, $opposite_strand, $downstream, $upstream, $search_range,$limit,$not_overlapping,$five_prime,$three_prime) =
1511 rearrange([qw(FEATURE SAME_STRAND OPPOSITE_STRAND DOWNSTREAM UPSTREAM RANGE LIMIT NOT_OVERLAPPING FIVE_PRIME THREE_PRIME)], @_);
1512 if ( !defined($search_range)) {
1513 $search_range ||= 1000;
1517 unless (defined($ref_feature) && $ref_feature->isa(
'Bio::EnsEMBL::Feature')) {
1518 throw (
'fetch_all_nearest_by_Feature method requires a valid Ensembl Feature object to operate');
1520 throw (
'Do not specify both -upstream and -downstream. This is the same as no arguments')
if ($upstream && $downstream);
1521 throw (
'Do not use both -SAME_STRAND and -OPPOSITE_STRAND in one query')
if ($opposite_strand && $respect_strand);
1522 my ($region_start,$region_end);
1524 # Define search box. Features overlapping the boundaries are found and considered
1525 my $slice = $ref_feature->feature_Slice;
1527 my $five_prime_expansion = 0;
1528 my $three_prime_expansion = 0;
1530 $five_prime_expansion = $search_range;
1531 $three_prime_expansion = $slice->length * -1;
1532 # shrinks the slice to exclude the features that do not exist beyond the end of the
1533 # considered feature
1535 elsif ($downstream) {
1536 $three_prime_expansion = $search_range;
1537 $five_prime_expansion = $slice->length * -1;
1540 $five_prime_expansion = $search_range;
1541 $three_prime_expansion = $search_range;
1544 $slice = $slice->expand( $five_prime_expansion, $three_prime_expansion);
1545 my $sa = $self->db->get_SliceAdaptor;
1546 my @candidates; # Features in the search region
1547 @candidates = @{$self->fetch_all_by_Slice($slice)};
1548 if ($respect_strand) { @candidates = grep {$_->strand == $ref_feature->strand} @candidates }
1549 if ($opposite_strand) { @candidates = grep {$_->strand != $ref_feature->strand} @candidates }
1550 # When user chooses UPSTREAM/DOWNSTREAM && FIVE_PRIME/THREE_PRIME, discount any features with
1551 # their other end in the overlap region. fetch_all_by_Slice cannot filter these out.
1552 # fetch_all_by_Slice_constraint could in principle with the correct constraint.
1553 if (($upstream && $ref_feature->strand == 1) || ($downstream && $ref_feature->strand == -1)) {
1555 @candidates = grep { ($_->strand == 1) ? $_->seq_region_start < $ref_feature->start : $_->seq_region_end < $ref_feature->start} @candidates;
1556 } elsif ($three_prime) {
1557 @candidates = grep { ($_->strand == 1) ? $_->seq_region_end < $ref_feature->start : $_->seq_region_start < $ref_feature->start} @candidates;
1560 if (($downstream && $ref_feature->strand == 1)|| ($upstream && $ref_feature->strand == -1)) {
1562 @candidates = grep { ($_->strand == 1) ? $_->seq_region_start > $ref_feature->end : $_->seq_region_end > $ref_feature->end } @candidates;
1563 } elsif ($three_prime) {
1564 @candidates = grep { ($_->strand == 1) ? $_->seq_region_end > $ref_feature->end : $_->seq_region_start > $ref_feature->end } @candidates;
1567 # Then sort and prioritise the candidates
1568 my $finalists; # = [[feature, distance, centre-weighted distance, length, dbID],..]
1569 $finalists = $self->select_nearest($ref_feature,\@candidates,$limit,$not_overlapping,$five_prime,$three_prime);
1570 $finalists = [
map { [ splice @$_,0,2 ]} @$finalists ]; # Remove the ugly bits from the sight of users.
1575 =head2 select_nearest
1577 Arg [1] : Bio::Ensembl::Feature, a Feature to find the nearest neighbouring feature to.
1578 Arg [2] : Listref of Features to be considered
for nearness.
1579 Arg [3] : Integer, limited number of Features to
return. Equally near features are all returned in spite of
this limit
1580 Arg [4] : Boolean, Overlapping prohibition. Overlapped Features are forgotten
1581 Arg [5] : Boolean, use the 5
' ends of the nearby features for distance calculation
1582 Arg [6] : Boolean, use the 3' ends of the nearby features
for distance calculation
1583 Example : $feature_list = $feature_adaptor->select_nearest($ref_feature,\@candidates,$limit,$not_overlapping)
1584 Description: Take a list of possible features, and determine which is nearest. Nearness is a
1585 tricky concept. Beware of
using the distance between Features, as it may not be the number you think
1587 Returntype : listref of Features ordered by proximity
1588 Caller : BaseFeatureAdaptor->fetch_all_nearest_by_Feature
1592 sub select_nearest {
1594 my $ref_feature = shift;
1595 my $candidates = shift;
1597 my $not_overlapping = shift;
1598 my $five_prime = shift;
1599 my $three_prime = shift;
1601 # Convert circular coordinates to linear ones for distance calculation
1602 my $ref_start = ($ref_feature->start < $ref_feature->end) ? $ref_feature->start : $ref_feature->start - $ref_feature->length; # Not ->end, in
case circular
1603 my $ref_end = $ref_feature->end;
1604 my $ref_midpoint = $self->_compute_midpoint($ref_feature);
1606 my $position_matrix = [];
1607 my $shortest_distance;
1608 my $adjusted_distance; # for when features overlap and we need another metric to order by
1610 foreach my $neighbour (@$candidates) {
1611 # rearrange coordinates into forward-stranded orientation, plus flatten circular features.
1612 my $neigh_start = ($neighbour->seq_region_start < $neighbour->seq_region_end)
1613 ? $neighbour->seq_region_start : $neighbour->seq_region_start - $neighbour->length; # Not ->end, in
case it is circular
1614 my $neigh_midpoint = $self->_compute_midpoint($neighbour);
1615 my $neigh_end = $neighbour->seq_region_end;
1617 # discard overlaps early if not required.
1618 next
if ( $not_overlapping
1620 ( $neigh_start >= $ref_start && $neigh_end <= $ref_end )
1621 || ( $neigh_end <= $ref_end && $neigh_end >= $ref_start )
1624 my @args = ($ref_start,$ref_midpoint,$ref_end,$neigh_start,$neigh_midpoint,$neigh_end,$neighbour->strand);
1625 if ($five_prime || $three_prime) {
1626 my $five = $five_prime ? 1 : 0; # Choose 5
' or 3' end
1627 ($shortest_distance,$adjusted_distance) = $self->_compute_prime_distance($five,@args);
1628 next unless ($shortest_distance || $adjusted_distance);
1631 ($shortest_distance,$adjusted_distance) = $self->_compute_nearest_end(@args);
1633 push @$position_matrix,[ $neighbour, $shortest_distance, $adjusted_distance, $neighbour->length, $neighbour->display_id] unless ($not_overlapping && $shortest_distance == 0);
1636 # Order by distance, then centre-to-centre distance, then smallest feature first, then an arbitrary ID.
1637 # $position_matrix looks like this:
1638 # [ [ $feature, closest measure of distance, size, dbID ] ]
1639 # Not proud of this line. It re-emits the original array in sorted order to favour better measures of order.
1640 my @ordered_matrix =
map { [$_->[0],$_->[1],$_->[2],$_->[3],$_->[4]] }
1641 sort { abs($a->[1]) <=> abs($b->[1]) || abs($a->[2]) <=> abs($b->[2]) || $a->[3] <=> $b->[3] || $a->[4] <=> $b->[4]} @$position_matrix;
1642 # Knock off unwanted hits. Equal distances are included, so more can be returned than asked for.
1643 @ordered_matrix = $self->_discard_excess_features_from_matrix(\@ordered_matrix,$limit);
1644 return \@ordered_matrix;
1647 =head2 _compute_nearest_end
1649 Arg [1] : Reference feature start
1650 Arg [2] : Reference feature mid-point
1651 Arg [3] : Reference feature end
1652 Arg [4] : Considered feature start
1653 Arg [5] : Considered feature mid-point
1654 Arg [6] : Considered feature end
1655 Example : $distance = $feature_adaptor->_compute_nearest_end($ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end)
1656 Description: For a given feature, calculate the smallest legitimate distance to a reference feature
1657 Calculate by mid-points to accommodate overlaps
1658 Returntype : Integer distance in base pairs
1659 Caller : BaseFeatureAdaptor->select_nearest()
1662 sub _compute_nearest_end {
1663 my ($self,$ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end) = @_;
1664 my ($effective_distance,$weighted_distance);
1665 if ($ref_start > $f_end ) {
1666 # upstream, no overlap
1667 $effective_distance = $f_end - $ref_start; # negative result
for upstream
1668 } elsif ( $ref_end < $f_start ) {
1670 $effective_distance = $f_start - $ref_end; # positive result
for downstream
1673 $effective_distance = 0;
1674 $weighted_distance = $f_midpoint - $ref_midpoint;
1676 return $effective_distance, $weighted_distance;
1679 =head2 _compute_prime_distance
1681 Arg [1] : Reference feature start
1682 Arg [2] : Reference feature mid-point
1683 Arg [3] : Reference feature end
1684 Arg [4] : Considered feature start
1685 Arg [5] : Considered feature mid-point
1686 Arg [6] : Considered feature end
1687 Arg [7] : Considered feature strand
1688 Example : $distance,$weighted_centre_distance = $feature_adaptor->_compute_prime_distance($ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end,$f_strand)
1689 Description: Calculate the smallest distance to the 5
' end of the considered feature
1690 Returntype : Integer distance in base pairs or a string warning that the result doesn't mean anything.
1691 Nearest 5
' and 3' features shouldn
't reside inside the reference Feature
1692 Caller : BaseFeatureAdaptor->select_nearest()
1695 sub _compute_prime_distance {
1696 my ($self,$five_prime,$ref_start,$ref_midpoint,$ref_end,$f_start,$f_midpoint,$f_end,$f_strand) = @_;
1699 my $effective_distance;
1700 my $weighted_distance;
1703 $f_coord = ($f_strand == 1) ? $f_start : $f_end;
1704 } else { # three prime end required
1705 $f_coord = ($f_strand ==1 ) ? $f_end : $f_start;
1708 if ($ref_start > $f_coord) {
1709 # n' end of feature upstream of reference
1710 $effective_distance = $f_coord - $ref_start;
1711 if ($f_end < $ref_start) {
1712 $weighted_distance = $f_coord - $ref_midpoint;
1714 } elsif ($ref_end < $f_coord && $f_start > $ref_end) {
1715 # n' end of feature downstream of reference
1716 $effective_distance = $ref_end - $f_coord;
1717 if ($f_start > $ref_end) {
1718 $weighted_distance = $f_coord - $ref_midpoint;
1720 } elsif ($ref_start < $f_coord && $ref_end > $f_coord) {
1721 # total overlap condition
1722 $effective_distance = 0;
1723 $weighted_distance = $f_coord - $ref_midpoint;
1725 # If feature's end falls outside of the reference feature, compute distance to reference midpoint.
1726 return $effective_distance, $weighted_distance;
1729 =head2 _compute_midpoint
1732 Example : $middle = $feature_adaptor->_compute_midpoint($feature);
1733 Description: Calculate the mid-point of a Feature. Used
for comparing Features that overlap each other
1734 and determining a canonical distance between two Features
for the majority of use cases.
1735 Returntype : Integer coordinate rounded down.
1736 Caller : BaseFeatureAdaptor->select_nearest()
1739 sub _compute_midpoint {
1741 my $feature = shift;
1743 my $end = $feature->seq_region_end;
1745 # consider circular slice
1746 if ($start > $end) {
1747 $midpoint = int($end + ($start - $end) / 2);
1749 $midpoint = int($start + ($end - $start) / 2);
1754 sub _discard_excess_features_from_matrix {
1757 my @ordered_matrix = @$list;
1759 return @ordered_matrix
if $#ordered_matrix == 0 || !defined($limit) || $limit > scalar @ordered_matrix;
1760 # cut off excess elements
1761 my @spares = splice @ordered_matrix, $limit, scalar @ordered_matrix - $limit;
1762 # Check nearest distance of the last member of ordered_matrix against spares and include those that match
1763 # This prevents first past the post.
1764 if (scalar @ordered_matrix > 0) {
1765 my $threshold_distance = $ordered_matrix[-1]->[1];
1767 while ($i <= $#spares && $spares[$i]->[1] == $threshold_distance) {
1768 # printf "Considering option %s, %s\n",$spares[$i]->[0]->stable_id,$spares[$i]->[1];
1769 push @ordered_matrix, $spares[$i];
1774 return @ordered_matrix;