3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
34 the creation of Slice objects.
42 -host =>
'ensembldb.ensembl.org',
49 # get a slice on the entire chromosome X
50 $chr_slice = $slice_adaptor->fetch_by_region(
'chromosome',
'X' );
52 # get a slice for each clone in the database
53 foreach $cln_slice ( @{ $slice_adaptor->fetch_all(
'clone') } ) {
54 # do something with clone
57 # get a slice which is part of NT_004321
59 $slice_adaptor->fetch_by_region(
'supercontig',
'NT_004321',
62 # get all non-redundant slices from the highest possible coordinate
64 $slices = $slice_adaptor->fetch_all(
'toplevel');
66 # include non-reference regions
67 $slices = $slice_adaptor->fetch_all(
'toplevel', undef, 1 );
69 # include duplicate regions
70 $slices = $slice_adaptor->fetch_all(
'toplevel', undef, 0, 1 );
72 # split up a list of slices into smaller slices
75 $slices = split_Slices( $slices, $max_length, $overlap );
77 # store a list of slice names in a file
78 open( FILE,
">$filename" ) or die("Could not open file $filename");
79 foreach my $slice (@$slices) {
80 print FILE $slice->name(),
"\n";
84 # retrieve a list of slices from a file
85 open( FILE, $filename ) or die(
"Could not open file $filename");
86 while ( $name = <FILE> ) {
88 $slice = $slice_adaptor->fetch_by_name($name);
89 # do something with slice
94 This module is responsible
for fetching Slices representing genomic
95 regions from a database. A Details on how slices can be used are in the
103 package Bio::EnsEMBL::DBSQL::SliceAdaptor;
115 use Scalar::Util qw/looks_like_number/;
118 @ISA = (
'Bio::EnsEMBL::DBSQL::BaseAdaptor');
123 my $class = ref($caller) || $caller;
125 my $self = $class->SUPER::new(@_);
127 # use a cache which is shared and also used by the assembly
130 my $seq_region_cache = $self->db->get_SeqRegionCache();
132 $self->{
'sr_name_cache'} = $seq_region_cache->{
'name_cache'};
133 $self->{
'sr_id_cache'} = $seq_region_cache->{
'id_cache'};
135 $self->{
'lrg_region_test'} = undef;
136 my $meta_container = $self->db->get_MetaContainer();
137 my @values = $meta_container->list_value_by_key(
"LRG");
138 if(scalar(@values) and $values[0]->[0]){
139 $self->{
'lrg_region_test'} = $values[0]->[0];
145 =head2 fetch_by_region
147 Arg [1] :
string $coord_system_name (optional)
148 The name of the coordinate system of the slice to be created
149 This may be a name of an actual coordinate system or an alias
150 to a coordinate system. Valid aliases are
'seqlevel' or
152 Arg [2] :
string $seq_region_name
153 The name of the sequence region that the slice will be
155 Arg [3] :
int $start (optional,
default = 1)
156 The start of the slice on the sequence region
157 Arg [4] :
int $end (optional, default = seq_region length)
158 The end of the slice on the sequence region
159 Arg [5] :
int $strand (optional, default = 1)
160 The orientation of the slice on the sequence region
161 Arg [6] :
string $version (optional, default = default version)
162 The version of the coordinate system to use (e.g. NCBI33)
163 Arg [7] :
boolean $no_fuzz (optional, default = undef (false))
164 If true (non-zero), do not use "fuzzy matching" (see below).
165 Example : $slice = $slice_adaptor->fetch_by_region('chromosome', 'X');
166 $slice = $slice_adaptor->fetch_by_region('clone', 'AC008066.4');
167 Description: Retrieves a slice on the requested region. At a minimum the
168 name the name of the seq_region to fetch must be provided.
170 If no coordinate system name is provided than a slice on the
171 highest ranked coordinate system with a matching
172 seq_region_name will be returned. If a version but no
173 coordinate system name is provided, the same behaviour will
174 apply, but only coordinate systems of the appropriate version
175 are considered. The same applies if the 'toplevel' coordinate
176 system is specified, however in this case the version is
177 ignored. The coordinate system should always be specified if
178 it is known, since this is unambiguous and faster.
180 Some fuzzy matching is performed if no exact match for
181 the provided name is found. This allows clones to be
182 fetched even when their version is not known. For
183 example fetch_by_region('clone', 'AC008066') will
184 retrieve the sequence_region with name 'AC008066.4'.
186 The fuzzy matching can be turned off by setting the
187 $no_fuzz argument to a true value.
189 If the requested seq_region is not found in the database undef
193 Exceptions : throw if no seq_region_name is provided
194 throw if invalid coord_system_name is provided
195 throw if start > end is provided
203 # ARNE: This subroutine needs simplification!!
205 sub fetch_by_region {
206 my ( $self, $coord_system_name, $seq_region_name, $start, $end,
207 $strand, $version, $no_fuzz )
210 assert_integer($start,
'start') if defined $start;
211 assert_integer($end, 'end') if defined $end;
213 if ( !defined($start) ) { $start = 1 }
214 if ( !defined($strand) ) { $strand = 1 }
216 if ( !defined($seq_region_name) ) {
217 throw(
'seq_region_name argument is required');
221 my $csa = $self->db->get_CoordSystemAdaptor();
223 if ( defined($coord_system_name) ) {
224 $cs = $csa->fetch_by_name( $coord_system_name, $version );
226 if ( !defined($cs) ) {
227 # deal with cases where undefined coordsystem name requested is 'chromosome'
228 if ( $coord_system_name eq
"chromosome" ) {
230 # set the 'chromosome' alias, if not yet set
231 $cs = $self->_create_chromosome_alias();
234 throw( sprintf(
"Unknown coordinate system:\n"
235 .
"name='%s' version='%s' seq region name='%s'\n",
236 $coord_system_name, $version, $seq_region_name ) );
240 # fetching by toplevel is same as fetching w/o name or version
241 if ( $cs->is_top_level() ) {
246 } ## end
if ( defined($coord_system_name...))
253 if ( defined($cs) ) {
255 # if chromosome alias is defined, use karyotype_cache to access seq region data
256 # rather than a database query
257 if ( defined($cs->alias_to()) && $cs->alias_to() eq
"chromosome" ) {
259 $key =
"karyotype_cache";
261 }
else { # otherwise, search database to access seq region data, etc.
263 $sql = sprintf(
"SELECT sr.name, sr.seq_region_id, sr.length, %d "
264 .
"FROM seq_region sr ",
267 $constraint =
"AND sr.coord_system_id = ?";
268 push( @bind_params, [ $cs->dbID(), SQL_INTEGER ] );
270 $key =
"$seq_region_name:" . $cs->dbID();
275 "SELECT sr.name, sr.seq_region_id, sr.length, cs.coord_system_id "
276 .
"FROM seq_region sr, coord_system cs ";
278 $constraint =
"AND sr.coord_system_id = cs.coord_system_id "
279 .
"AND cs.species_id = ? ";
280 push( @bind_params, [ $self->species_id(), SQL_INTEGER ] );
282 if ( defined($version) ) {
283 $constraint .=
"AND cs.version = ? ";
284 push( @bind_params, [ $version, SQL_VARCHAR ] );
287 $constraint .=
"ORDER BY cs.rank ASC";
290 # check the cache so we only go to the db if necessary
294 # use $key to access karyotype cache and define $arr
295 if ( defined($key) && ($key eq
"karyotype_cache") ) {
297 my $coord_system_id = $cs->dbID();
299 # retrieve values from karyotype cache
300 my $seq_region_id = $cs->{$key}{$seq_region_name}{$coord_system_id}{
'seq_region_id'};
301 $length = $cs->{$key}{$seq_region_name}{$coord_system_id}{
'length'};
303 # populate $arr with values from karyotype cache
304 $arr = [ $seq_region_name, $seq_region_id, $length, $coord_system_id ];
306 # define $end, if not yet defined
307 if ( !defined($end) ) { $end = $length }
309 # add in check that $arr is populated
310 throw(
"Unable to popular $arr with values from karyotype cache") unless $arr;
314 if ( defined($key) ) { $arr = $self->{
'sr_name_cache'}->{$key} }
316 if ( defined($arr) ) {
320 $self->prepare( $sql .
"WHERE sr.name = ? " . $constraint );
322 unshift( @bind_params, [ $seq_region_name, SQL_VARCHAR ] );
325 foreach my $param (@bind_params) {
326 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
330 my @row = $sth->fetchrow_array();
335 # deal with cases where a coordsystem might not be defined by user
338 $slice = $self->_fetch_by_seq_region_synonym( undef, $seq_region_name, $start, $end, $strand, $version, $no_fuzz );
340 $slice = $self->_fetch_by_seq_region_synonym( $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz );
343 # check whether any slice data has been returned
344 if ( $slice && $slice->seq_region_name ) {
345 my $matched_name = $slice->seq_region_name;
347 # if matched name is different to query name, skip fuzzy matching
348 if ( $matched_name ne $seq_region_name ) {
349 $seq_region_name = $matched_name;
352 my $tmp_key_string =
"$seq_region_name:" . $slice->coord_system()->dbID();
353 $arr = $self->{
'sr_name_cache'}->{$tmp_key_string};
355 $cs = $slice->coord_system()
if (!$cs);
358 }
else { #
if no slice
object with a match returned,
try using a fuzzy match
361 my ($fuzzy_matched_name, $cs_new) = $self->_fetch_by_fuzzy_matching( $cs, $seq_region_name, $sql, $constraint, \@bind_params );
364 if (!$fuzzy_matched_name) {
369 my $tmp_key_string = $fuzzy_matched_name .
":" . $cs_new->dbID();
370 if (exists $self->{
'sr_name_cache'}->{$tmp_key_string}) {
371 $seq_region_name = $fuzzy_matched_name;
372 $arr = $self->{
'sr_name_cache'}->{$tmp_key_string};
384 ( $seq_region_name, $id, $length, $cs_id ) = @row;
386 # cache to speed up for future queries
387 my $arr = [ $id, $seq_region_name, $cs_id, $length ];
388 $self->{
'sr_name_cache'}->{
"$seq_region_name:$cs_id"} = $arr;
389 $self->{
'sr_id_cache'}->{
"$id"} = $arr;
390 $cs = $csa->fetch_by_dbID($cs_id);
392 } ## end
else [
if ( defined($arr) ) ]
395 if ( !defined($end) ) { $end = $length }
397 #If this was given then check if we've got a circular seq region otherwise
398 #let it fall through to the normal Slice method
399 if ( $end + 1 < $start ) {
400 my $cs_id = $cs->dbID();
401 my $seq_region_id = $self->{
'sr_name_cache'}->{
"$seq_region_name:$cs_id"}->[0];
402 if($self->is_circular($seq_region_id)) {
405 -COORD_SYSTEM => $cs,
406 -SEQ_REGION_NAME => $seq_region_name,
407 -SEQ_REGION_LENGTH => $length,
417 if ( defined( $self->{
'lrg_region_test'} )
418 and substr( $cs->name, 0, 3 ) eq $self->{
'lrg_region_test'} )
422 -SEQ_REGION_NAME => $seq_region_name,
423 -SEQ_REGION_LENGTH => $length,
431 'coord_system' => $cs,
432 'seq_region_name' => $seq_region_name,
433 'seq_region_length' => $length,
437 'adaptor' => $self } );
439 } ## end sub fetch_by_region
441 =head2 fetch_by_toplevel_location
443 Arg [1] :
string $location
444 Ensembl formatted location. Can be a format like
445 C<name:start-end>, C<name:start..end>, C<name:start:end>,
446 C<name:start>, C<name>. We can also support strand
447 specification as a +/- or 1/-1.
449 Location names must be separated by a C<:>. All others can be
450 separated by C<..>, C<:> or C<->.
451 Arg[2] :
boolean $no_warnings
452 Suppress warnings from
this method
453 Arg[3] :
boolean $no_fuzz
454 Stop fuzzy matching of sequence regions from occuring
455 Arg[4] :
boolean $ucsc
456 If we are unsuccessful at retriving a location retry taking any
457 possible chr prefix into account e.g. chrX and X are treated as
459 Example : my $slice = $sa->fetch_by_toplevel_location(
'X:1-10000')
460 my $slice = $sa->fetch_by_toplevel_location(
'X:1-10000:-1')
461 Description : Converts an Ensembl location/region into the sequence region
462 name, start and end and passes them onto C<fetch_by_region()>.
463 The code assumes that the required slice is on the top level
464 coordinate system. The code assumes that location formatting
465 is not perfect and will perform basic
cleanup before parsing.
467 Exceptions : If $location is
false otherwise see C<fetch_by_location()>
468 or C<fetch_by_region()>
474 sub fetch_by_toplevel_location {
475 my ($self, $location, $no_warnings, $no_fuzz, $ucsc) = @_;
476 return $self->fetch_by_location($location,
'toplevel', undef, $no_warnings, $no_fuzz, $ucsc);
479 =head2 fetch_by_location
481 Arg [1] :
string $location
482 Ensembl formatted location. Can be a format like
483 C<name:start-end>, C<name:start..end>, C<name:start:end>,
484 C<name:start>, C<name>. We can also support strand
485 specification as a +/- or 1/-1.
487 Location names must be separated by a C<:>. All others can be
488 separated by C<..>, C<:>, C<_> or C<->.
489 Arg[2] : String $coord_system_name
490 The coordinate system to retrieve
491 Arg[3] : String $coord_system_version
492 Optional parameter. Version of the coordinate system to fetch
493 Arg[4] :
boolean $no_warnings
494 Suppress warnings from
this method
495 Arg[5] :
boolean $no_fuzz
496 Stop fuzzy matching of sequence regions from occuring
497 Arg[6] :
boolean $ucsc
498 If we are unsuccessful at retriving a location retry taking any
499 possible chr prefix into account e.g. chrX and X are treated as
501 Example : my $slice = $sa->fetch_by_location(
'X:1-10000',
'chromosome')
502 my $slice = $sa->fetch_by_location(
'X:1-10000:-1',
'toplevel')
503 Description : Converts an Ensembl location/region into the sequence region
504 name, start and end and passes them onto C<fetch_by_region()>.
505 The code assumes that location formatting is not perfect and
506 will perform basic
cleanup before parsing.
508 Exceptions : If $location or coordinate system is
false otherwise
509 see C<fetch_by_region()>
515 sub fetch_by_location {
516 my ($self, $location, $coord_system_name, $coord_system_version, $no_warnings, $no_fuzz, $ucsc) = @_;
517 throw "No coordinate system name specified" unless $coord_system_name;
519 my ($seq_region_name, $start, $end, $strand) = $self->parse_location_to_values($location, $no_warnings);
521 if(! $seq_region_name) {
525 if(defined $start && defined $end && $start > $end) {
526 throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end";
529 $seq_region_name =~ s/^chr
530 my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
531 if(! defined $slice) {
533 my $ucsc_seq_region_name = $seq_region_name;
534 $ucsc_seq_region_name =~ s/^chr
535 if($ucsc_seq_region_name ne $seq_region_name) {
536 $slice = $self->fetch_by_region($coord_system_name, $ucsc_seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
537 return if ! defined $slice; #
if we had no slice still then bail
540 return; #If it was not different then we didn
't have the prefix so just return (same bail as before)
544 return; #We didn't have a slice and no UCSC specifics are being triggered
549 my $name = $slice->seq_region_name();
550 if(defined $start && $start > $srl) {
551 throw "Cannot request a slice whose start ($start) is greater than $srl for $name.";
553 if(defined $end && $end > $srl) {
554 warning
"Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings;
555 $slice->{end} = $srl;
561 =head2 parse_location_to_values
563 Arg [1] :
string $location
564 Ensembl formatted location. Can be a format like
565 C<name:start-end>, C<name:start..end>, C<name:start:end>,
566 C<name:start>, C<name>. We can also support strand
567 specification as a +/- or 1/-1.
569 Location names must be separated by a C<:>. All others can be
570 separated by C<..>, C<:> C<_>, or C<->.
572 If the start is negative, start will be reset to 1 (e.g.: 1: -10-1,000
')
573 If both start and end are negative, returns undef (e.g.: 1: -10--1,000')
574 Arg[2] :
boolean $no_warnings
575 Suppress warnings from
this method
576 Arg[3] :
boolean $no_errors
577 Supress errors being thrown from
this method
578 Example : my ($name, $start, $end, $strand) = $sa->parse_location_to_values(
'X:1..100:1);
579 Description : Takes in an Ensembl location String and returns the parsed
581 Returntype : List. Contains name, start, end and strand
586 sub parse_location_to_values {
587 my ($self, $location, $no_warnings, $no_errors) = @_;
589 throw 'You must specify a location
' if ! $location;
591 #cleanup any nomenclature like 1 000 or 1,000
592 my $number_seps_regex = qr/\s+|,/;
593 my $separator_regex = qr/(?:-|[.]{2}|\:|_)?/; # support -, .., : and _ as separators
594 my $hgvs_nomenclature_regex = qr/(?:g\.)?/; # check for HGVS looking locations e.g. X:g.1-100
595 my $number_regex = qr/[0-9, EMKG]+/xmsi;
596 my $number_regex_signed = qr/-?[0-9, EMKG]+/xmsi; # to capture negative locations as sometimes we end up in negative location if the location is padded
597 my $strand_regex = qr/[+-1]|-1/xms;
599 my $regex = qr/^((?:\w|\.|_|-)+) \s* :? \s* $hgvs_nomenclature_regex ($number_regex_signed)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms;
600 my ($seq_region_name, $start, $end, $strand);
601 if(($seq_region_name, $start, $end, $strand) = $location =~ $regex) {
603 if(defined $strand) {
604 if(!looks_like_number($strand)) {
605 $strand = ($strand eq '+
') ? 1 : -1;
610 $start =~ s/$number_seps_regex//g;
612 warning "Start was less than 1 (${start}) which is not allowed. Resetting to 1" if ! $no_warnings;
614 unless(defined $end) {
615 # We will reach here only when the location is given without start and '-
' is used as seperator eg: 1:-10 (expected to return 1:1-10)
622 $end =~ s/$number_seps_regex//g;
624 throw "Cannot request negative or 0 end indexes through this interface. Given $end but expected something greater than 0" unless $no_errors;
630 return ($seq_region_name, $start, $end, $strand);
633 =head2 fetch_by_region_unique
635 Arg [1] : string $coord_system_name (optional)
636 The name of the coordinate system of the slice to be created
637 This may be a name of an actual coordinate system or an alias
638 to a coordinate system. Valid aliases are 'seqlevel
' or
640 Arg [2] : string $seq_region_name
641 The name of the sequence region that the slice will be
643 Arg [3] : int $start (optional, default = 1)
644 The start of the slice on the sequence region
645 Arg [4] : int $end (optional, default = seq_region length)
646 The end of the slice on the sequence region
647 Arg [5] : int $strand (optional, default = 1)
648 The orientation of the slice on the sequence region
649 Arg [6] : string $version (optional, default = default version)
650 The version of the coordinate system to use (e.g. NCBI33)
651 Arg [7] : boolean $no_fuzz (optional, default = undef (false))
652 If true (non-zero), do not use "fuzzy matching" (see below).
653 Example : $slice = $slice_adaptor->fetch_by_region_unique('chromosome
', 'HSCHR6_MHC_COX
');
654 Description: Retrieves a slice on the requested region but returns only the unique
655 parts of the slice. At a minimum the
656 name the name of the seq_region to fetch must be provided.
658 If no coordinate system name is provided than a slice on the
659 highest ranked coordinate system with a matching
660 seq_region_name will be returned. If a version but no
661 coordinate system name is provided, the same behaviour will
662 apply, but only coordinate systems of the appropriate version
663 are considered. The same applies if the 'toplevel
' coordinate
664 system is specified, however in this case the version is
665 ignored. The coordinate system should always be specified if
666 it is known, since this is unambiguous and faster.
668 Some fuzzy matching is performed if no exact match for
669 the provided name is found. This allows clones to be
670 fetched even when their version is not known. For
671 example fetch_by_region('clone
', 'AC008066
') will
672 retrieve the sequence_region with name 'AC008066.4
'.
674 The fuzzy matching can be turned off by setting the
675 $no_fuzz argument to a true value.
677 If the requested seq_region is not found in the database undef
680 Returntype : listref Bio::EnsEMBL::Slice
681 Exceptions : throw if no seq_region_name is provided
682 throw if invalid coord_system_name is provided
683 throw if start > end is provided
689 sub fetch_by_region_unique {
693 my $slice = $self->fetch_by_region(@_);
696 if ( !exists( $self->{'asm_exc_cache
'} ) ) {
697 $self->_build_exception_cache();
701 $self->{'asm_exc_cache
'}->{ $self->get_seq_region_id($slice) }
704 # Dereference symlinked assembly regions. Take out any regions
705 # which are symlinked because these are duplicates.
707 @{ $self->fetch_normalized_slice_projection($slice) };
709 foreach my $segment (@projection) {
710 if ( $segment->[2]->seq_region_name() eq $slice->seq_region_name()
711 && $segment->[2]->coord_system->equals( $slice->coord_system ) )
713 push( @out, $segment->[2] );
721 } ## end sub fetch_by_region_unique
725 Arg [1] : string $name
726 Example : $name = 'chromosome:NCBI34:X:1000000:2000000:1
';
727 $slice = $slice_adaptor->fetch_by_name($name);
728 $slice2 = $slice_adaptor->fetch_by_name($slice3->name());
729 Description: Fetches a slice using a slice name (i.e. the value returned by
730 the Slice::name method). This is useful if you wish to
731 store a unique identifier for a slice in a file or database or
732 pass a slice over a network.
733 Slice::name allows you to serialise/marshall a slice and this
734 method allows you to deserialise/unmarshal it.
736 Returns undef if no seq_region with the provided name exists in
739 Returntype : Bio::EnsEMBL::Slice or undef
740 Exceptions : throw if incorrent arg provided
751 throw("name argument is required");
754 my @array = split(/:/,$name);
756 if(scalar(@array) < 3 || scalar(@array) > 6) {
757 throw("Malformed slice name [$name]. Format is " .
758 "coord_system:version:name:start:end:strand");
761 # Rearrange arguments to suit fetch_by_region
765 $targetarray[0]=$array[0];
766 $targetarray[5]=(($array[1]&&$array[1] ne "")?$array[1]:undef);
767 $targetarray[1]=(($array[2]&&$array[2] ne "")?$array[2]:undef);
768 $targetarray[2]=(($array[3]&&$array[3] ne "")?$array[3]:undef);
769 $targetarray[3]=(($array[4]&&$array[4] ne "")?$array[4]:undef);
770 $targetarray[4]=(($array[5]&&$array[5] ne "")?$array[5]:undef);
771 return $self->fetch_by_region(@targetarray);
776 =head2 fetch_by_seq_region_id
778 Arg [1] : string $seq_region_id
779 The internal identifier of the seq_region to create this slice
781 Arg [2] : optional start
782 Arg [3] : optional end
783 Arg [4] : optional strand
784 Example : $slice = $slice_adaptor->fetch_by_seq_region_id(34413);
785 Description: Creates a slice object of an entire seq_region using the
786 seq_region internal identifier to resolve the seq_region.
787 Returns undef if no such slice exists.
788 Returntype : Bio::EnsEMBL::Slice or undef
795 sub fetch_by_seq_region_id {
796 my ( $self, $seq_region_id, $start, $end, $strand, $check_prior_ids ) = @_;
798 my $csa = $self->db->get_CoordSystemAdaptor();
799 my $arr = $self->{'sr_id_cache
'}->{$seq_region_id};
800 my ( $name, $length, $cs, $cs_id );
803 if ( $arr && defined( $arr->[2] ) ) {
804 ( $name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] );
805 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
808 $self->prepare( "SELECT sr.name, sr.coord_system_id, sr.length "
809 . "FROM seq_region sr "
810 . "WHERE sr.seq_region_id = ? " );
812 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
815 my @row = $sth->fetchrow_array();
817 # This could have been an old seq region id so see if we can
818 # translate it into a more recent version.
819 if($check_prior_ids) {
820 if(exists $csa->{_external_seq_region_mapping}->{$seq_region_id}) {
821 my $new_seq_region_id = $csa->{_external_seq_region_mapping}->{$seq_region_id};
822 # No need to pass check prior ids flag because it's a 1 step relationship
823 return $self->fetch_by_seq_region_id($new_seq_region_id, $start, $end, $strand);
829 ( $name, $cs_id, $length ) = @row;
832 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
834 #cache results to speed up repeated queries
835 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
837 $self->{
'sr_name_cache'}->{
"$name:$cs_id"} = $arr;
838 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
843 'coord_system' => $cs,
844 'seq_region_name' => $name,
845 'seq_region_length'=> $length,
846 'start' => $start || 1,
847 'end' => $end || $length,
848 'strand' => $strand || 1,
849 'adaptor' => $self} );
850 } ## end sub fetch_by_seq_region_id
857 The slice to fetch a seq_region_id
for
859 Description: Retrieves the seq_region id (in
this database) given a slice
860 Seq region ids are not stored on the slices themselves
861 because they are intended to be somewhat database independant
862 and seq_region_ids vary accross databases.
864 Exceptions :
throw if the seq_region of the slice is not in the db
865 throw if incorrect arg provided
866 Caller : BaseFeatureAdaptor
875 if(!$slice || !ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
876 throw(
'Slice argument is required');
879 my $seq_region_name = $slice->seq_region_name();
880 my $key = $seq_region_name.
":".$slice->coord_system->dbID();
881 my $arr = $self->{
'sr_name_cache'}->{
"$key"};
887 my $cs_id = $slice->coord_system->dbID();
889 my $sth = $self->prepare(
"SELECT seq_region_id, length " .
891 "WHERE name = ? AND coord_system_id = ?");
893 #force seq_region_name cast to string so mysql cannot treat as int
894 $sth->bind_param(1,
"$seq_region_name",SQL_VARCHAR);
895 $sth->bind_param(2,$cs_id,SQL_INTEGER);
898 my @row = $sth->fetchrow_array();
900 throw(
"No-existent seq_region [$seq_region_name] in coord system [$cs_id]");
902 my @more = $sth->fetchrow_array();
904 throw(
"Ambiguous seq_region [$seq_region_name] in coord system [$cs_id]");
907 my($seq_region_id, $length) = @row;
910 #cache information for future requests
911 $arr = [ $seq_region_id, $seq_region_name, $cs_id, $length ];
913 $self->{
'sr_name_cache'}->{
"$seq_region_name:$cs_id"} = $arr;
914 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
916 return $seq_region_id;
923 Arg [1] :
string $coord_system_name
924 The name of the coordinate system to retrieve slices of.
925 This may be a name of an acutal coordinate system or an alias
926 to a coordinate system. Valid aliases are
'seqlevel' or
928 Arg [2] :
string $coord_system_version (optional)
929 The version of the coordinate system to retrieve slices of
930 Arg [3] :
bool $include_non_reference (optional)
931 If
this argument is not provided then only reference slices
932 will be returned. If set, both reference and non reference
933 slices will be returned.
934 Arg [4] :
int $include_duplicates (optional)
935 If set duplicate regions will be returned.
937 NOTE:
if you
do not use
this option and you have a PAR
938 (pseudo-autosomal region) at the beginning of your seq_region
939 then your slice will not start at position 1, so coordinates
940 retrieved from
this slice might not be what you expected.
942 Arg[5] :
bool $include_lrg (optional) (
default 0)
943 If set lrg regions will be returned aswell.
946 Example : @chromos = @{$slice_adaptor->fetch_all(
'chromosome',
'NCBI33')};
947 @contigs = @{$slice_adaptor->fetch_all(
'contig')};
949 # get even non-reference regions
950 @slices = @{$slice_adaptor->fetch_all(
'toplevel',undef,1)};
952 # include duplicate regions (such as pseudo autosomal regions)
953 @slices = @{$slice_adaptor->fetch_all(
'toplevel', undef,0,1)};
955 Description: Retrieves slices of all seq_regions
for a given coordinate
956 system. This is analagous to the methods fetch_all which were
957 formerly on the ChromosomeAdaptor, RawContigAdaptor and
958 CloneAdaptor classes. Slices fetched span the entire
959 seq_regions and are on the forward strand.
960 If the coordinate system with the provided name and version
961 does not exist an empty list is returned.
962 If the coordinate system name provided is
'toplevel', all
963 non-redundant toplevel slices are returned (note that any
964 coord_system_version argument is ignored in that
case).
966 Retrieved slices can be broken into smaller slices
using the
969 Returntype : listref of Bio::EnsEMBL::Slices
979 my $cs_version = shift ||
'';
981 my ($include_non_reference, $include_duplicates, $include_lrg) = @_;
984 # verify existance of requested coord system and get its id
986 my $csa = $self->db->get_CoordSystemAdaptor();
987 my $orig_cs = $csa->fetch_by_name($cs_name, $cs_version);
989 return []
if ( !$orig_cs );
995 # Get a hash of non reference seq regions
997 if ( !$include_non_reference ) {
999 $self->prepare(
'SELECT sr.seq_region_id '
1000 .
'FROM seq_region sr, seq_region_attrib sra, '
1001 .
'attrib_type at, coord_system cs '
1002 .
'WHERE at.code = "non_ref" '
1003 .
'AND sra.seq_region_id = sr.seq_region_id '
1004 .
'AND at.attrib_type_id = sra.attrib_type_id '
1005 .
'AND sr.coord_system_id = cs.coord_system_id '
1006 .
'AND cs.species_id = ?' );
1008 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1011 my ($seq_region_id);
1012 $sth->bind_columns( \$seq_region_id );
1014 while ( $sth->fetch() ) {
1015 $bad_vals{$seq_region_id} = 1;
1020 # if we do not want lrg's then add them to the bad list;
1022 if ( !$include_lrg ) {
1024 $self->prepare(
'SELECT sr.seq_region_id '
1025 .
'FROM seq_region sr, seq_region_attrib sra, '
1026 .
'attrib_type at, coord_system cs '
1027 .
'WHERE at.code = "LRG" '
1028 .
'AND sra.seq_region_id = sr.seq_region_id '
1029 .
'AND at.attrib_type_id = sra.attrib_type_id '
1030 .
'AND sr.coord_system_id = cs.coord_system_id '
1031 .
'AND cs.species_id = ?' );
1033 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1036 my ($seq_region_id);
1037 $sth->bind_columns( \$seq_region_id );
1039 while ( $sth->fetch() ) {
1040 $bad_vals{$seq_region_id} = 1;
1045 # Retrieve the seq_regions from the database
1049 if ( $orig_cs->is_top_level() ) {
1051 $self->prepare(
'SELECT sr.seq_region_id, sr.name, '
1052 .
'sr.length, sr.coord_system_id '
1053 .
'FROM seq_region sr, seq_region_attrib sra, '
1054 .
'attrib_type at, coord_system cs '
1055 .
'WHERE at.code = "toplevel" '
1056 .
'AND at.attrib_type_id = sra.attrib_type_id '
1057 .
'AND sra.seq_region_id = sr.seq_region_id '
1058 .
'AND sr.coord_system_id = cs.coord_system_id '
1059 .
'AND cs.species_id = ?' );
1061 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1065 $self->prepare(
'SELECT sr.seq_region_id, sr.name, '
1066 .
'sr.length, sr.coord_system_id '
1067 .
'FROM seq_region sr '
1068 .
'WHERE sr.coord_system_id = ?' );
1070 $sth->bind_param( 1, $orig_cs->dbID, SQL_INTEGER );
1074 my ( $seq_region_id, $name, $length, $cs_id );
1075 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
1077 my $cache_count = 0;
1080 while($sth->fetch()) {
1081 if(!defined($bad_vals{$seq_region_id})){
1082 my $cs = $csa->fetch_by_dbID($cs_id);
1085 throw(
"seq_region $name references non-existent coord_system $cs_id.");
1088 #cache values for future reference, but stop adding to the cache once we
1089 #we know we have filled it up
1090 if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
1091 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
1093 $self->{
'sr_name_cache'}->{
"$name:$cs_id"} = $arr;
1094 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
1103 'seq_region_name' => $name,
1104 'seq_region_length'=> $length,
1105 'coord_system' => $cs,
1106 'adaptor' => $self});
1108 if(!defined($include_duplicates) or !$include_duplicates){
1109 # test if this slice *could* have a duplicate (exception) region
1110 $self->_build_exception_cache()
if(!exists $self->{
'asm_exc_cache'});
1111 if(exists $self->{asm_exc_cache}->{$seq_region_id}) {
1113 # Dereference symlinked assembly regions. Take out
1114 # any regions which are symlinked because these are duplicates
1115 my @projection = @{$self->fetch_normalized_slice_projection($slice)};
1116 foreach my $segment ( @projection) {
1118 $segment->[2]->coord_system->equals($slice->coord_system)) {
1119 push @out, $segment->[2];
1123 # no duplicate regions
1127 # we want duplicates anyway so do not do any checks
1136 =head2 fetch_all_by_genome_component
1138 Arg [1] :
string $genome_component_name
1139 The name of the genome component to retrieve slices of.
1140 Example : @slices = @{$slice_adaptor->fetch_all_by_genome_component(
'A')};
1141 Description: Returns the list of all top level slices
for a a given
1143 Returntype : listref of Bio::EnsEMBL::Slices
1144 Exceptions : If argument is not provided or is not a valid genome
1151 sub fetch_all_by_genome_component {
1153 my $genome_component = shift;
1154 defined $genome_component or
1155 throw "Undefined genome component";
1157 # check the provided genome component is valid
1158 my $gc = $self->db->get_adaptor(
'GenomeContainer');
1159 my $is_valid_component = grep { $_ eq $genome_component }
1160 @{$gc->get_genome_components};
1161 throw "Invalid genome component"
1162 unless $is_valid_component;
1165 # Retrieve the toplevel seq_regions from the database
1168 $self->prepare(
"SELECT sr.seq_region_id, sr.name, sr.length, sr.coord_system_id "
1169 .
"FROM seq_region sr "
1170 .
"JOIN seq_region_attrib sa1 USING (seq_region_id) "
1171 .
"JOIN attrib_type a1 ON sa1.attrib_type_id = a1.attrib_type_id "
1172 .
"JOIN seq_region_attrib sa2 USING (seq_region_id) "
1173 .
"JOIN attrib_type a2 ON sa2.attrib_type_id = a2.attrib_type_id "
1174 .
"WHERE sa2.value=? and a1.code='toplevel' and a2.code='genome_component'"
1177 if (looks_like_number($genome_component)) {
1178 $sth->bind_param( 1, $genome_component, SQL_INTEGER );
1180 $sth->bind_param( 1, $genome_component, SQL_VARCHAR );
1185 my ( $seq_region_id, $name, $length, $cs_id );
1186 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
1188 my $cache_count = 0;
1191 my $csa = $self->db->get_CoordSystemAdaptor();
1193 while($sth->fetch()) {
1194 my $cs = $csa->fetch_by_dbID($cs_id);
1195 throw(
"seq_region $name references non-existent coord_system $cs_id.")
1198 # cache values for future reference, but stop adding to the cache once we
1199 # we know we have filled it up
1200 if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
1201 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
1203 $self->{
'sr_name_cache'}->{
"$name:$cs_id"} = $arr;
1204 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
1213 'seq_region_name' => $name,
1214 'seq_region_length'=> $length,
1215 'coord_system' => $cs,
1216 'adaptor' => $self});
1222 =head2 get_genome_component_for_slice
1226 Description: Returns the genome component of a slice
1227 Returntype : Scalar; the identifier of the genome component of the slice
1234 sub get_genome_component_for_slice {
1235 my ($self, $slice) = @_;
1237 throw "Undefined slice" unless defined $slice;
1238 throw "Argument is not a slice"
1239 unless $slice->isa(
"Bio::EnsEMBL::Slice");
1241 my $seq_region_id = $self->get_seq_region_id($slice);
1244 $self->prepare(
"SELECT sa.value "
1245 .
"FROM seq_region_attrib sa "
1246 .
"JOIN seq_region sr USING (seq_region_id) "
1247 .
"JOIN attrib_type at ON sa.attrib_type_id = at.attrib_type_id "
1248 .
"WHERE sr.seq_region_id=? AND at.code='genome_component'"
1250 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
1253 my $genome_component;
1254 $sth->bind_columns( \( $genome_component ) );
1257 return $genome_component;
1260 =head2 fetch_all_karyotype
1262 Example : my $top = $slice_adptor->fetch_all_karyotype()
1263 Description: returns the list of all slices which are part of the karyotype
1264 Returntype : listref of Bio::EnsEMBL::Slices
1270 sub fetch_all_karyotype {
1273 my $csa = $self->db->get_CoordSystemAdaptor();
1276 $self->prepare(
'SELECT sr.seq_region_id, sr.name, '
1277 .
'sr.length, sr.coord_system_id, sra.value '
1278 .
'FROM seq_region sr, seq_region_attrib sra, '
1279 .
'attrib_type at, coord_system cs '
1280 .
'WHERE at.code = "karyotype_rank" '
1281 .
'AND at.attrib_type_id = sra.attrib_type_id '
1282 .
'AND sra.seq_region_id = sr.seq_region_id '
1283 .
'AND sr.coord_system_id = cs.coord_system_id '
1284 .
'AND cs.species_id = ?');
1285 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1287 my ( $seq_region_id, $name, $length, $cs_id, $rank );
1288 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id, $rank ) );
1291 while($sth->fetch()) {
1292 my $cs = $csa->fetch_by_dbID($cs_id);
1296 'end' => ($length+0),
1298 'seq_region_name' => $name,
1299 'seq_region_length'=> ($length+0),
1300 'coord_system' => $cs,
1303 'karyotype_rank' => ($rank+0),
1309 #Sort using Perl as value in MySQL is a text field and not portable
1310 @out = sort { $a->{karyotype_rank} <=> $b->{karyotype_rank} } @out;
1316 Arg :
int seq_region_id
1317 Example : my $top = $slice_adptor->
is_toplevel($seq_region_id)
1318 Description: Returns 1
if slice is a toplevel slice
else 0
1320 Caller : Slice method is_toplevel
1329 my $sth = $self->prepare(
1330 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1331 .
"WHERE sra.seq_region_id = ? "
1332 .
"AND at.attrib_type_id = sra.attrib_type_id "
1333 .
"AND at.code = 'toplevel'" );
1335 $sth->bind_param( 1, $id, SQL_INTEGER );
1339 $sth->bind_columns( \$code );
1341 while ( $sth->fetch ) {
1352 Arg :
int seq_region_id
1353 Example : my $karyotype = $slice_adptor->has_karyotype($seq_region_id)
1354 Description: Returns 1
if slice is a part of a karyotype
else 0
1365 my $sth = $self->prepare(
1366 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1367 .
"WHERE sra.seq_region_id = ? "
1368 .
"AND at.attrib_type_id = sra.attrib_type_id "
1369 .
"AND at.code = 'karyotype_rank'" );
1371 $sth->bind_param( 1, $id, SQL_INTEGER );
1375 $sth->bind_columns( \$code );
1377 while ( $sth->fetch ) {
1386 =head2 get_karyotype_rank
1387 Arg :
int seq_region_id
1388 Example : my $rank = $slice_adptor->get_karyotype_rank($seq_region_id)
1389 Description: Returns the rank of a slice
if it is part of the karyotype
else 0
1391 Caller : Slice method get_karyotype_rank
1396 sub get_karyotype_rank {
1400 my $sth = $self->prepare(
1401 "SELECT sra.value from seq_region_attrib sra, attrib_type at "
1402 .
"WHERE sra.seq_region_id = ? "
1403 .
"AND at.attrib_type_id = sra.attrib_type_id "
1404 .
"AND at.code = 'karyotype_rank'" );
1406 $sth->bind_param( 1, $id, SQL_INTEGER );
1410 $sth->bind_columns( \$code );
1412 my $rank = $sth->fetchrow_array();
1421 Arg :
int seq_region_id
1422 Example : my $reference = $slice_adptor->is_reference($seq_region_id)
1423 Description: Returns 1
if slice is a reference slice
else 0
1425 Caller : Slice method is_reference
1434 my $sth = $self->prepare(
1435 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1436 .
"WHERE sra.seq_region_id = ? "
1437 .
"AND at.attrib_type_id = sra.attrib_type_id "
1438 .
"AND at.code = 'non_ref'" );
1440 $sth->bind_param( 1, $id, SQL_INTEGER );
1444 $sth->bind_columns( \$code );
1446 while ( $sth->fetch ) {
1457 Arg[1] :
int seq_region_id
1458 Example : my $circular = $slice_adptor->is_circular($seq_region_id);
1459 Description : Indicates
if the sequence region was circular or not
1460 Returntype : Boolean
1465 my ($self, $id) = @_;
1467 if (! defined $self->{is_circular}) {
1468 $self->_build_circular_slice_cache();
1471 return 0
if $self->{is_circular} == 0;
1472 return (exists $self->{circular_sr_id_cache}->{$id}) ? 1 : 0;
1476 =head2 fetch_by_chr_band
1478 Title : fetch_by_chr_band
1480 Function: create a Slice representing a series of bands
1483 Args : the band name
1488 sub fetch_by_chr_band {
1489 my ( $self, $chr, $band ) = @_;
1491 my $chr_slice = $self->fetch_by_region(
'toplevel', $chr );
1495 $self->prepare(
'SELECT MIN(k.seq_region_start), '
1496 .
'MAX(k.seq_region_end) '
1497 .
'FROM karyotype k '
1498 .
'WHERE k.seq_region_id = ? '
1499 .
'AND k.band LIKE ?' );
1501 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
1502 $sth->bind_param( 2,
"$band%", SQL_VARCHAR );
1505 my ( $slice_start, $slice_end ) = $sth->fetchrow_array;
1507 if ( defined $slice_start ) {
1509 $self->fetch_by_region(
'toplevel', $chr,
1510 $slice_start, $slice_end );
1513 throw(
"Band not recognised in database");
1514 } ## end sub fetch_by_chr_band
1518 =head2 fetch_by_exon_stable_id
1520 Arg [1] :
string $exonid
1521 The stable
id of the
exon around which the slice is
1523 Arg [2] : (optional)
int $size
1524 The length of the flanking regions the slice should encompass
1525 on either side of the
exon (0 by
default)
1526 Example : $slc = $sa->fetch_by_exon_stable_id(
'ENSE00000302930',10);
1527 Description: Creates a slice around the region of the specified
exon.
1528 If a context size is given, the slice is extended by that
1529 number of basepairs on either side of the
exon.
1531 The slice will be created in the
exon's native coordinate system
1532 and in the forward orientation.
1533 Returntype : Bio::EnsEMBL::Slice
1534 Exceptions : Thrown if the exon is not in the database.
1540 sub fetch_by_exon_stable_id{
1541 my ($self,$exonid,$size) = @_;
1543 throw('Exon argument is required.
') if(!$exonid);
1545 my $ea = $self->db->get_ExonAdaptor;
1546 my $exon = $ea->fetch_by_stable_id($exonid);
1548 throw("Exon [$exonid] does not exist in DB.") if(!$exon);
1550 return $self->fetch_by_Feature($exon, $size);
1553 =head2 fetch_by_transcript_stable_id
1555 Arg [1] : string $transcriptid
1556 The stable id of the transcript around which the slice is
1558 Arg [2] : (optional) int $size
1559 The length of the flanking regions the slice should encompass
1560 on either side of the transcript (0 by default)
1561 Example : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930
',10);
1562 Description: Creates a slice around the region of the specified transcript.
1563 If a context size is given, the slice is extended by that
1564 number of basepairs on either side of the
1567 The slice will be created in the transcript's native coordinate
1568 system and in the forward orientation.
1570 Exceptions : Thrown
if the
transcript is not in the database.
1576 sub fetch_by_transcript_stable_id{
1577 my ($self,$transcriptid,$size) = @_;
1579 throw(
'Transcript argument is required.')
if(!$transcriptid);
1581 my $ta = $self->db->get_TranscriptAdaptor;
1582 my $transcript = $ta->fetch_by_stable_id($transcriptid);
1584 throw(
"Transcript [$transcriptid] does not exist in DB.")
if(!$transcript);
1586 return $self->fetch_by_Feature($transcript, $size);
1589 =head2 fetch_by_transcript_id
1591 Arg [1] :
int $transcriptid
1592 The unique database identifier of the
transcript around which
1593 the slice is desired
1594 Arg [2] : (optional)
int $size
1595 The length of the flanking regions the slice should encompass
1596 on either side of the
transcript (0 by
default)
1597 Example : $slc = $sa->fetch_by_transcript_id(24, 1000);
1598 Description: Creates a slice around the region of the specified
transcript.
1599 If a context size is given, the slice is extended by that
1600 number of basepairs on either side of the
1603 The slice will be created in the
transcript's native coordinate
1604 system and in the forward orientation.
1605 Returntype : Bio::EnsEMBL::Slice
1606 Exceptions : throw on incorrect args
1607 throw if transcript is not in database
1613 sub fetch_by_transcript_id {
1614 my ($self,$transcriptid,$size) = @_;
1616 throw('Transcript
id argument is required.
') if(!$transcriptid);
1618 my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
1619 my $transcript = $transcript_adaptor->fetch_by_dbID($transcriptid);
1621 throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
1623 return $self->fetch_by_Feature($transcript, $size);
1628 =head2 fetch_by_gene_stable_id
1630 Arg [1] : string $geneid
1631 The stable id of the gene around which the slice is
1633 Arg [2] : (optional) int $size
1634 The length of the flanking regions the slice should encompass
1635 on either side of the gene (0 by default)
1636 Example : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123
',10);
1637 Description: Creates a slice around the region of the specified gene.
1638 If a context size is given, the slice is extended by that
1639 number of basepairs on either side of the gene.
1641 The slice will be created in the gene's native coordinate system
1642 and in the forward orientation.
1644 Exceptions :
throw on incorrect args
1651 sub fetch_by_gene_stable_id {
1652 my ($self,$geneid,$size) = @_;
1654 throw(
'Gene argument is required.')
if(!$geneid);
1656 my $gene_adaptor = $self->db->get_GeneAdaptor();
1657 my $gene = $gene_adaptor->fetch_by_stable_id($geneid);
1659 throw(
"Gene [$geneid] does not exist in DB.")
if(!$gene);
1661 return $self->fetch_by_Feature($gene, $size);
1666 =head2 fetch_by_Feature
1669 The feature to fetch the slice around
1670 Arg [2] :
int size (optional)
1671 The desired number of flanking basepairs around the feature.
1672 The size may also be provided as a percentage of the feature
1673 size such as 200% or 80.5%.
1674 Example : $slice = $slice_adaptor->fetch_by_Feature($feat, 100);
1675 Description: Retrieves a slice around a specific feature. All
this really
1676 does is
return a resized version of the slice that the feature
1677 is already on. Note that slices returned from
this method
1678 are always on the forward strand of the seq_region regardless of
1679 the strandedness of the feature passed in.
1681 Exceptions :
throw if the feature does not have an attached slice
1682 throw if feature argument is not provided
1683 Caller : fetch_by_gene_stable_id, fetch_by_transcript_stable_id,
1684 fetch_by_gene_id, fetch_by_transcript_id
1689 sub fetch_by_Feature{
1690 my ($self, $feature, $size) = @_;
1694 if(!ref($feature) || !$feature->isa(
'Bio::EnsEMBL::Feature')) {
1695 throw(
'Feature argument expected.');
1698 my $slice = $feature->
slice();
1699 if(!$slice || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice') )) {
1700 throw(
'Feature must be attached to a valid slice.');
1704 my $fstart = $feature->start();
1705 my $fend = $feature->end();
1706 if(!defined($fstart) || !defined($fend)) {
1707 throw(
'Feature must have defined start and end.');
1710 #convert the feature slice coordinates to seq_region coordinates
1711 my $slice_start = $slice->start();
1712 my $slice_end = $slice->end();
1713 my $slice_strand = $slice->strand();
1714 if($slice_start != 1 || $slice_strand != 1) {
1715 if($slice_strand == 1) {
1716 $fstart = $fstart + $slice_start - 1;
1717 $fend = $fend + $slice_start - 1;
1719 my $tmp_start = $fstart;
1720 $fstart = $slice_end - $fend + 1;
1721 $fend = $slice_end - $tmp_start + 1;
1725 ## Size may be stored as a %age of the length of the feature
1726 ## Size = 100% gives no context
1727 ## Size = 200% gives context - 50% the size of the feature either side of
1730 $size = int( ($1-100)/200 * ($fend-$fstart+1) ) if( $size =~/([\d+\.]+)%/ );
1732 #return a new slice covering the region of the feature
1734 'seq_region_name' => $slice->seq_region_name,
1735 'seq_region_length' => $slice->seq_region_length,
1736 'coord_system' => $slice->coord_system,
1737 'start' => $fstart - $size,
1738 'end' => $fend + $size,
1740 'adaptor' => $self});
1741 $S->{
'_raw_feature_strand'} = $feature->
strand * $slice_strand
if $feature->can(
'strand');
1747 =head2 fetch_by_misc_feature_attribute
1749 Arg [1] :
string $attribute_type
1750 The code of the attribute type
1751 Arg [2] : (optional)
string $attribute_value
1752 The value of the attribute to fetch by
1753 Arg [3] : (optional)
int $size
1754 The amount of flanking region around the misc feature desired.
1755 Example : $slice = $sa->fetch_by_misc_feature_attribute(
'superctg',
1757 $slice = $sa->fetch_by_misc_feature_attribute(
'synonym',
1760 Description: Fetches a slice around a MiscFeature with a particular
1761 attribute type and value. If no value is specified then
1762 the feature with the particular attribute is used.
1763 If no size is specified then 0 is used.
1765 Exceptions : Throw
if no feature with the specified attribute type and value
1766 exists in the database
1767 Warning
if multiple features with the specified attribute type
1768 and value exist in the database.
1774 sub fetch_by_misc_feature_attribute {
1775 my ($self, $attrib_type_code, $attrib_value, $size) = @_;
1777 my $mfa = $self->db()->get_MiscFeatureAdaptor();
1779 my $feats = $mfa->fetch_all_by_attribute_type_value($attrib_type_code,
1783 throw(
"MiscFeature with $attrib_type_code=$attrib_value does " .
1784 "not exist in DB.");
1788 warning(
"MiscFeature with $attrib_type_code=$attrib_value is " .
1789 "ambiguous - using first one found.");
1792 my ($feat) = @$feats;
1794 return $self->fetch_by_Feature($feat, $size);
1797 =head2 fetch_by_misc_feature_set
1799 Arg [1] :
string $attribute_type
1800 The code of the attribute type
1801 Arg [2] : (optional)
string $attribute_value
1802 The value of the attribute to fetch by
1803 Arg [3] : (optional) the name of the set
1804 Arg [4] : (optional)
int $size
1805 The amount of flanking region around the misc feature desired.
1806 Example : $slice = $sa->fetch_by_misc_feature_set(
'clone',
1809 Description: Fetches a slice around a MiscFeature with a particular
1810 attribute type, value and set. If no value is specified then
1811 the feature with the particular attribute is used.
1812 A size can be specified to include flanking region
1813 If no size is specified then 0 is used.
1815 Exceptions : Throw
if no feature with the specified attribute type, value and set
1816 exists in the database
1817 Warning
if multiple features with the specified attribute type, set
1818 and value exist in the database.
1824 sub fetch_by_misc_feature_set {
1825 my ($self, $attrib_type_code, $attrib_value, $misc_set, $size) = @_;
1827 my $mfa = $self->db()->get_MiscFeatureAdaptor();
1829 my $feat = $mfa->fetch_by_attribute_set_value($attrib_type_code,
1833 return $self->fetch_by_Feature($feat, $size);
1836 =head2 fetch_normalized_slice_projection
1839 Arg [2] :
boolean $filter_projections
1840 Optionally filter the projections to remove anything
1841 which is the same sequence region as the given slice
1842 Example : ( optional )
1843 Description: gives back a project style result. The returned slices
1844 represent the areas to which there are symlinks
for the
1845 given slice. start, end show which area on given slice is
1847 Returntype : [[start,end,$slice][]]
1849 Caller : BaseFeatureAdaptor
1855 sub fetch_normalized_slice_projection {
1858 my $filter_projections = shift;
1862 $self->_build_exception_cache()
if(!exists($self->{
'asm_exc_cache'}));
1864 my $result = $self->{
'asm_exc_cache'}->{$slice_seq_region_id};
1870 foreach my $row (@$result) {
1871 my ( $seq_region_id, $seq_region_start, $seq_region_end,
1872 $exc_type, $exc_seq_region_id, $exc_seq_region_start,
1873 $exc_seq_region_end ) = @$row;
1875 # need overlapping PAR and all HAPs if any
1876 if( $exc_type eq
"PAR" ) {
1877 if( $seq_region_start <= $slice->end() &&
1878 $seq_region_end >= $slice->start() ) {
1879 push( @pars, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
1880 $exc_seq_region_start, $exc_seq_region_end ] );
1883 push( @haps, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
1884 $exc_seq_region_start, $exc_seq_region_end ] );
1888 if(!@pars && !@haps) {
1889 #just return this slice, there were no haps or pars
1890 return [bless ( [1,$slice->length, $slice],
"Bio::EnsEMBL::ProjectionSegment")];
1895 my @sort_haps = sort { $a->[1] <=> $b->[1] } @haps;
1902 my $seq_region_slice = $self->fetch_by_seq_region_id($slice_seq_region_id);
1903 my $exc_slice = $self->fetch_by_seq_region_id( $sort_haps[0][2] );
1904 my $len1 = $seq_region_slice->length();
1905 my $len2 = $exc_slice->length();
1906 my $max_len = ($len1 > $len2) ? $len1 : $len2;
1908 while($count <= scalar(@sort_haps) and !$last){
1911 if(defined($sort_haps[$count]) and defined($sort_haps[$count][0]) ){
1912 $hap_end = $sort_haps[$count][0]-1;
1913 $chr_end = $sort_haps[$count][3]-1
1919 my $diff = ($hap_end-$hap_start)-($chr_end-$chr_start);
1921 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end+$diff] );
1924 push( @syms, [ $hap_start, $hap_end - $diff, $sort_haps[0][2], $chr_start, $chr_end] );
1927 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
1931 if($hap_end and $hap_start < $len1){ #
if hap at start or end of chromosome
1932 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
1934 $chr_start = $chr_end + ($sort_haps[$count][4]-$sort_haps[$count][3]) + 2;
1935 $hap_start = $hap_end + ($sort_haps[$count][1]-$sort_haps[$count][0]) + 2;
1943 # for now haps and pars should not be both there, but in theory we
1944 # could handle it here by cleverly merging the pars into the existing syms,
1946 push( @syms, @pars );
1950 for my $sym ( @syms ) {
1951 $mapper->add_map_coordinates( $slice_seq_region_id, $sym->[0], $sym->[1],
1952 1, $sym->[2], $sym->[3], $sym->[4] );
1955 my @linked = $mapper->map_coordinates( $slice_seq_region_id,
1956 $slice->start(), $slice->end(),
1957 $slice->strand(),
"sym" );
1959 # gaps are regions where there is no mapping to another region
1962 #if there was only one coord and it is a gap, we know it is just the
1963 #same slice with no overlapping symlinks
1964 if(@linked == 1 && $linked[0]->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
1965 return [bless( [1,$slice->length, $slice],
"Bio::EnsEMBL::ProjectionSegment" )];
1969 for my $coord ( @linked ) {
1970 if( $coord->isa(
"Bio::EnsEMBL::Mapper::Gap" )) {
1972 'start' => $coord->start(),
1973 'end' => $coord->end(),
1974 'strand' => $slice->strand(),
1975 'coord_system' => $slice->coord_system(),
1977 'seq_region_name' => $slice->seq_region_name(),
1978 'seq_region_length' => $slice->seq_region_length()});
1979 push( @out, bless ( [ $rel_start, $coord->length()+$rel_start-1,
1980 $exc_slice ],
"Bio::EnsEMBL::ProjectionSegment") );
1982 my $exc_slice = $self->fetch_by_seq_region_id( $coord->id() );
1985 'start' => $coord->start(),
1986 'end' => $coord->end(),
1987 'strand' => $coord->strand(),
1988 'seq_region_name' => $exc_slice->seq_region_name(),
1989 'seq_region_length' => $exc_slice->seq_region_length(),
1990 'coord_system' => $exc_slice->coord_system(),
1994 push( @out, bless( [ $rel_start, $coord->length() + $rel_start - 1,
1995 $exc2_slice ],
"Bio::EnsEMBL::ProjectionSegment") );
1997 $rel_start += $coord->length();
2000 if($filter_projections) {
2001 return $self->_filter_Slice_projections($slice, \@out);
2006 =head2 _filter_Slice_projections
2009 Arg [2] : Array The projections which were fetched from the previous slice
2010 Description : Removes any projections which occur within the same sequence
2011 region as the given Slice
object
2013 of projected segments
2016 sub _filter_Slice_projections {
2017 my ($self, $slice, $projections) = @_;
2018 my @proj = @{ $projections };
2020 throw(
'Was not given any projections to filter. Database may have incorrect assembly_exception information loaded');
2023 # Want to get features on the FULL original slice as well as any
2026 # Filter out partial slices from projection that are on same
2027 # seq_region as original slice.
2029 my $sr_id = $slice->get_seq_region_id();
2033 my $segment = bless( [ 1, $slice->length(), $slice ],
2034 'Bio::EnsEMBL::ProjectionSegment' );
2035 push( @proj, $segment );
2043 Arg [2] : (optional) $seqref reference to a
string
2044 The sequence associated with the slice to be stored.
2046 $seq_region_id = $slice_adaptor->store($slice, \$sequence);
2047 Description: This stores a slice as a sequence region in the database
2048 and returns the seq region
id. The passed in slice must
2049 start at 1, and must have a valid seq_region name and coordinate
2050 system. The attached coordinate system must already be stored in
2051 the database. The sequence region is assumed to start at 1 and
2052 to have a length equalling the length of the slice. The end of
2053 the slice must equal the seq_region_length.
2054 If the slice coordinate system is the sequence level coordinate
2055 system then the seqref argument must also be passed. If the
2056 slice coordinate system is NOT a sequence level coordinate
2057 system then the sequence argument cannot be passed.
2059 Exceptions :
throw if slice has no coord system.
2060 throw if slice coord system is not already stored.
2061 throw if slice coord system is seqlevel and no sequence is
2063 throw if slice coord system is not seqlevel and sequence is
2065 throw if slice does not start at 1
2066 throw if sequence is provided and the sequence length does not
2067 match the slice length.
2068 throw if the SQL insert fails (e.g. on duplicate seq region)
2069 throw if slice argument is not passed
2070 throw if the slice end is not equal to seq_region_length
2071 Caller : database loading scripts
2082 my $not_dna = shift;
2085 # Get all of the sanity checks out of the way before storing anything
2088 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2089 throw(
'Slice argument is required');
2093 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2095 my $db = $self->db();
2096 if(!$cs->is_stored($db)) {
2097 throw(
"Slice CoordSystem must already be stored in DB.")
2100 if($slice->start != 1 || $slice->strand != 1) {
2101 throw(
"Slice must have start==1 and strand==1.");
2104 if($slice->end() != $slice->seq_region_length()) {
2105 throw(
"Slice must have end==seq_region_length");
2108 my $sr_len = $slice->length();
2109 my $sr_name = $slice->seq_region_name();
2111 if($sr_name eq
'') {
2112 throw(
"Slice must have valid seq region name.");
2115 if($cs->is_sequence_level() && !$not_dna) {
2117 throw(
"Must provide sequence for sequence level coord system.");
2119 if(ref($seqref) ne
'SCALAR') {
2120 throw(
"Sequence must be a scalar reference.");
2122 my $seq_len = length($$seqref);
2124 if($seq_len != $sr_len) {
2125 throw(
"Sequence length ($seq_len) must match slice length ($sr_len).");
2129 throw(
"Cannot provide sequence for non-sequence level seq regions.");
2133 #store the seq_region
2135 my $sth = $db->dbc->prepare(
"INSERT INTO seq_region " .
2136 " ( name, length, coord_system_id ) " .
2137 " VALUES ( ?, ?, ? )"
2140 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
2141 $sth->bind_param(2,$sr_len,SQL_INTEGER);
2142 $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
2146 my $seq_region_id = $self->last_insert_id(
'seq_region_id', undef,
'seq_region');
2148 if(!$seq_region_id) {
2149 throw(
"Database seq_region insertion failed.");
2152 if($cs->is_sequence_level() && !$not_dna) {
2153 #store sequence if it was provided
2154 my $seq_adaptor = $db->get_SequenceAdaptor();
2155 $seq_adaptor->store($seq_region_id, $$seqref);
2159 if(defined($slice->{
'synonym'})){
2160 foreach my $syn (@{$slice->{
'synonym'}} ){
2161 $syn->seq_region_id($seq_region_id); # set the seq_region_id
2162 $syn->adaptor->store($syn);
2167 $slice->adaptor($self);
2169 return $seq_region_id;
2178 # Get all of the sanity checks out of the way before storing anything
2181 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2182 throw(
'Slice argument is required');
2185 my $cs = $slice->coord_system();
2186 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2188 my $db = $self->db();
2190 if($slice->start != 1 || $slice->strand != 1) {
2191 throw(
"Slice must have start==1 and strand==1.");
2194 if($slice->end() != $slice->seq_region_length()) {
2195 throw(
"Slice must have end==seq_region_length");
2198 my $sr_len = $slice->length();
2199 my $sr_name = $slice->seq_region_name();
2202 throw(
"Slice must have valid seq region name.");
2205 #update the seq_region
2207 my $seq_region_id = $slice->get_seq_region_id();
2208 my $update_sql = qq(
2213 WHERE seq_region_id = ?
2216 my $sth = $db->dbc->prepare($update_sql);
2218 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
2219 $sth->bind_param(2,$sr_len,SQL_INTEGER);
2220 $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
2222 $sth->bind_param(4, $seq_region_id, SQL_INTEGER);
2227 if(defined($slice->{
'synonym'})){
2228 foreach my $syn (@{$slice->{
'synonym'}} ){
2229 $syn->seq_region_id($seq_region_id); # set the seq_region_id
2230 my $syn_adaptor = $db->get_SeqRegionSynonymAdaptor();
2231 $syn_adaptor->store($syn);
2239 The slice to remove from the database
2240 Example : $slice_adaptor->remove($slice);
2241 Description: Removes a slice completely from the database.
2242 All associated seq_region_attrib are removed as well.
2243 If dna is attached to the slice, it is also removed.
2245 Exceptions :
throw if slice has no coord system.
2246 throw if slice argument is not passed
2247 warning
if slice is not stored in
this database
2259 # Get all of the sanity checks out of the way before storing anything
2262 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2263 throw(
'Slice argument is required');
2267 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2269 my $db = $self->db();
2270 my $seq_region_id = $slice->get_seq_region_id();
2272 if ($cs->is_sequence_level()) {
2273 my $seq_adaptor = $db->get_SequenceAdaptor();
2274 $seq_adaptor->remove($seq_region_id);
2277 if(defined($slice->{
'synonym'})){
2278 foreach my $syn (@{$slice->{
'synonym'}} ){
2279 $syn->seq_region_id($seq_region_id); # set the seq_region_id
2284 my $attrib_adaptor = $self->
db->get_AttributeAdaptor();
2285 $attrib_adaptor->remove_from_Slice($slice);
2286 $self->remove_assembly($slice);
2288 my $sr_name = $slice->seq_region_name();
2290 #remove the seq_region
2292 my $sth = $db->
dbc->
prepare(
"DELETE FROM seq_region " .
2293 "WHERE name = ? AND " .
2294 " coord_system_id = ?" );
2296 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
2297 $sth->bind_param(2,$cs->dbID,SQL_INTEGER);
2305 =head2 remove_assembly
2308 Example : $slice_adaptor->remove_assembly( $slice );
2309 Description: Deletes from the assembly table
2310 where
asm or cmp corresponds to slice
2311 Do not call
this method unless you really know what you are doing
2313 Exceptions :
throw if slice has no coord system (cs).
2314 throw if no slice provided, or argument is not a slice
2316 Status : Experimental
2321 sub remove_assembly {
2325 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2326 throw(
'Slice argument is required');
2330 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2333 # Checks complete. Delete the data
2335 my $sth = $self->db->dbc->prepare
2336 (
"DELETE FROM assembly " .
2337 "WHERE asm_seq_region_id = ? OR " .
2338 " cmp_seq_region_id = ? ");
2340 my $asm_seq_region_id = $self->get_seq_region_id( $slice );
2341 my $cmp_seq_region_id = $self->get_seq_region_id( $slice );
2343 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
2344 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
2353 =head2 fetch_assembly
2357 Example : $asm = $slice_adaptor->fetch_assembly( $slice1, $slice2 );
2358 Description: Fetches from the assembly table based on the
2359 coordinates of the two slices supplied.
2360 Returns a mapper
object mapping the two slices
2361 Do not call
this method unless you really know what you are doing
2363 Exceptions :
throw if either slice has no coord system (cs).
2364 throw if there is no mapping path between coord systems
2365 throw if there are existing mappings between either slice
2368 Status : Experimental
2373 sub fetch_assembly {
2375 my $asm_slice = shift;
2376 my $cmp_slice = shift;
2378 if(!ref($asm_slice) || !($asm_slice->isa(
'Bio::EnsEMBL::Slice') or $asm_slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2379 throw(
'Assembled Slice argument is required');
2381 if(!ref($cmp_slice) || !($cmp_slice->isa(
'Bio::EnsEMBL::Slice') or $cmp_slice->isa(
'Bio::EnsEMBL::LRGSlice')) ) {
2382 throw(
'Assembled Slice argument is required');
2385 my $asm_cs = $asm_slice->coord_system();
2386 throw(
"Slice must have attached CoordSystem.")
if(!$asm_cs);
2387 my $cmp_cs = $cmp_slice->coord_system();
2388 throw(
"Slice must have attached CoordSystem.")
if(!$cmp_cs);
2391 @{ $asm_cs->adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
2394 throw(
"No mapping path defined between "
2395 . $asm_cs->name() .
" and "
2396 . $cmp_cs->name() );
2400 # Checks complete. Fetch the data
2402 my $sth = $self->db->dbc->prepare
2403 (
"SELECT * FROM assembly " .
2404 "WHERE asm_seq_region_id = ? AND " .
2405 " cmp_seq_region_id = ? AND " .
2406 " asm_start = ? AND " .
2407 " asm_end = ? AND " .
2408 " cmp_start = ? AND " .
2409 " cmp_end = ? AND " .
2412 my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
2413 my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
2414 my $ori = $asm_slice->strand * $cmp_slice->strand;
2416 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
2417 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
2418 $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
2419 $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
2420 $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
2421 $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
2422 $sth->bind_param(7,$ori,SQL_INTEGER);
2426 my $results = $sth->fetchrow_array();
2438 =head2 store_assembly
2442 Example : $asm = $slice_adaptor->store_assembly( $slice1, $slice2 );
2443 Description: Creates an entry in the assembly table based on the
2444 coordinates of the two slices supplied. Returns a
string
2445 representation of the assembly that gets created.
2447 Exceptions :
throw if either slice has no coord system (cs).
2448 throw unless the cs rank of the asm_slice is lower than the
2450 throw if there is no mapping path between coord systems
2451 throw if the lengths of each slice are not equal
2452 throw if there are existing mappings between either slice
2454 Caller : database loading scripts
2455 Status : Experimental
2461 my $asm_slice = shift;
2462 my $cmp_slice = shift;
2465 # Get all of the sanity checks out of the way before storing anything
2468 if(!ref($asm_slice) || !($asm_slice->isa(
'Bio::EnsEMBL::Slice') or $asm_slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2469 throw(
'Assembled Slice argument is required');
2471 if(!ref($cmp_slice) || !($cmp_slice->isa(
'Bio::EnsEMBL::Slice') or $cmp_slice->isa(
'Bio::EnsEMBL::LRGSlice')) ) {
2472 throw(
'Assembled Slice argument is required');
2476 throw(
"Slice must have attached CoordSystem.")
if(!$asm_cs);
2477 my $cmp_cs = $cmp_slice->coord_system();
2478 throw(
"Slice must have attached CoordSystem.")
if(!$cmp_cs);
2481 @{ $asm_cs->
adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
2484 throw(
"No mapping path defined between "
2485 . $asm_cs->name() .
" and "
2486 . $cmp_cs->name() );
2489 if( $asm_slice->length != $cmp_slice->length ){
2490 throw(
"The lengths of the assembled and component slices are not equal" );
2493 # For now we disallow any existing mappings between the asm slice and cmp
2494 # CoordSystem and vice-versa.
2495 # Some cases of multiple mappings may be allowable by the API, but their
2496 # logic needs to be coded below.
2498 my $asm_proj = $asm_slice->project( $cmp_cs->name, $cmp_cs->version );
2499 if( @$asm_proj && $cmp_cs->name ne
'lrg' && $asm_cs->name ne
'lrg'){
2500 throw(
"Regions of the assembled slice are already assembled ".
2501 "into the component CoordSystem" );
2503 my $cmp_proj = $cmp_slice->project( $asm_cs->name, $asm_cs->version );
2504 if( @$cmp_proj && $cmp_cs->name ne
'lrg' && $asm_cs->name ne
'lrg'){
2505 throw(
"Regions of the component slice are already assembled ".
2506 "into the assembled CoordSystem" );
2510 # Checks complete. Store the data
2513 (
"INSERT INTO assembly " .
2514 " ( asm_seq_region_id, " .
2515 " cmp_seq_region_id, " .
2521 "VALUES ( ?, ?, ?, ?, ?, ?, ? )");
2523 my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
2524 my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
2525 my $ori = $asm_slice->strand * $cmp_slice->strand;
2527 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
2528 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
2529 $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
2530 $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
2531 $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
2532 $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
2533 $sth->bind_param(7,$ori,SQL_INTEGER);
2537 #use Data::Dumper qw( Dumper );
2538 #warn Dumper( $self->db->{seq_region_cache} );
2539 #$self->db->{seq_region_cache} = undef;
2540 #$self->_cache_seq_regions();
2542 my $ama = $self->db->get_AssemblyMapperAdaptor();
2543 $ama->delete_cache();
2546 return $asm_slice->name .
"<>" . $cmp_slice->name;
2553 Arg [1] : String $sql
2554 Example : ( optional )
2555 Description: overrides the
default adaptor prepare method.
2556 All slice sql will usually use the dna_db.
2557 Returntype : DBD::sth
2559 Caller :
internal, convenience method
2565 my ( $self, $sql ) = @_;
2566 return $self->db()->dnadb()->dbc->prepare($sql);
2569 sub _build_exception_cache {
2572 # build up a cache of the entire assembly exception table
2573 # it should be small anyway
2575 $self->prepare(
'SELECT ae.seq_region_id, ae.seq_region_start, '
2576 .
'ae.seq_region_end, ae.exc_type, ae.exc_seq_region_id, '
2577 .
'ae.exc_seq_region_start, ae.exc_seq_region_end '
2578 .
'FROM assembly_exception ae, '
2579 .
'seq_region sr, coord_system cs '
2580 .
'WHERE sr.seq_region_id = ae.seq_region_id '
2581 .
'AND sr.coord_system_id = cs.coord_system_id '
2582 .
'AND cs.species_id = ?' );
2584 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2588 $self->{
'asm_exc_cache'} = \%hash;
2591 while ( $row = $sth->fetchrow_arrayref() ) {
2593 $hash{ $result[0] } ||= [];
2594 push( @{ $hash{ $result[0] } }, \@result );
2597 } ## end sub _build_exception_cache
2599 =head2 cache_toplevel_seq_mappings
2602 Example : $slice_adaptor->cache_toplevel_seq_mappings();
2603 Description: caches all the assembly mappings needed
for genes
2608 : New experimental code
2612 sub cache_toplevel_seq_mappings {
2615 # Get the sequence level to map too
2620 WHERE attrib like
"%sequence_level%"
2624 my $sth = $self->prepare($sql);
2625 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2628 my $sequence_level = $sth->fetchrow_array();
2632 my $csa = $self->db->get_CoordSystemAdaptor();
2633 my $ama = $self->db->get_AssemblyMapperAdaptor();
2635 my $cs1 = $csa->fetch_by_name($sequence_level);
2637 #get level to map too.
2640 SELECT DISTINCT(cs.name)
2642 seq_region_attrib sra,
2645 WHERE sra.seq_region_id = sr.seq_region_id
2646 AND sra.attrib_type_id = at.attrib_type_id
2647 AND at.code = "toplevel"
2648 AND cs.coord_system_id = sr.coord_system_id
2649 AND cs.species_id = ?
2652 $sth = $self->prepare($sql);
2653 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2656 while ( my $csn = $sth->fetchrow_array() ) {
2657 if ( $csn eq $sequence_level ) { next }
2658 my $cs2 = $csa->fetch_by_name($csn);
2659 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
2660 $am->register_all();
2663 } ## end sub cache_toplevel_seq_mappings
2666 sub _build_circular_slice_cache {
2669 # build up a cache of circular sequence region ids
2671 $self->prepare(
"SELECT sra.seq_region_id FROM seq_region_attrib sra "
2672 .
"INNER JOIN attrib_type at ON sra.attrib_type_id = at.attrib_type_id "
2673 .
"INNER JOIN seq_region sr ON sra.seq_region_id = sr.seq_region_id "
2674 .
"INNER JOIN coord_system cs ON sr.coord_system_id = cs.coord_system_id "
2675 .
"WHERE code = 'circular_seq' and cs.species_id = ?");
2677 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2682 if ( ($id) = $sth->fetchrow_array() ) {
2683 $self->{
'circular_sr_id_cache'} = \%hash;
2684 $self->{
'is_circular'} = 1;
2686 while ( ($id) = $sth->fetchrow_array() ) {
2690 $self->{
'is_circular'} = 0;
2693 } ## end _build_circular_slice_cache
2695 =head2 _create_chromosome_alias
2698 Example : $self->create_chromosome_alias();
2699 Description: create chromosome alias in coordinate system with karyotype attributes
2707 sub _create_chromosome_alias {
2709 my $csa = $self->db->get_CoordSystemAdaptor();
2711 # to store reformatted db query results
2712 my $karyotype_seq_regions;
2714 my $karyotype_rank_string =
"karyotype_rank";
2716 # to store coord_system_id to alias
2720 # check whether seq region cache exists, otherwise run database query
2721 if ( $self->{
'karyotype_cache'} ) {
2722 $karyotype_seq_regions = $self->{
'karyotype_cache'};
2724 # get candidate coordSystem(s) to alias
2725 foreach my $seq_region_name ( keys %{$karyotype_seq_regions} ) {
2726 foreach my $coord_system_id ( keys %{ $karyotype_seq_regions->{ $seq_region_name } } ) {
2727 my $rank = $karyotype_seq_regions->{ $seq_region_name }->{ $coord_system_id }->{ rank };
2728 $cs_id_rank{ $coord_system_id } = $rank;
2732 # cannot find a coordssystem called chromosome
2733 # look through attribs to find seq_regions with karyotype
2736 $self->prepare(
"SELECT sr.seq_region_id, sr.name, sr.coord_system_id, sr.length, a.code, cs.rank "
2737 .
"FROM seq_region sr, seq_region_attrib sra, attrib_type a, coord_system cs "
2738 .
"WHERE sr.seq_region_id = sra.seq_region_id "
2739 .
"AND a.attrib_type_id = sra.attrib_type_id "
2740 .
"AND cs.coord_system_id = sr.coord_system_id "
2741 .
"AND a.code = ?" );
2743 $sth->bind_param( 1, $karyotype_rank_string );
2746 # fetch SQL results and identify coord_system_id(s) that
2747 # has(have) karyotype attribs
2748 while ( my $hashref = $sth->fetchrow_hashref() ) {
2749 my $seq_region_id = $hashref->{ seq_region_id };
2750 my $seq_region_name = $hashref->{ name };
2751 my $coord_system_id = $hashref->{ coord_system_id };
2752 my $length = $hashref->{ length };
2753 my $code = $hashref->{ code };
2754 my $rank = $hashref->{ rank };
2756 # account for possibility of multiple coord_system_ids returned from db query
2757 $karyotype_seq_regions->{ $seq_region_name }->{ $coord_system_id } = {
2760 seq_region_id => $seq_region_id,
2764 # hash to help decide which cs id to use, if multiple
2765 $cs_id_rank{ $coord_system_id } = $rank;
2770 my $cs_id_count = keys %cs_id_rank;
2771 # if number of coordSystem ids retrieved is one, simply use this coordSystem id
2772 # otherwise, choose the coordSystem with the best-rank (i.e. lowest number)
2773 if ( $cs_id_count == 1 ) {
2774 $cs_id = (keys %cs_id_rank)[0]; # get only key name in hash
2776 foreach my $id (sort { $cs_id_rank{$a} <=> $cs_id_rank{$b} } keys %cs_id_rank) {
2778 last; # exit loop after getting lowest-ranked coordSystem
id
2783 throw(
"No coordinate system to create a chromosome slice from (because there is no suitable coordinate system to create an alias to).\n");
2786 # use appropriate 'coord_system_id' to set 'alias_to' variable
2787 # first check that a chromosome object does not already exist
2788 my $cs = $csa->fetch_by_dbID( $cs_id );
2790 throw(
"Unable to retrieve CoordSystem object using dbID: $cs_id");
2792 if ( $cs->name eq
"chromosome" ) {
2793 throw(
"A chromosome CoordSystem object already exists. Cannot create chromosome alias.");
2797 # create karyotype cache in retrieved coordsystem
2798 $cs->{
'karyotype_cache'} = $karyotype_seq_regions;
2802 } ## end create_chromosome_alias
2805 =head2 _fetch_by_seq_region_synonym
2806 Args : $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz
2807 Example : $self->fetch_by_seq_region_synonym( $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz );
2808 Description: fetches all the seq region synonyms (or uses wildcard) when requested
2809 Returntype : Bio::EnsEMBL::SliceAdaptor, or none
2812 Status : (refactored from fetch_by_region)
2815 sub _fetch_by_seq_region_synonym {
2817 my ( $self, $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz ) = @_;
2820 assert_integer($start,
'start') if defined $start;
2821 assert_integer($end, 'end') if defined $end;
2823 if ( !defined($start) ) { $start = 1 }
2824 if ( !defined($strand) ) { $strand = 1 }
2826 if ( !defined($seq_region_name) ) {
2827 throw(
'seq_region_name argument is required');
2830 my $coord_system_name;
2832 my $syn_sql =
"select s.name, cs.name, cs.version from seq_region s join seq_region_synonym ss using (seq_region_id) join coord_system cs using (coord_system_id) where ss.synonym like ? and cs.species_id =? ";
2834 $coord_system_name = $cs->
name;
2835 $syn_sql .=
"AND cs.name = '" . $coord_system_name .
"' ";
2837 if (defined $version) {
2838 $syn_sql .=
"AND cs.version = '" . $version .
"' ";
2840 $syn_sql .=
"ORDER BY cs.rank ASC";
2841 my $syn_sql_sth = $self->prepare($syn_sql);
2842 $syn_sql_sth->bind_param(1, $seq_region_name, SQL_VARCHAR);
2843 $syn_sql_sth->bind_param(2, $self->species_id(), SQL_INTEGER);
2844 $syn_sql_sth->execute();
2845 my ($new_name, $new_coord_system, $new_version);
2846 $syn_sql_sth->bind_columns( \$new_name, \$new_coord_system, \$new_version);
2848 # we want to see if there was an exact match for the seq_region_synonym.
2849 # The overall logic is:
2850 # Case 1 - A synonym exactly matches the query, and either:
2851 # a - the coord_system matches
2853 # b - the user is looking for toplevel
2854 # In this case, we now have the real seq_region name, so we can re-run
2855 # fetch_by_region using that name, coord_system, and version
2856 # Case 2 - A synonym exactly matches, but none of the coord_systems match
2857 # and the user is not looking for toplevel.
2858 # In this case, we return nothing, and move on to try fuzzy-matching
2860 # Case 3 - No synonyms match, so repeat the query using wildcards
2861 my $matched_synonym = 0;
2862 while ($syn_sql_sth->fetch){
2863 $matched_synonym = 1;
2864 if ((not defined($cs)) || ($cs->name eq $new_coord_system && $cs->version eq $new_version)) {
2865 return $self->fetch_by_region($new_coord_system, $new_name, $start, $end, $strand, $new_version, $no_fuzz);
2869 if ($matched_synonym == 0) {
2870 # Try wildcard searching if no exact synonym was found
2871 $syn_sql_sth = $self->prepare($syn_sql);
2872 my $escaped_seq_region_name = $seq_region_name;
2873 my $escape_char = $self->dbc->db_handle->get_info(14);
2874 $escaped_seq_region_name =~ s/([_%])/$escape_char$1/g;
2875 $syn_sql_sth->bind_param(1,
"$escaped_seq_region_name%", SQL_VARCHAR);
2876 $syn_sql_sth->bind_param(2, $self->species_id(), SQL_INTEGER);
2877 $syn_sql_sth->execute();
2878 $syn_sql_sth->bind_columns( \$new_name, \$new_coord_system, \$new_version);
2880 my @matched_syn_wrong_cs;
2881 while ($syn_sql_sth->fetch){
2882 if ((not defined($cs)) || ($cs->name eq $new_coord_system && $cs->version eq $new_version)) {
2883 return $self->fetch_by_region($new_coord_system, $new_name, $start, $end, $strand, $new_version, $no_fuzz);
2885 push(@matched_syn_wrong_cs, [$new_coord_system, $new_version]);
2889 if (scalar(@matched_syn_wrong_cs) > 0) {
2890 my $cs_warning_text =
"Searched for a known feature on coordniate system: " .
2892 " but found it on " .
2893 join(
", ",
map {
if ($_->[1]) {
2894 $_->[0] .
":" . $_->[1]
2898 } @matched_syn_wrong_cs) .
2899 "\n No result returned, consider searching without coordinate system or use toplevel.";
2900 warning($cs_warning_text);
2904 $syn_sql_sth->finish;
2909 =head2 _fetch_by_fuzzy_matching
2910 Args : $cs, $seq_region_name, $sql, $constraint, $bind_params
2911 Example : my $fuzzy_matched_name = $self->fetch_by_fuzzy_matching( $cs, $seq_region_name, $sql, $constraint, $bind_params );
2912 Description: fetches all the fuzzy matches
for a given seq_region_name when requested
2916 Status : (refactored from fetch_by_region)
2919 sub _fetch_by_fuzzy_matching {
2921 my ($self, $cs, $seq_region_name, $sql, $constraint, $bind_params) = @_;
2923 my $csa = $self->db->get_CoordSystemAdaptor();
2926 # Do fuzzy matching, assuming that we are just missing a version
2927 # on the end of the seq_region name.
2930 $self->prepare( $sql .
" WHERE sr.name LIKE ? " . $constraint );
2932 @$bind_params[0] = [ sprintf(
'%s.%%', $seq_region_name ), SQL_VARCHAR ];
2935 foreach my $param (@$bind_params) {
2936 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
2941 my $prefix_len = length($seq_region_name) + 1;
2942 my $high_ver = undef;
2945 # Find the fuzzy-matched seq_region with the highest postfix
2946 # (which ought to be a version).
2948 my ( $tmp_name, $id, $tmp_length, $cs_id );
2949 $sth->bind_columns( \( $tmp_name, $id, $tmp_length, $cs_id ) );
2953 while ( $sth->fetch ) {
2955 ( defined($cs) ? $cs : $csa->fetch_by_dbID($cs_id) );
2957 # cache values for future reference
2958 my $arr = [ $id, $tmp_name, $cs_id, $tmp_length ];
2959 $self->{
'sr_name_cache'}->{
"$tmp_name:$cs_id"} = $arr;
2960 $self->{
'sr_id_cache'}->{
"$id"} = $arr;
2962 my $tmp_ver = substr( $tmp_name, $prefix_len );
2964 # skip versions which are non-numeric and apparently not
2966 if ( $tmp_ver !~ /^\d+$/ ) { next }
2968 # take version with highest num, if two versions match take one
2969 # with highest ranked coord system (lowest num)
2970 if ( !defined($high_ver)
2971 || $tmp_ver > $high_ver
2972 || ( $tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank )
2975 $seq_region_name = $tmp_name;
2976 $length = $tmp_length;
2977 $high_ver = $tmp_ver;
2982 } ## end
while ( $sth->fetch )
2985 # warn if fuzzy matching found more than one result
2989 "Fuzzy matching of seq_region_name "
2990 .
"returned more than one result.\n"
2991 .
"You might want to check whether the returned seq_region\n"
2992 .
"(%s:%s) is the one you intended to fetch.\n",
2993 $high_cs->name(), $seq_region_name ) );
2998 # return if we did not find any appropriate match:
2999 if ( !defined($high_ver) ) {
return; }
3001 return ($seq_region_name, $cs);