3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
34 the creation of Slice objects.
42 -host =>
'ensembldb.ensembl.org',
49 # get a slice on the entire chromosome X
50 $chr_slice = $slice_adaptor->fetch_by_region(
'chromosome',
'X' );
52 # get a slice for each clone in the database
53 foreach $cln_slice ( @{ $slice_adaptor->fetch_all(
'clone') } ) {
54 # do something with clone
57 # get a slice which is part of NT_004321
59 $slice_adaptor->fetch_by_region(
'supercontig',
'NT_004321',
62 # get all non-redundant slices from the highest possible coordinate
64 $slices = $slice_adaptor->fetch_all(
'toplevel');
66 # include non-reference regions
67 $slices = $slice_adaptor->fetch_all(
'toplevel', undef, 1 );
69 # include duplicate regions
70 $slices = $slice_adaptor->fetch_all(
'toplevel', undef, 0, 1 );
72 # split up a list of slices into smaller slices
75 $slices = split_Slices( $slices, $max_length, $overlap );
77 # store a list of slice names in a file
78 open( FILE,
">$filename" ) or die("Could not open file $filename");
79 foreach my $slice (@$slices) {
80 print FILE $slice->name(),
"\n";
84 # retrieve a list of slices from a file
85 open( FILE, $filename ) or die(
"Could not open file $filename");
86 while ( $name = <FILE> ) {
88 $slice = $slice_adaptor->fetch_by_name($name);
89 # do something with slice
94 This module is responsible
for fetching Slices representing genomic
95 regions from a database. A Details on how slices can be used are in the
103 package Bio::EnsEMBL::DBSQL::SliceAdaptor;
116 use Scalar::Util qw/looks_like_number/;
119 @ISA = (
'Bio::EnsEMBL::DBSQL::BaseAdaptor');
124 my $class = ref($caller) || $caller;
126 my $self = $class->SUPER::new(@_);
128 # use a cache which is shared and also used by the assembly
131 my $seq_region_cache = $self->db->get_SeqRegionCache();
133 $self->{
'sr_name_cache'} = $seq_region_cache->{
'name_cache'};
134 $self->{
'sr_id_cache'} = $seq_region_cache->{
'id_cache'};
136 $self->{
'lrg_region_test'} = undef;
137 my $meta_container = $self->db->get_MetaContainer();
138 my @values = $meta_container->list_value_by_key(
"LRG");
139 if(scalar(@values) and $values[0]->[0]){
140 $self->{
'lrg_region_test'} = $values[0]->[0];
146 =head2 fetch_by_region
148 Arg [1] :
string $coord_system_name (optional)
149 The name of the coordinate system of the slice to be created
150 This may be a name of an actual coordinate system or an alias
151 to a coordinate system. Valid aliases are
'seqlevel' or
153 Arg [2] :
string $seq_region_name
154 The name of the sequence region that the slice will be
156 Arg [3] :
int $start (optional,
default = 1)
157 The start of the slice on the sequence region
158 Arg [4] :
int $end (optional, default = seq_region length)
159 The end of the slice on the sequence region
160 Arg [5] :
int $strand (optional, default = 1)
161 The orientation of the slice on the sequence region
162 Arg [6] :
string $version (optional, default = default version)
163 The version of the coordinate system to use (e.g. NCBI33)
164 Arg [7] :
boolean $no_fuzz (optional, default = undef (false))
165 If true (non-zero), do not use "fuzzy matching" (see below).
166 Example : $slice = $slice_adaptor->fetch_by_region('chromosome', 'X');
167 $slice = $slice_adaptor->fetch_by_region('clone', 'AC008066.4');
168 Description: Retrieves a slice on the requested region. At a minimum the
169 name the name of the seq_region to fetch must be provided.
171 If no coordinate system name is provided than a slice on the
172 highest ranked coordinate system with a matching
173 seq_region_name will be returned. If a version but no
174 coordinate system name is provided, the same behaviour will
175 apply, but only coordinate systems of the appropriate version
176 are considered. The same applies if the 'toplevel' coordinate
177 system is specified, however in this case the version is
178 ignored. The coordinate system should always be specified if
179 it is known, since this is unambiguous and faster.
181 Some fuzzy matching is performed if no exact match for
182 the provided name is found. This allows clones to be
183 fetched even when their version is not known. For
184 example fetch_by_region('clone', 'AC008066') will
185 retrieve the sequence_region with name 'AC008066.4'.
187 The fuzzy matching can be turned off by setting the
188 $no_fuzz argument to a true value.
190 If the requested seq_region is not found in the database undef
194 Exceptions : throw if no seq_region_name is provided
195 throw if invalid coord_system_name is provided
196 throw if start > end is provided
204 # ARNE: This subroutine needs simplification!!
206 sub fetch_by_region {
207 my ( $self, $coord_system_name, $seq_region_name, $start, $end,
208 $strand, $version, $no_fuzz )
211 assert_integer($start,
'start') if defined $start;
212 assert_integer($end, 'end') if defined $end;
214 if ( !defined($start) ) { $start = 1 }
215 if ( !defined($strand) ) { $strand = 1 }
217 if ( !defined($seq_region_name) ) {
218 throw(
'seq_region_name argument is required');
222 my $csa = $self->db->get_CoordSystemAdaptor();
224 if ( defined($coord_system_name) ) {
225 $cs = $csa->fetch_by_name( $coord_system_name, $version );
227 if ( !defined($cs) ) {
228 # deal with cases where undefined coordsystem name requested is 'chromosome'
229 if ( $coord_system_name eq
"chromosome" ) {
231 # set the 'chromosome' alias, if not yet set
232 $cs = $self->_create_chromosome_alias();
235 throw( sprintf(
"Unknown coordinate system:\n"
236 .
"name='%s' version='%s' seq region name='%s'\n",
237 $coord_system_name, $version, $seq_region_name ) );
241 # fetching by toplevel is same as fetching w/o name or version
242 if ( $cs->is_top_level() ) {
247 } ## end
if ( defined($coord_system_name...))
254 if ( defined($cs) ) {
256 # if chromosome alias is defined, use karyotype_cache to access seq region data
257 # rather than a database query
258 if ( defined($cs->alias_to()) && $cs->alias_to() eq
"chromosome" ) {
260 $key =
"karyotype_cache";
262 }
else { # otherwise, search database to access seq region data, etc.
264 $sql = sprintf(
"SELECT sr.name, sr.seq_region_id, sr.length, %d "
265 .
"FROM seq_region sr ",
268 $constraint =
"AND sr.coord_system_id = ?";
269 push( @bind_params, [ $cs->dbID(), SQL_INTEGER ] );
271 $key =
"$seq_region_name:" . $cs->dbID();
276 "SELECT sr.name, sr.seq_region_id, sr.length, cs.coord_system_id "
277 .
"FROM seq_region sr, coord_system cs ";
279 $constraint =
"AND sr.coord_system_id = cs.coord_system_id "
280 .
"AND cs.species_id = ? ";
281 push( @bind_params, [ $self->species_id(), SQL_INTEGER ] );
283 if ( defined($version) ) {
284 $constraint .=
"AND cs.version = ? ";
285 push( @bind_params, [ $version, SQL_VARCHAR ] );
288 $constraint .=
"ORDER BY cs.rank ASC";
291 # check the cache so we only go to the db if necessary
295 # use $key to access karyotype cache and define $arr
296 if ( defined($key) && ($key eq
"karyotype_cache") ) {
298 my $coord_system_id = $cs->dbID();
300 # retrieve values from karyotype cache
301 my $seq_region_id = $cs->{$key}{$seq_region_name}{$coord_system_id}{
'seq_region_id'};
302 $length = $cs->{$key}{$seq_region_name}{$coord_system_id}{
'length'};
304 # populate $arr with values from karyotype cache
305 $arr = [ $seq_region_name, $seq_region_id, $length, $coord_system_id ];
307 # define $end, if not yet defined
308 if ( !defined($end) ) { $end = $length }
310 # add in check that $arr is populated
311 throw(
"Unable to popular $arr with values from karyotype cache") unless $arr;
315 if ( defined($key) ) { $arr = $self->{
'sr_name_cache'}->{$key} }
317 if ( defined($arr) ) {
321 $self->prepare( $sql .
"WHERE sr.name = ? " . $constraint );
323 unshift( @bind_params, [ $seq_region_name, SQL_VARCHAR ] );
326 foreach my $param (@bind_params) {
327 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
331 my @row = $sth->fetchrow_array();
336 # deal with cases where a coordsystem might not be defined by user
339 $slice = $self->_fetch_by_seq_region_synonym( undef, $seq_region_name, $start, $end, $strand, $version, $no_fuzz );
341 $slice = $self->_fetch_by_seq_region_synonym( $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz );
344 # check whether any slice data has been returned
345 if ( $slice && $slice->seq_region_name ) {
346 my $matched_name = $slice->seq_region_name;
348 # if matched name is different to query name, skip fuzzy matching
349 if ( $matched_name ne $seq_region_name ) {
350 $seq_region_name = $matched_name;
353 my $tmp_key_string =
"$seq_region_name:" . $slice->coord_system()->dbID();
354 $arr = $self->{
'sr_name_cache'}->{$tmp_key_string};
356 $cs = $slice->coord_system()
if (!$cs);
359 }
else { #
if no slice
object with a match returned,
try using a fuzzy match
362 my ($fuzzy_matched_name, $cs_new) = $self->_fetch_by_fuzzy_matching( $cs, $seq_region_name, $sql, $constraint, \@bind_params );
365 if (!$fuzzy_matched_name) {
370 my $tmp_key_string = $fuzzy_matched_name .
":" . $cs_new->dbID();
371 if (exists $self->{
'sr_name_cache'}->{$tmp_key_string}) {
372 $seq_region_name = $fuzzy_matched_name;
373 $arr = $self->{
'sr_name_cache'}->{$tmp_key_string};
385 ( $seq_region_name, $id, $length, $cs_id ) = @row;
387 # cache to speed up for future queries
388 my $arr = [ $id, $seq_region_name, $cs_id, $length ];
389 $self->{
'sr_name_cache'}->{
"$seq_region_name:$cs_id"} = $arr;
390 $self->{
'sr_id_cache'}->{
"$id"} = $arr;
391 $cs = $csa->fetch_by_dbID($cs_id);
393 } ## end
else [
if ( defined($arr) ) ]
396 if ( !defined($end) ) { $end = $length }
398 #If this was given then check if we've got a circular seq region otherwise
399 #let it fall through to the normal Slice method
400 if ( $end + 1 < $start ) {
401 my $cs_id = $cs->dbID();
402 my $seq_region_id = $self->{
'sr_name_cache'}->{
"$seq_region_name:$cs_id"}->[0];
403 if($self->is_circular($seq_region_id)) {
406 -COORD_SYSTEM => $cs,
407 -SEQ_REGION_NAME => $seq_region_name,
408 -SEQ_REGION_LENGTH => $length,
418 if ( defined( $self->{
'lrg_region_test'} )
419 and substr( $cs->name, 0, 3 ) eq $self->{
'lrg_region_test'} )
423 -SEQ_REGION_NAME => $seq_region_name,
424 -SEQ_REGION_LENGTH => $length,
432 'coord_system' => $cs,
433 'seq_region_name' => $seq_region_name,
434 'seq_region_length' => $length,
438 'adaptor' => $self } );
440 } ## end sub fetch_by_region
442 =head2 fetch_by_toplevel_location
444 Arg [1] :
string $location
445 Ensembl formatted location. Can be a format like
446 C<name:start-end>, C<name:start..end>, C<name:start:end>,
447 C<name:start>, C<name>. We can also support strand
448 specification as a +/- or 1/-1.
450 Location names must be separated by a C<:>. All others can be
451 separated by C<..>, C<:> or C<->.
452 Arg[2] :
boolean $no_warnings
453 Suppress warnings from
this method
454 Arg[3] :
boolean $no_fuzz
455 Stop fuzzy matching of sequence regions from occuring
456 Arg[4] :
boolean $ucsc
457 If we are unsuccessful at retriving a location retry taking any
458 possible chr prefix into account e.g. chrX and X are treated as
460 Example : my $slice = $sa->fetch_by_toplevel_location(
'X:1-10000')
461 my $slice = $sa->fetch_by_toplevel_location(
'X:1-10000:-1')
462 Description : Converts an Ensembl location/region into the sequence region
463 name, start and end and passes them onto C<fetch_by_region()>.
464 The code assumes that the required slice is on the top level
465 coordinate system. The code assumes that location formatting
466 is not perfect and will perform basic
cleanup before parsing.
468 Exceptions : If $location is
false otherwise see C<fetch_by_location()>
469 or C<fetch_by_region()>
475 sub fetch_by_toplevel_location {
476 my ($self, $location, $no_warnings, $no_fuzz, $ucsc) = @_;
477 return $self->fetch_by_location($location,
'toplevel', undef, $no_warnings, $no_fuzz, $ucsc);
480 =head2 fetch_by_location
482 Arg [1] :
string $location
483 Ensembl formatted location. Can be a format like
484 C<name:start-end>, C<name:start..end>, C<name:start:end>,
485 C<name:start>, C<name>. We can also support strand
486 specification as a +/- or 1/-1.
488 Location names must be separated by a C<:>. All others can be
489 separated by C<..>, C<:>, C<_> or C<->.
490 Arg[2] : String $coord_system_name
491 The coordinate system to retrieve
492 Arg[3] : String $coord_system_version
493 Optional parameter. Version of the coordinate system to fetch
494 Arg[4] :
boolean $no_warnings
495 Suppress warnings from
this method
496 Arg[5] :
boolean $no_fuzz
497 Stop fuzzy matching of sequence regions from occuring
498 Arg[6] :
boolean $ucsc
499 If we are unsuccessful at retriving a location retry taking any
500 possible chr prefix into account e.g. chrX and X are treated as
502 Example : my $slice = $sa->fetch_by_location(
'X:1-10000',
'chromosome')
503 my $slice = $sa->fetch_by_location(
'X:1-10000:-1',
'toplevel')
504 Description : Converts an Ensembl location/region into the sequence region
505 name, start and end and passes them onto C<fetch_by_region()>.
506 The code assumes that location formatting is not perfect and
507 will perform basic
cleanup before parsing.
509 Exceptions : If $location or coordinate system is
false otherwise
510 see C<fetch_by_region()>
516 sub fetch_by_location {
517 my ($self, $location, $coord_system_name, $coord_system_version, $no_warnings, $no_fuzz, $ucsc) = @_;
518 throw "No coordinate system name specified" unless $coord_system_name;
520 my ($seq_region_name, $start, $end, $strand) = $self->parse_location_to_values($location, $no_warnings);
522 if(! $seq_region_name) {
526 if(defined $start && defined $end && $start > $end) {
527 throw "Cannot request a slice whose start is greater than its end. Start: $start. End: $end";
530 $seq_region_name =~ s/^chr
531 my $slice = $self->fetch_by_region($coord_system_name, $seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
532 if(! defined $slice) {
534 my $ucsc_seq_region_name = $seq_region_name;
535 $ucsc_seq_region_name =~ s/^chr
536 if($ucsc_seq_region_name ne $seq_region_name) {
537 $slice = $self->fetch_by_region($coord_system_name, $ucsc_seq_region_name, $start, $end, $strand, $coord_system_version, $no_fuzz);
538 return if ! defined $slice; #
if we had no slice still then bail
541 return; #If it was not different then we didn
't have the prefix so just return (same bail as before)
545 return; #We didn't have a slice and no UCSC specifics are being triggered
550 my $name = $slice->seq_region_name();
551 if(defined $start && $start > $srl) {
552 throw "Cannot request a slice whose start ($start) is greater than $srl for $name.";
554 if(defined $end && $end > $srl) {
555 warning
"Requested end ($end) is greater than $srl for $name. Resetting to $srl" if ! $no_warnings;
556 $slice->{end} = $srl;
562 =head2 parse_location_to_values
564 Arg [1] :
string $location
565 Ensembl formatted location. Can be a format like
566 C<name:start-end>, C<name:start..end>, C<name:start:end>,
567 C<name:start>, C<name>. We can also support strand
568 specification as a +/- or 1/-1.
570 Location names must be separated by a C<:>. All others can be
571 separated by C<..>, C<:> C<_>, or C<->.
573 If the start is negative, start will be reset to 1 (e.g.: 1: -10-1,000
')
574 If both start and end are negative, returns undef (e.g.: 1: -10--1,000')
575 Arg[2] :
boolean $no_warnings
576 Suppress warnings from
this method
577 Arg[3] :
boolean $no_errors
578 Supress errors being thrown from
this method
579 Example : my ($name, $start, $end, $strand) = $sa->parse_location_to_values(
'X:1..100:1);
580 Description : Takes in an Ensembl location String and returns the parsed
582 Returntype : List. Contains name, start, end and strand
587 sub parse_location_to_values {
588 my ($self, $location, $no_warnings, $no_errors) = @_;
590 throw 'You must specify a location
' if ! $location;
592 #cleanup any nomenclature like 1 000 or 1,000
593 my $number_seps_regex = qr/\s+|,/;
594 my $separator_regex = qr/(?:-|[.]{2}|\:|_)?/; # support -, .., : and _ as separators
595 my $hgvs_nomenclature_regex = qr/(?:g\.)?/; # check for HGVS looking locations e.g. X:g.1-100
596 my $number_regex = qr/[0-9, EMKG]+/xmsi;
597 my $number_regex_signed = qr/-?[0-9, EMKG]+/xmsi; # to capture negative locations as sometimes we end up in negative location if the location is padded
598 my $strand_regex = qr/[+-1]|-1/xms;
600 my $regex = qr/^((?:\w|\.|_|-)+) \s* :? \s* $hgvs_nomenclature_regex ($number_regex_signed)? $separator_regex ($number_regex)? $separator_regex ($strand_regex)? $/xms;
601 my ($seq_region_name, $start, $end, $strand);
602 if(($seq_region_name, $start, $end, $strand) = $location =~ $regex) {
604 if(defined $strand) {
605 if(!looks_like_number($strand)) {
606 $strand = ($strand eq '+
') ? 1 : -1;
611 $start =~ s/$number_seps_regex//g;
613 warning "Start was less than 1 (${start}) which is not allowed. Resetting to 1" if ! $no_warnings;
615 unless(defined $end) {
616 # We will reach here only when the location is given without start and '-
' is used as seperator eg: 1:-10 (expected to return 1:1-10)
623 $end =~ s/$number_seps_regex//g;
625 throw "Cannot request negative or 0 end indexes through this interface. Given $end but expected something greater than 0" unless $no_errors;
631 return ($seq_region_name, $start, $end, $strand);
634 =head2 fetch_by_region_unique
636 Arg [1] : string $coord_system_name (optional)
637 The name of the coordinate system of the slice to be created
638 This may be a name of an actual coordinate system or an alias
639 to a coordinate system. Valid aliases are 'seqlevel
' or
641 Arg [2] : string $seq_region_name
642 The name of the sequence region that the slice will be
644 Arg [3] : int $start (optional, default = 1)
645 The start of the slice on the sequence region
646 Arg [4] : int $end (optional, default = seq_region length)
647 The end of the slice on the sequence region
648 Arg [5] : int $strand (optional, default = 1)
649 The orientation of the slice on the sequence region
650 Arg [6] : string $version (optional, default = default version)
651 The version of the coordinate system to use (e.g. NCBI33)
652 Arg [7] : boolean $no_fuzz (optional, default = undef (false))
653 If true (non-zero), do not use "fuzzy matching" (see below).
654 Example : $slice = $slice_adaptor->fetch_by_region_unique('chromosome
', 'HSCHR6_MHC_COX
');
655 Description: Retrieves a slice on the requested region but returns only the unique
656 parts of the slice. At a minimum the
657 name the name of the seq_region to fetch must be provided.
659 If no coordinate system name is provided than a slice on the
660 highest ranked coordinate system with a matching
661 seq_region_name will be returned. If a version but no
662 coordinate system name is provided, the same behaviour will
663 apply, but only coordinate systems of the appropriate version
664 are considered. The same applies if the 'toplevel
' coordinate
665 system is specified, however in this case the version is
666 ignored. The coordinate system should always be specified if
667 it is known, since this is unambiguous and faster.
669 Some fuzzy matching is performed if no exact match for
670 the provided name is found. This allows clones to be
671 fetched even when their version is not known. For
672 example fetch_by_region('clone
', 'AC008066
') will
673 retrieve the sequence_region with name 'AC008066.4
'.
675 The fuzzy matching can be turned off by setting the
676 $no_fuzz argument to a true value.
678 If the requested seq_region is not found in the database undef
681 Returntype : listref Bio::EnsEMBL::Slice
682 Exceptions : throw if no seq_region_name is provided
683 throw if invalid coord_system_name is provided
684 throw if start > end is provided
690 sub fetch_by_region_unique {
694 my $slice = $self->fetch_by_region(@_);
697 if ( !exists( $self->{'asm_exc_cache
'} ) ) {
698 $self->_build_exception_cache();
702 $self->{'asm_exc_cache
'}->{ $self->get_seq_region_id($slice) }
705 # Dereference symlinked assembly regions. Take out any regions
706 # which are symlinked because these are duplicates.
708 @{ $self->fetch_normalized_slice_projection($slice) };
710 foreach my $segment (@projection) {
711 if ( $segment->[2]->seq_region_name() eq $slice->seq_region_name()
712 && $segment->[2]->coord_system->equals( $slice->coord_system ) )
714 push( @out, $segment->[2] );
722 } ## end sub fetch_by_region_unique
726 Arg [1] : string $name
727 Example : $name = 'chromosome:NCBI34:X:1000000:2000000:1
';
728 $slice = $slice_adaptor->fetch_by_name($name);
729 $slice2 = $slice_adaptor->fetch_by_name($slice3->name());
730 Description: Fetches a slice using a slice name (i.e. the value returned by
731 the Slice::name method). This is useful if you wish to
732 store a unique identifier for a slice in a file or database or
733 pass a slice over a network.
734 Slice::name allows you to serialise/marshall a slice and this
735 method allows you to deserialise/unmarshal it.
737 Returns undef if no seq_region with the provided name exists in
740 Returntype : Bio::EnsEMBL::Slice or undef
741 Exceptions : throw if incorrent arg provided
752 throw("name argument is required");
755 my @array = split(/:/,$name);
757 if(scalar(@array) < 3 || scalar(@array) > 6) {
758 throw("Malformed slice name [$name]. Format is " .
759 "coord_system:version:name:start:end:strand");
762 # Rearrange arguments to suit fetch_by_region
766 $targetarray[0]=$array[0];
767 $targetarray[5]=(($array[1]&&$array[1] ne "")?$array[1]:undef);
768 $targetarray[1]=(($array[2]&&$array[2] ne "")?$array[2]:undef);
769 $targetarray[2]=(($array[3]&&$array[3] ne "")?$array[3]:undef);
770 $targetarray[3]=(($array[4]&&$array[4] ne "")?$array[4]:undef);
771 $targetarray[4]=(($array[5]&&$array[5] ne "")?$array[5]:undef);
772 return $self->fetch_by_region(@targetarray);
777 =head2 fetch_by_seq_region_id
779 Arg [1] : string $seq_region_id
780 The internal identifier of the seq_region to create this slice
782 Arg [2] : optional start
783 Arg [3] : optional end
784 Arg [4] : optional strand
785 Example : $slice = $slice_adaptor->fetch_by_seq_region_id(34413);
786 Description: Creates a slice object of an entire seq_region using the
787 seq_region internal identifier to resolve the seq_region.
788 Returns undef if no such slice exists.
789 Returntype : Bio::EnsEMBL::Slice or undef
796 sub fetch_by_seq_region_id {
797 my ( $self, $seq_region_id, $start, $end, $strand, $check_prior_ids ) = @_;
799 my $csa = $self->db->get_CoordSystemAdaptor();
800 my $arr = $self->{'sr_id_cache
'}->{$seq_region_id};
801 my ( $name, $length, $cs, $cs_id );
804 if ( $arr && defined( $arr->[2] ) ) {
805 ( $name, $cs_id, $length ) = ( $arr->[1], $arr->[2], $arr->[3] );
806 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
809 $self->prepare( "SELECT sr.name, sr.coord_system_id, sr.length "
810 . "FROM seq_region sr "
811 . "WHERE sr.seq_region_id = ? " );
813 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
816 my @row = $sth->fetchrow_array();
818 # This could have been an old seq region id so see if we can
819 # translate it into a more recent version.
820 if($check_prior_ids) {
821 if(exists $csa->{_external_seq_region_mapping}->{$seq_region_id}) {
822 my $new_seq_region_id = $csa->{_external_seq_region_mapping}->{$seq_region_id};
823 # No need to pass check prior ids flag because it's a 1 step relationship
824 return $self->fetch_by_seq_region_id($new_seq_region_id, $start, $end, $strand);
830 ( $name, $cs_id, $length ) = @row;
833 $cs = $self->db->get_CoordSystemAdaptor->fetch_by_dbID($cs_id);
835 #cache results to speed up repeated queries
836 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
838 $self->{
'sr_name_cache'}->{
"$name:$cs_id"} = $arr;
839 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
844 'coord_system' => $cs,
845 'seq_region_name' => $name,
846 'seq_region_length'=> $length,
847 'start' => $start || 1,
848 'end' => $end || $length,
849 'strand' => $strand || 1,
850 'adaptor' => $self} );
851 } ## end sub fetch_by_seq_region_id
858 The slice to fetch a seq_region_id
for
860 Description: Retrieves the seq_region id (in
this database) given a slice
861 Seq region ids are not stored on the slices themselves
862 because they are intended to be somewhat database independant
863 and seq_region_ids vary accross databases.
865 Exceptions :
throw if the seq_region of the slice is not in the db
866 throw if incorrect arg provided
867 Caller : BaseFeatureAdaptor
876 if(!$slice || !ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
877 throw(
'Slice argument is required');
880 my $seq_region_name = $slice->seq_region_name();
881 my $key = $seq_region_name.
":".$slice->coord_system->dbID();
882 my $arr = $self->{
'sr_name_cache'}->{
"$key"};
888 my $cs_id = $slice->coord_system->dbID();
890 my $sth = $self->prepare(
"SELECT seq_region_id, length " .
892 "WHERE name = ? AND coord_system_id = ?");
894 #force seq_region_name cast to string so mysql cannot treat as int
895 $sth->bind_param(1,
"$seq_region_name",SQL_VARCHAR);
896 $sth->bind_param(2,$cs_id,SQL_INTEGER);
899 my @row = $sth->fetchrow_array();
901 throw(
"No-existent seq_region [$seq_region_name] in coord system [$cs_id]");
903 my @more = $sth->fetchrow_array();
905 throw(
"Ambiguous seq_region [$seq_region_name] in coord system [$cs_id]");
908 my($seq_region_id, $length) = @row;
911 #cache information for future requests
912 $arr = [ $seq_region_id, $seq_region_name, $cs_id, $length ];
914 $self->{
'sr_name_cache'}->{
"$seq_region_name:$cs_id"} = $arr;
915 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
917 return $seq_region_id;
924 Arg [1] :
string $coord_system_name
925 The name of the coordinate system to retrieve slices of.
926 This may be a name of an acutal coordinate system or an alias
927 to a coordinate system. Valid aliases are
'seqlevel' or
929 Arg [2] :
string $coord_system_version (optional)
930 The version of the coordinate system to retrieve slices of
931 Arg [3] :
bool $include_non_reference (optional)
932 If
this argument is not provided then only reference slices
933 will be returned. If set, both reference and non reference
934 slices will be returned.
935 Arg [4] :
int $include_duplicates (optional)
936 If set duplicate regions will be returned.
938 NOTE:
if you
do not use
this option and you have a PAR
939 (pseudo-autosomal region) at the beginning of your seq_region
940 then your slice will not start at position 1, so coordinates
941 retrieved from
this slice might not be what you expected.
943 Arg[5] :
bool $include_lrg (optional) (
default 0)
944 If set lrg regions will be returned aswell.
947 Example : @chromos = @{$slice_adaptor->fetch_all(
'chromosome',
'NCBI33')};
948 @contigs = @{$slice_adaptor->fetch_all(
'contig')};
950 # get even non-reference regions
951 @slices = @{$slice_adaptor->fetch_all(
'toplevel',undef,1)};
953 # include duplicate regions (such as pseudo autosomal regions)
954 @slices = @{$slice_adaptor->fetch_all(
'toplevel', undef,0,1)};
956 Description: Retrieves slices of all seq_regions
for a given coordinate
957 system. This is analagous to the methods fetch_all which were
958 formerly on the ChromosomeAdaptor, RawContigAdaptor and
959 CloneAdaptor classes. Slices fetched span the entire
960 seq_regions and are on the forward strand.
961 If the coordinate system with the provided name and version
962 does not exist an empty list is returned.
963 If the coordinate system name provided is
'toplevel', all
964 non-redundant toplevel slices are returned (note that any
965 coord_system_version argument is ignored in that
case).
967 Retrieved slices can be broken into smaller slices
using the
970 Returntype : listref of Bio::EnsEMBL::Slices
980 my $cs_version = shift ||
'';
982 my ($include_non_reference, $include_duplicates, $include_lrg) = @_;
985 # verify existance of requested coord system and get its id
987 my $csa = $self->db->get_CoordSystemAdaptor();
988 my $orig_cs = $csa->fetch_by_name($cs_name, $cs_version);
990 return []
if ( !$orig_cs );
996 # Get a hash of non reference seq regions
998 if ( !$include_non_reference ) {
1000 $self->prepare(
'SELECT sr.seq_region_id '
1001 .
'FROM seq_region sr, seq_region_attrib sra, '
1002 .
'attrib_type at, coord_system cs '
1003 .
'WHERE at.code = "non_ref" '
1004 .
'AND sra.seq_region_id = sr.seq_region_id '
1005 .
'AND at.attrib_type_id = sra.attrib_type_id '
1006 .
'AND sr.coord_system_id = cs.coord_system_id '
1007 .
'AND cs.species_id = ?' );
1009 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1012 my ($seq_region_id);
1013 $sth->bind_columns( \$seq_region_id );
1015 while ( $sth->fetch() ) {
1016 $bad_vals{$seq_region_id} = 1;
1021 # if we do not want lrg's then add them to the bad list;
1023 if ( !$include_lrg ) {
1025 $self->prepare(
'SELECT sr.seq_region_id '
1026 .
'FROM seq_region sr, seq_region_attrib sra, '
1027 .
'attrib_type at, coord_system cs '
1028 .
'WHERE at.code = "LRG" '
1029 .
'AND sra.seq_region_id = sr.seq_region_id '
1030 .
'AND at.attrib_type_id = sra.attrib_type_id '
1031 .
'AND sr.coord_system_id = cs.coord_system_id '
1032 .
'AND cs.species_id = ?' );
1034 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1037 my ($seq_region_id);
1038 $sth->bind_columns( \$seq_region_id );
1040 while ( $sth->fetch() ) {
1041 $bad_vals{$seq_region_id} = 1;
1046 # Retrieve the seq_regions from the database
1050 if ( $orig_cs->is_top_level() ) {
1052 $self->prepare(
'SELECT sr.seq_region_id, sr.name, '
1053 .
'sr.length, sr.coord_system_id '
1054 .
'FROM seq_region sr, seq_region_attrib sra, '
1055 .
'attrib_type at, coord_system cs '
1056 .
'WHERE at.code = "toplevel" '
1057 .
'AND at.attrib_type_id = sra.attrib_type_id '
1058 .
'AND sra.seq_region_id = sr.seq_region_id '
1059 .
'AND sr.coord_system_id = cs.coord_system_id '
1060 .
'AND cs.species_id = ?' );
1062 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1066 $self->prepare(
'SELECT sr.seq_region_id, sr.name, '
1067 .
'sr.length, sr.coord_system_id '
1068 .
'FROM seq_region sr '
1069 .
'WHERE sr.coord_system_id = ?' );
1071 $sth->bind_param( 1, $orig_cs->dbID, SQL_INTEGER );
1075 my ( $seq_region_id, $name, $length, $cs_id );
1076 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
1078 my $cache_count = 0;
1081 while($sth->fetch()) {
1082 if(!defined($bad_vals{$seq_region_id})){
1083 my $cs = $csa->fetch_by_dbID($cs_id);
1086 throw(
"seq_region $name references non-existent coord_system $cs_id.");
1089 #cache values for future reference, but stop adding to the cache once we
1090 #we know we have filled it up
1091 if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
1092 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
1094 $self->{
'sr_name_cache'}->{
"$name:$cs_id"} = $arr;
1095 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
1104 'seq_region_name' => $name,
1105 'seq_region_length'=> $length,
1106 'coord_system' => $cs,
1107 'adaptor' => $self});
1109 if(!defined($include_duplicates) or !$include_duplicates){
1110 # test if this slice *could* have a duplicate (exception) region
1111 $self->_build_exception_cache()
if(!exists $self->{
'asm_exc_cache'});
1112 if(exists $self->{asm_exc_cache}->{$seq_region_id}) {
1114 # Dereference symlinked assembly regions. Take out
1115 # any regions which are symlinked because these are duplicates
1116 my @projection = @{$self->fetch_normalized_slice_projection($slice)};
1117 foreach my $segment ( @projection) {
1119 $segment->[2]->coord_system->equals($slice->coord_system)) {
1120 push @out, $segment->[2];
1124 # no duplicate regions
1128 # we want duplicates anyway so do not do any checks
1137 =head2 fetch_all_by_genome_component
1139 Arg [1] :
string $genome_component_name
1140 The name of the genome component to retrieve slices of.
1141 Example : @slices = @{$slice_adaptor->fetch_all_by_genome_component(
'A')};
1142 Description: Returns the list of all top level slices
for a a given
1144 Returntype : listref of Bio::EnsEMBL::Slices
1145 Exceptions : If argument is not provided or is not a valid genome
1152 sub fetch_all_by_genome_component {
1154 my $genome_component = shift;
1155 defined $genome_component or
1156 throw "Undefined genome component";
1158 # check the provided genome component is valid
1159 my $gc = $self->db->get_adaptor(
'GenomeContainer');
1160 my $is_valid_component = grep { $_ eq $genome_component }
1161 @{$gc->get_genome_components};
1162 throw "Invalid genome component"
1163 unless $is_valid_component;
1166 # Retrieve the toplevel seq_regions from the database
1169 $self->prepare(
"SELECT sr.seq_region_id, sr.name, sr.length, sr.coord_system_id "
1170 .
"FROM seq_region sr "
1171 .
"JOIN seq_region_attrib sa1 USING (seq_region_id) "
1172 .
"JOIN attrib_type a1 ON sa1.attrib_type_id = a1.attrib_type_id "
1173 .
"JOIN seq_region_attrib sa2 USING (seq_region_id) "
1174 .
"JOIN attrib_type a2 ON sa2.attrib_type_id = a2.attrib_type_id "
1175 .
"WHERE sa2.value=? and a1.code='toplevel' and a2.code='genome_component'"
1178 if (looks_like_number($genome_component)) {
1179 $sth->bind_param( 1, $genome_component, SQL_INTEGER );
1181 $sth->bind_param( 1, $genome_component, SQL_VARCHAR );
1186 my ( $seq_region_id, $name, $length, $cs_id );
1187 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id ) );
1189 my $cache_count = 0;
1192 my $csa = $self->db->get_CoordSystemAdaptor();
1194 while($sth->fetch()) {
1195 my $cs = $csa->fetch_by_dbID($cs_id);
1196 throw(
"seq_region $name references non-existent coord_system $cs_id.")
1199 # cache values for future reference, but stop adding to the cache once we
1200 # we know we have filled it up
1201 if($cache_count < $Bio::EnsEMBL::Utils::SeqRegionCache::SEQ_REGION_CACHE_SIZE) {
1202 my $arr = [ $seq_region_id, $name, $cs_id, $length ];
1204 $self->{
'sr_name_cache'}->{
"$name:$cs_id"} = $arr;
1205 $self->{
'sr_id_cache'}->{
"$seq_region_id"} = $arr;
1214 'seq_region_name' => $name,
1215 'seq_region_length'=> $length,
1216 'coord_system' => $cs,
1217 'adaptor' => $self});
1223 =head2 get_genome_component_for_slice
1227 Description: Returns the genome component of a slice
1228 Returntype : Scalar; the identifier of the genome component of the slice
1235 sub get_genome_component_for_slice {
1236 my ($self, $slice) = @_;
1238 throw "Undefined slice" unless defined $slice;
1239 throw "Argument is not a slice"
1240 unless $slice->isa(
"Bio::EnsEMBL::Slice");
1242 my $seq_region_id = $self->get_seq_region_id($slice);
1245 $self->prepare(
"SELECT sa.value "
1246 .
"FROM seq_region_attrib sa "
1247 .
"JOIN seq_region sr USING (seq_region_id) "
1248 .
"JOIN attrib_type at ON sa.attrib_type_id = at.attrib_type_id "
1249 .
"WHERE sr.seq_region_id=? AND at.code='genome_component'"
1251 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
1254 my $genome_component;
1255 $sth->bind_columns( \( $genome_component ) );
1258 return $genome_component;
1261 =head2 fetch_all_karyotype
1263 Example : my $top = $slice_adptor->fetch_all_karyotype()
1264 Description: returns the list of all slices which are part of the karyotype
1265 Returntype : listref of Bio::EnsEMBL::Slices
1271 sub fetch_all_karyotype {
1274 my $csa = $self->db->get_CoordSystemAdaptor();
1277 $self->prepare(
'SELECT sr.seq_region_id, sr.name, '
1278 .
'sr.length, sr.coord_system_id, sra.value '
1279 .
'FROM seq_region sr, seq_region_attrib sra, '
1280 .
'attrib_type at, coord_system cs '
1281 .
'WHERE at.code = "karyotype_rank" '
1282 .
'AND at.attrib_type_id = sra.attrib_type_id '
1283 .
'AND sra.seq_region_id = sr.seq_region_id '
1284 .
'AND sr.coord_system_id = cs.coord_system_id '
1285 .
'AND cs.species_id = ?');
1286 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
1288 my ( $seq_region_id, $name, $length, $cs_id, $rank );
1289 $sth->bind_columns( \( $seq_region_id, $name, $length, $cs_id, $rank ) );
1292 while($sth->fetch()) {
1293 my $cs = $csa->fetch_by_dbID($cs_id);
1297 'end' => ($length+0),
1299 'seq_region_name' => $name,
1300 'seq_region_length'=> ($length+0),
1301 'coord_system' => $cs,
1304 'karyotype_rank' => ($rank+0),
1310 #Sort using Perl as value in MySQL is a text field and not portable
1311 @out = sort { $a->{karyotype_rank} <=> $b->{karyotype_rank} } @out;
1317 Arg :
int seq_region_id
1318 Example : my $top = $slice_adptor->
is_toplevel($seq_region_id)
1319 Description: Returns 1
if slice is a toplevel slice
else 0
1321 Caller : Slice method is_toplevel
1330 my $sth = $self->prepare(
1331 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1332 .
"WHERE sra.seq_region_id = ? "
1333 .
"AND at.attrib_type_id = sra.attrib_type_id "
1334 .
"AND at.code = 'toplevel'" );
1336 $sth->bind_param( 1, $id, SQL_INTEGER );
1340 $sth->bind_columns( \$code );
1342 while ( $sth->fetch ) {
1353 Arg :
int seq_region_id
1354 Example : my $karyotype = $slice_adptor->has_karyotype($seq_region_id)
1355 Description: Returns 1
if slice is a part of a karyotype
else 0
1366 my $sth = $self->prepare(
1367 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1368 .
"WHERE sra.seq_region_id = ? "
1369 .
"AND at.attrib_type_id = sra.attrib_type_id "
1370 .
"AND at.code = 'karyotype_rank'" );
1372 $sth->bind_param( 1, $id, SQL_INTEGER );
1376 $sth->bind_columns( \$code );
1378 while ( $sth->fetch ) {
1387 =head2 get_karyotype_rank
1388 Arg :
int seq_region_id
1389 Example : my $rank = $slice_adptor->get_karyotype_rank($seq_region_id)
1390 Description: Returns the rank of a slice
if it is part of the karyotype
else 0
1392 Caller : Slice method get_karyotype_rank
1397 sub get_karyotype_rank {
1401 my $sth = $self->prepare(
1402 "SELECT sra.value from seq_region_attrib sra, attrib_type at "
1403 .
"WHERE sra.seq_region_id = ? "
1404 .
"AND at.attrib_type_id = sra.attrib_type_id "
1405 .
"AND at.code = 'karyotype_rank'" );
1407 $sth->bind_param( 1, $id, SQL_INTEGER );
1411 $sth->bind_columns( \$code );
1413 my $rank = $sth->fetchrow_array();
1422 Arg :
int seq_region_id
1423 Example : my $reference = $slice_adptor->is_reference($seq_region_id)
1424 Description: Returns 1
if slice is a reference slice
else 0
1426 Caller : Slice method is_reference
1435 my $sth = $self->prepare(
1436 "SELECT at.code from seq_region_attrib sra, attrib_type at "
1437 .
"WHERE sra.seq_region_id = ? "
1438 .
"AND at.attrib_type_id = sra.attrib_type_id "
1439 .
"AND at.code = 'non_ref'" );
1441 $sth->bind_param( 1, $id, SQL_INTEGER );
1445 $sth->bind_columns( \$code );
1447 while ( $sth->fetch ) {
1458 Arg[1] :
int seq_region_id
1459 Example : my $circular = $slice_adptor->is_circular($seq_region_id);
1460 Description : Indicates
if the sequence region was circular or not
1461 Returntype : Boolean
1466 my ($self, $id) = @_;
1468 if (! defined $self->{is_circular}) {
1469 $self->_build_circular_slice_cache();
1472 return 0
if $self->{is_circular} == 0;
1473 return (exists $self->{circular_sr_id_cache}->{$id}) ? 1 : 0;
1477 =head2 fetch_by_chr_band
1479 Title : fetch_by_chr_band
1481 Function: create a Slice representing a series of bands
1484 Args : the band name
1489 sub fetch_by_chr_band {
1490 my ( $self, $chr, $band ) = @_;
1492 my $chr_slice = $self->fetch_by_region(
'toplevel', $chr );
1496 $self->prepare(
'SELECT MIN(k.seq_region_start), '
1497 .
'MAX(k.seq_region_end) '
1498 .
'FROM karyotype k '
1499 .
'WHERE k.seq_region_id = ? '
1500 .
'AND k.band LIKE ?' );
1502 $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
1503 $sth->bind_param( 2,
"$band%", SQL_VARCHAR );
1506 my ( $slice_start, $slice_end ) = $sth->fetchrow_array;
1508 if ( defined $slice_start ) {
1510 $self->fetch_by_region(
'toplevel', $chr,
1511 $slice_start, $slice_end );
1514 throw(
"Band not recognised in database");
1515 } ## end sub fetch_by_chr_band
1519 =head2 fetch_by_exon_stable_id
1521 Arg [1] :
string $exonid
1522 The stable
id of the
exon around which the slice is
1524 Arg [2] : (optional)
int $size
1525 The length of the flanking regions the slice should encompass
1526 on either side of the
exon (0 by
default)
1527 Example : $slc = $sa->fetch_by_exon_stable_id(
'ENSE00000302930',10);
1528 Description: Creates a slice around the region of the specified
exon.
1529 If a context size is given, the slice is extended by that
1530 number of basepairs on either side of the
exon.
1532 The slice will be created in the
exon's native coordinate system
1533 and in the forward orientation.
1534 Returntype : Bio::EnsEMBL::Slice
1535 Exceptions : Thrown if the exon is not in the database.
1541 sub fetch_by_exon_stable_id{
1542 my ($self,$exonid,$size) = @_;
1544 throw('Exon argument is required.
') if(!$exonid);
1546 my $ea = $self->db->get_ExonAdaptor;
1547 my $exon = $ea->fetch_by_stable_id($exonid);
1549 throw("Exon [$exonid] does not exist in DB.") if(!$exon);
1551 return $self->fetch_by_Feature($exon, $size);
1554 =head2 fetch_by_transcript_stable_id
1556 Arg [1] : string $transcriptid
1557 The stable id of the transcript around which the slice is
1559 Arg [2] : (optional) int $size
1560 The length of the flanking regions the slice should encompass
1561 on either side of the transcript (0 by default)
1562 Example : $slc = $sa->fetch_by_transcript_stable_id('ENST00000302930
',10);
1563 Description: Creates a slice around the region of the specified transcript.
1564 If a context size is given, the slice is extended by that
1565 number of basepairs on either side of the
1568 The slice will be created in the transcript's native coordinate
1569 system and in the forward orientation.
1571 Exceptions : Thrown
if the
transcript is not in the database.
1577 sub fetch_by_transcript_stable_id{
1578 my ($self,$transcriptid,$size) = @_;
1580 throw(
'Transcript argument is required.')
if(!$transcriptid);
1582 my $ta = $self->db->get_TranscriptAdaptor;
1583 my $transcript = $ta->fetch_by_stable_id($transcriptid);
1585 throw(
"Transcript [$transcriptid] does not exist in DB.")
if(!$transcript);
1587 return $self->fetch_by_Feature($transcript, $size);
1590 =head2 fetch_by_transcript_id
1592 Arg [1] :
int $transcriptid
1593 The unique database identifier of the
transcript around which
1594 the slice is desired
1595 Arg [2] : (optional)
int $size
1596 The length of the flanking regions the slice should encompass
1597 on either side of the
transcript (0 by
default)
1598 Example : $slc = $sa->fetch_by_transcript_id(24, 1000);
1599 Description: Creates a slice around the region of the specified
transcript.
1600 If a context size is given, the slice is extended by that
1601 number of basepairs on either side of the
1604 The slice will be created in the
transcript's native coordinate
1605 system and in the forward orientation.
1606 Returntype : Bio::EnsEMBL::Slice
1607 Exceptions : throw on incorrect args
1608 throw if transcript is not in database
1614 sub fetch_by_transcript_id {
1615 my ($self,$transcriptid,$size) = @_;
1617 throw('Transcript
id argument is required.
') if(!$transcriptid);
1619 my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
1620 my $transcript = $transcript_adaptor->fetch_by_dbID($transcriptid);
1622 throw("Transcript [$transcriptid] does not exist in DB.") if(!$transcript);
1624 return $self->fetch_by_Feature($transcript, $size);
1629 =head2 fetch_by_gene_stable_id
1631 Arg [1] : string $geneid
1632 The stable id of the gene around which the slice is
1634 Arg [2] : (optional) int $size
1635 The length of the flanking regions the slice should encompass
1636 on either side of the gene (0 by default)
1637 Example : $slc = $sa->fetch_by_gene_stable_id('ENSG00000012123
',10);
1638 Description: Creates a slice around the region of the specified gene.
1639 If a context size is given, the slice is extended by that
1640 number of basepairs on either side of the gene.
1642 The slice will be created in the gene's native coordinate system
1643 and in the forward orientation.
1645 Exceptions :
throw on incorrect args
1652 sub fetch_by_gene_stable_id {
1653 my ($self,$geneid,$size) = @_;
1655 throw(
'Gene argument is required.')
if(!$geneid);
1657 my $gene_adaptor = $self->db->get_GeneAdaptor();
1658 my $gene = $gene_adaptor->fetch_by_stable_id($geneid);
1660 throw(
"Gene [$geneid] does not exist in DB.")
if(!$gene);
1662 return $self->fetch_by_Feature($gene, $size);
1667 =head2 fetch_by_Feature
1670 The feature to fetch the slice around
1671 Arg [2] :
int size (optional)
1672 The desired number of flanking basepairs around the feature.
1673 The size may also be provided as a percentage of the feature
1674 size such as 200% or 80.5%.
1675 Example : $slice = $slice_adaptor->fetch_by_Feature($feat, 100);
1676 Description: Retrieves a slice around a specific feature. All
this really
1677 does is
return a resized version of the slice that the feature
1678 is already on. Note that slices returned from
this method
1679 are always on the forward strand of the seq_region regardless of
1680 the strandedness of the feature passed in.
1682 Exceptions :
throw if the feature does not have an attached slice
1683 throw if feature argument is not provided
1684 Caller : fetch_by_gene_stable_id, fetch_by_transcript_stable_id,
1685 fetch_by_gene_id, fetch_by_transcript_id
1690 sub fetch_by_Feature{
1691 my ($self, $feature, $size) = @_;
1695 if(!ref($feature) || !$feature->isa(
'Bio::EnsEMBL::Feature')) {
1696 throw(
'Feature argument expected.');
1699 my $slice = $feature->
slice();
1700 if(!$slice || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice') )) {
1701 throw(
'Feature must be attached to a valid slice.');
1705 my $fstart = $feature->start();
1706 my $fend = $feature->end();
1707 if(!defined($fstart) || !defined($fend)) {
1708 throw(
'Feature must have defined start and end.');
1711 #convert the feature slice coordinates to seq_region coordinates
1712 my $slice_start = $slice->start();
1713 my $slice_end = $slice->end();
1714 my $slice_strand = $slice->strand();
1715 if($slice_start != 1 || $slice_strand != 1) {
1716 if($slice_strand == 1) {
1717 $fstart = $fstart + $slice_start - 1;
1718 $fend = $fend + $slice_start - 1;
1720 my $tmp_start = $fstart;
1721 $fstart = $slice_end - $fend + 1;
1722 $fend = $slice_end - $tmp_start + 1;
1726 ## Size may be stored as a %age of the length of the feature
1727 ## Size = 100% gives no context
1728 ## Size = 200% gives context - 50% the size of the feature either side of
1731 $size = int( ($1-100)/200 * ($fend-$fstart+1) ) if( $size =~/([\d+\.]+)%/ );
1733 #return a new slice covering the region of the feature
1735 'seq_region_name' => $slice->seq_region_name,
1736 'seq_region_length' => $slice->seq_region_length,
1737 'coord_system' => $slice->coord_system,
1738 'start' => $fstart - $size,
1739 'end' => $fend + $size,
1741 'adaptor' => $self});
1742 $S->{
'_raw_feature_strand'} = $feature->
strand * $slice_strand
if $feature->can(
'strand');
1748 =head2 fetch_by_misc_feature_attribute
1750 Arg [1] :
string $attribute_type
1751 The code of the attribute type
1752 Arg [2] : (optional)
string $attribute_value
1753 The value of the attribute to fetch by
1754 Arg [3] : (optional)
int $size
1755 The amount of flanking region around the misc feature desired.
1756 Example : $slice = $sa->fetch_by_misc_feature_attribute(
'superctg',
1758 $slice = $sa->fetch_by_misc_feature_attribute(
'synonym',
1761 Description: Fetches a slice around a MiscFeature with a particular
1762 attribute type and value. If no value is specified then
1763 the feature with the particular attribute is used.
1764 If no size is specified then 0 is used.
1766 Exceptions : Throw
if no feature with the specified attribute type and value
1767 exists in the database
1768 Warning
if multiple features with the specified attribute type
1769 and value exist in the database.
1775 sub fetch_by_misc_feature_attribute {
1776 my ($self, $attrib_type_code, $attrib_value, $size) = @_;
1778 my $mfa = $self->db()->get_MiscFeatureAdaptor();
1780 my $feats = $mfa->fetch_all_by_attribute_type_value($attrib_type_code,
1784 throw(
"MiscFeature with $attrib_type_code=$attrib_value does " .
1785 "not exist in DB.");
1789 warning(
"MiscFeature with $attrib_type_code=$attrib_value is " .
1790 "ambiguous - using first one found.");
1793 my ($feat) = @$feats;
1795 return $self->fetch_by_Feature($feat, $size);
1798 =head2 fetch_by_misc_feature_set
1800 Arg [1] :
string $attribute_type
1801 The code of the attribute type
1802 Arg [2] : (optional)
string $attribute_value
1803 The value of the attribute to fetch by
1804 Arg [3] : (optional) the name of the set
1805 Arg [4] : (optional)
int $size
1806 The amount of flanking region around the misc feature desired.
1807 Example : $slice = $sa->fetch_by_misc_feature_set(
'clone',
1810 Description: Fetches a slice around a MiscFeature with a particular
1811 attribute type, value and set. If no value is specified then
1812 the feature with the particular attribute is used.
1813 A size can be specified to include flanking region
1814 If no size is specified then 0 is used.
1816 Exceptions : Throw
if no feature with the specified attribute type, value and set
1817 exists in the database
1818 Warning
if multiple features with the specified attribute type, set
1819 and value exist in the database.
1825 sub fetch_by_misc_feature_set {
1826 my ($self, $attrib_type_code, $attrib_value, $misc_set, $size) = @_;
1828 my $mfa = $self->db()->get_MiscFeatureAdaptor();
1830 my $feat = $mfa->fetch_by_attribute_set_value($attrib_type_code,
1834 return $self->fetch_by_Feature($feat, $size);
1837 =head2 fetch_normalized_slice_projection
1840 Arg [2] :
boolean $filter_projections
1841 Optionally filter the projections to remove anything
1842 which is the same sequence region as the given slice
1843 Example : ( optional )
1844 Description: gives back a project style result. The returned slices
1845 represent the areas to which there are symlinks
for the
1846 given slice. start, end show which area on given slice is
1848 Returntype : [[start,end,$slice][]]
1850 Caller : BaseFeatureAdaptor
1856 sub fetch_normalized_slice_projection {
1859 my $filter_projections = shift;
1863 $self->_build_exception_cache()
if(!exists($self->{
'asm_exc_cache'}));
1865 my $result = $self->{
'asm_exc_cache'}->{$slice_seq_region_id};
1871 foreach my $row (@$result) {
1872 my ( $seq_region_id, $seq_region_start, $seq_region_end,
1873 $exc_type, $exc_seq_region_id, $exc_seq_region_start,
1874 $exc_seq_region_end ) = @$row;
1876 # need overlapping PAR and all HAPs if any
1877 if( $exc_type eq
"PAR" ) {
1878 if( $seq_region_start <= $slice->end() &&
1879 $seq_region_end >= $slice->start() ) {
1880 push( @pars, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
1881 $exc_seq_region_start, $exc_seq_region_end ] );
1884 push( @haps, [ $seq_region_start, $seq_region_end, $exc_seq_region_id,
1885 $exc_seq_region_start, $exc_seq_region_end ] );
1889 if(!@pars && !@haps) {
1890 #just return this slice, there were no haps or pars
1891 return [bless ( [1,$slice->length, $slice],
"Bio::EnsEMBL::ProjectionSegment")];
1896 my @sort_haps = sort { $a->[1] <=> $b->[1] } @haps;
1903 my $seq_region_slice = $self->fetch_by_seq_region_id($slice_seq_region_id);
1904 my $exc_slice = $self->fetch_by_seq_region_id( $sort_haps[0][2] );
1905 my $len1 = $seq_region_slice->length();
1906 my $len2 = $exc_slice->length();
1907 my $max_len = ($len1 > $len2) ? $len1 : $len2;
1909 while($count <= scalar(@sort_haps) and !$last){
1912 if(defined($sort_haps[$count]) and defined($sort_haps[$count][0]) ){
1913 $hap_end = $sort_haps[$count][0]-1;
1914 $chr_end = $sort_haps[$count][3]-1
1920 my $diff = ($hap_end-$hap_start)-($chr_end-$chr_start);
1922 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end+$diff] );
1925 push( @syms, [ $hap_start, $hap_end - $diff, $sort_haps[0][2], $chr_start, $chr_end] );
1928 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
1932 if($hap_end and $hap_start < $len1){ #
if hap at start or end of chromosome
1933 push( @syms, [ $hap_start, $hap_end, $sort_haps[0][2], $chr_start, $chr_end] );
1935 $chr_start = $chr_end + ($sort_haps[$count][4]-$sort_haps[$count][3]) + 2;
1936 $hap_start = $hap_end + ($sort_haps[$count][1]-$sort_haps[$count][0]) + 2;
1944 # for now haps and pars should not be both there, but in theory we
1945 # could handle it here by cleverly merging the pars into the existing syms,
1947 push( @syms, @pars );
1951 for my $sym ( @syms ) {
1952 $mapper->add_map_coordinates( $slice_seq_region_id, $sym->[0], $sym->[1],
1953 1, $sym->[2], $sym->[3], $sym->[4] );
1956 my @linked = $mapper->map_coordinates( $slice_seq_region_id,
1957 $slice->start(), $slice->end(),
1958 $slice->strand(),
"sym" );
1960 # gaps are regions where there is no mapping to another region
1963 #if there was only one coord and it is a gap, we know it is just the
1964 #same slice with no overlapping symlinks
1965 if(@linked == 1 && $linked[0]->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
1966 return [bless( [1,$slice->length, $slice],
"Bio::EnsEMBL::ProjectionSegment" )];
1970 for my $coord ( @linked ) {
1971 if( $coord->isa(
"Bio::EnsEMBL::Mapper::Gap" )) {
1973 'start' => $coord->start(),
1974 'end' => $coord->end(),
1975 'strand' => $slice->strand(),
1976 'coord_system' => $slice->coord_system(),
1978 'seq_region_name' => $slice->seq_region_name(),
1979 'seq_region_length' => $slice->seq_region_length()});
1980 push( @out, bless ( [ $rel_start, $coord->length()+$rel_start-1,
1981 $exc_slice ],
"Bio::EnsEMBL::ProjectionSegment") );
1983 my $exc_slice = $self->fetch_by_seq_region_id( $coord->id() );
1986 'start' => $coord->start(),
1987 'end' => $coord->end(),
1988 'strand' => $coord->strand(),
1989 'seq_region_name' => $exc_slice->seq_region_name(),
1990 'seq_region_length' => $exc_slice->seq_region_length(),
1991 'coord_system' => $exc_slice->coord_system(),
1995 push( @out, bless( [ $rel_start, $coord->length() + $rel_start - 1,
1996 $exc2_slice ],
"Bio::EnsEMBL::ProjectionSegment") );
1998 $rel_start += $coord->length();
2001 if($filter_projections) {
2002 return $self->_filter_Slice_projections($slice, \@out);
2007 =head2 _filter_Slice_projections
2010 Arg [2] : Array The projections which were fetched from the previous slice
2011 Description : Removes any projections which occur within the same sequence
2012 region as the given Slice
object
2014 of projected segments
2017 sub _filter_Slice_projections {
2018 my ($self, $slice, $projections) = @_;
2019 my @proj = @{ $projections };
2021 throw(
'Was not given any projections to filter. Database may have incorrect assembly_exception information loaded');
2024 # Want to get features on the FULL original slice as well as any
2027 # Filter out partial slices from projection that are on same
2028 # seq_region as original slice.
2030 my $sr_id = $slice->get_seq_region_id();
2034 my $segment = bless( [ 1, $slice->length(), $slice ],
2035 'Bio::EnsEMBL::ProjectionSegment' );
2036 push( @proj, $segment );
2044 Arg [2] : (optional) $seqref reference to a
string
2045 The sequence associated with the slice to be stored.
2047 $seq_region_id = $slice_adaptor->store($slice, \$sequence);
2048 Description: This stores a slice as a sequence region in the database
2049 and returns the seq region
id. The passed in slice must
2050 start at 1, and must have a valid seq_region name and coordinate
2051 system. The attached coordinate system must already be stored in
2052 the database. The sequence region is assumed to start at 1 and
2053 to have a length equalling the length of the slice. The end of
2054 the slice must equal the seq_region_length.
2055 If the slice coordinate system is the sequence level coordinate
2056 system then the seqref argument must also be passed. If the
2057 slice coordinate system is NOT a sequence level coordinate
2058 system then the sequence argument cannot be passed.
2060 Exceptions :
throw if slice has no coord system.
2061 throw if slice coord system is not already stored.
2062 throw if slice coord system is seqlevel and no sequence is
2064 throw if slice coord system is not seqlevel and sequence is
2066 throw if slice does not start at 1
2067 throw if sequence is provided and the sequence length does not
2068 match the slice length.
2069 throw if the SQL insert fails (e.g. on duplicate seq region)
2070 throw if slice argument is not passed
2071 throw if the slice end is not equal to seq_region_length
2072 Caller : database loading scripts
2083 my $not_dna = shift;
2086 # Get all of the sanity checks out of the way before storing anything
2089 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2090 throw(
'Slice argument is required');
2094 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2096 my $db = $self->db();
2097 if(!$cs->is_stored($db)) {
2098 throw(
"Slice CoordSystem must already be stored in DB.")
2101 if($slice->start != 1 || $slice->strand != 1) {
2102 throw(
"Slice must have start==1 and strand==1.");
2105 if($slice->end() != $slice->seq_region_length()) {
2106 throw(
"Slice must have end==seq_region_length");
2109 my $sr_len = $slice->length();
2110 my $sr_name = $slice->seq_region_name();
2112 if($sr_name eq
'') {
2113 throw(
"Slice must have valid seq region name.");
2116 if($cs->is_sequence_level() && !$not_dna) {
2118 throw(
"Must provide sequence for sequence level coord system.");
2120 if(ref($seqref) ne
'SCALAR') {
2121 throw(
"Sequence must be a scalar reference.");
2123 my $seq_len = length($$seqref);
2125 if($seq_len != $sr_len) {
2126 throw(
"Sequence length ($seq_len) must match slice length ($sr_len).");
2130 throw(
"Cannot provide sequence for non-sequence level seq regions.");
2134 #store the seq_region
2136 my $sth = $db->dbc->prepare(
"INSERT INTO seq_region " .
2137 " ( name, length, coord_system_id ) " .
2138 " VALUES ( ?, ?, ? )"
2141 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
2142 $sth->bind_param(2,$sr_len,SQL_INTEGER);
2143 $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
2147 my $seq_region_id = $self->last_insert_id(
'seq_region_id', undef,
'seq_region');
2149 if(!$seq_region_id) {
2150 throw(
"Database seq_region insertion failed.");
2153 if($cs->is_sequence_level() && !$not_dna) {
2154 #store sequence if it was provided
2155 my $seq_adaptor = $db->get_SequenceAdaptor();
2156 $seq_adaptor->store($seq_region_id, $$seqref);
2160 if(defined($slice->{
'synonym'})){
2161 foreach my $syn (@{$slice->{
'synonym'}} ){
2162 $syn->seq_region_id($seq_region_id); # set the seq_region_id
2163 $syn->adaptor->store($syn);
2168 $slice->adaptor($self);
2170 return $seq_region_id;
2179 # Get all of the sanity checks out of the way before storing anything
2182 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2183 throw(
'Slice argument is required');
2186 my $cs = $slice->coord_system();
2187 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2189 my $db = $self->db();
2191 if($slice->start != 1 || $slice->strand != 1) {
2192 throw(
"Slice must have start==1 and strand==1.");
2195 if($slice->end() != $slice->seq_region_length()) {
2196 throw(
"Slice must have end==seq_region_length");
2199 my $sr_len = $slice->length();
2200 my $sr_name = $slice->seq_region_name();
2203 throw(
"Slice must have valid seq region name.");
2206 #update the seq_region
2208 my $seq_region_id = $slice->get_seq_region_id();
2209 my $update_sql = qq(
2214 WHERE seq_region_id = ?
2217 my $sth = $db->dbc->prepare($update_sql);
2219 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
2220 $sth->bind_param(2,$sr_len,SQL_INTEGER);
2221 $sth->bind_param(3,$cs->dbID,SQL_INTEGER);
2223 $sth->bind_param(4, $seq_region_id, SQL_INTEGER);
2228 if(defined($slice->{
'synonym'})){
2229 foreach my $syn (@{$slice->{
'synonym'}} ){
2230 $syn->seq_region_id($seq_region_id); # set the seq_region_id
2231 my $syn_adaptor = $db->get_SeqRegionSynonymAdaptor();
2232 $syn_adaptor->store($syn);
2240 The slice to remove from the database
2241 Example : $slice_adaptor->remove($slice);
2242 Description: Removes a slice completely from the database.
2243 All associated seq_region_attrib are removed as well.
2244 If dna is attached to the slice, it is also removed.
2246 Exceptions :
throw if slice has no coord system.
2247 throw if slice argument is not passed
2248 warning
if slice is not stored in
this database
2260 # Get all of the sanity checks out of the way before storing anything
2263 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2264 throw(
'Slice argument is required');
2268 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2270 my $db = $self->db();
2271 my $seq_region_id = $slice->get_seq_region_id();
2273 if ($cs->is_sequence_level()) {
2274 my $seq_adaptor = $db->get_SequenceAdaptor();
2275 $seq_adaptor->remove($seq_region_id);
2278 if(defined($slice->{
'synonym'})){
2279 foreach my $syn (@{$slice->{
'synonym'}} ){
2280 $syn->seq_region_id($seq_region_id); # set the seq_region_id
2285 my $attrib_adaptor = $self->
db->get_AttributeAdaptor();
2286 $attrib_adaptor->remove_from_Slice($slice);
2287 $self->remove_assembly($slice);
2289 my $sr_name = $slice->seq_region_name();
2291 #remove the seq_region
2293 my $sth = $db->
dbc->
prepare(
"DELETE FROM seq_region " .
2294 "WHERE name = ? AND " .
2295 " coord_system_id = ?" );
2297 $sth->bind_param(1,$sr_name,SQL_VARCHAR);
2298 $sth->bind_param(2,$cs->dbID,SQL_INTEGER);
2306 =head2 remove_assembly
2309 Example : $slice_adaptor->remove_assembly( $slice );
2310 Description: Deletes from the assembly table
2311 where
asm or cmp corresponds to slice
2312 Do not call
this method unless you really know what you are doing
2314 Exceptions :
throw if slice has no coord system (cs).
2315 throw if no slice provided, or argument is not a slice
2317 Status : Experimental
2322 sub remove_assembly {
2326 if(!ref($slice) || !($slice->isa(
'Bio::EnsEMBL::Slice') or $slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2327 throw(
'Slice argument is required');
2331 throw(
"Slice must have attached CoordSystem.")
if(!$cs);
2334 # Checks complete. Delete the data
2336 my $sth = $self->db->dbc->prepare
2337 (
"DELETE FROM assembly " .
2338 "WHERE asm_seq_region_id = ? OR " .
2339 " cmp_seq_region_id = ? ");
2341 my $asm_seq_region_id = $self->get_seq_region_id( $slice );
2342 my $cmp_seq_region_id = $self->get_seq_region_id( $slice );
2344 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
2345 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
2354 =head2 fetch_assembly
2358 Example : $asm = $slice_adaptor->fetch_assembly( $slice1, $slice2 );
2359 Description: Fetches from the assembly table based on the
2360 coordinates of the two slices supplied.
2361 Returns a mapper
object mapping the two slices
2362 Do not call
this method unless you really know what you are doing
2364 Exceptions :
throw if either slice has no coord system (cs).
2365 throw if there is no mapping path between coord systems
2366 throw if there are existing mappings between either slice
2369 Status : Experimental
2374 sub fetch_assembly {
2376 my $asm_slice = shift;
2377 my $cmp_slice = shift;
2379 if(!ref($asm_slice) || !($asm_slice->isa(
'Bio::EnsEMBL::Slice') or $asm_slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2380 throw(
'Assembled Slice argument is required');
2382 if(!ref($cmp_slice) || !($cmp_slice->isa(
'Bio::EnsEMBL::Slice') or $cmp_slice->isa(
'Bio::EnsEMBL::LRGSlice')) ) {
2383 throw(
'Assembled Slice argument is required');
2386 my $asm_cs = $asm_slice->coord_system();
2387 throw(
"Slice must have attached CoordSystem.")
if(!$asm_cs);
2388 my $cmp_cs = $cmp_slice->coord_system();
2389 throw(
"Slice must have attached CoordSystem.")
if(!$cmp_cs);
2392 @{ $asm_cs->adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
2395 throw(
"No mapping path defined between "
2396 . $asm_cs->name() .
" and "
2397 . $cmp_cs->name() );
2401 # Checks complete. Fetch the data
2403 my $sth = $self->db->dbc->prepare
2404 (
"SELECT * FROM assembly " .
2405 "WHERE asm_seq_region_id = ? AND " .
2406 " cmp_seq_region_id = ? AND " .
2407 " asm_start = ? AND " .
2408 " asm_end = ? AND " .
2409 " cmp_start = ? AND " .
2410 " cmp_end = ? AND " .
2413 my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
2414 my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
2415 my $ori = $asm_slice->strand * $cmp_slice->strand;
2417 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
2418 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
2419 $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
2420 $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
2421 $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
2422 $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
2423 $sth->bind_param(7,$ori,SQL_INTEGER);
2427 my $results = $sth->fetchrow_array();
2439 =head2 store_assembly
2443 Example : $asm = $slice_adaptor->store_assembly( $slice1, $slice2 );
2444 Description: Creates an entry in the assembly table based on the
2445 coordinates of the two slices supplied. Returns a
string
2446 representation of the assembly that gets created.
2448 Exceptions :
throw if either slice has no coord system (cs).
2449 throw unless the cs rank of the asm_slice is lower than the
2451 throw if there is no mapping path between coord systems
2452 throw if the lengths of each slice are not equal
2453 throw if there are existing mappings between either slice
2455 Caller : database loading scripts
2456 Status : Experimental
2462 my $asm_slice = shift;
2463 my $cmp_slice = shift;
2466 # Get all of the sanity checks out of the way before storing anything
2469 if(!ref($asm_slice) || !($asm_slice->isa(
'Bio::EnsEMBL::Slice') or $asm_slice->isa(
'Bio::EnsEMBL::LRGSlice'))) {
2470 throw(
'Assembled Slice argument is required');
2472 if(!ref($cmp_slice) || !($cmp_slice->isa(
'Bio::EnsEMBL::Slice') or $cmp_slice->isa(
'Bio::EnsEMBL::LRGSlice')) ) {
2473 throw(
'Assembled Slice argument is required');
2477 throw(
"Slice must have attached CoordSystem.")
if(!$asm_cs);
2478 my $cmp_cs = $cmp_slice->coord_system();
2479 throw(
"Slice must have attached CoordSystem.")
if(!$cmp_cs);
2482 @{ $asm_cs->
adaptor()->get_mapping_path( $asm_cs, $cmp_cs ) };
2485 throw(
"No mapping path defined between "
2486 . $asm_cs->name() .
" and "
2487 . $cmp_cs->name() );
2490 if( $asm_slice->length != $cmp_slice->length ){
2491 throw(
"The lengths of the assembled and component slices are not equal" );
2494 # For now we disallow any existing mappings between the asm slice and cmp
2495 # CoordSystem and vice-versa.
2496 # Some cases of multiple mappings may be allowable by the API, but their
2497 # logic needs to be coded below.
2499 my $asm_proj = $asm_slice->project( $cmp_cs->name, $cmp_cs->version );
2500 if( @$asm_proj && $cmp_cs->name ne
'lrg' && $asm_cs->name ne
'lrg'){
2501 throw(
"Regions of the assembled slice are already assembled ".
2502 "into the component CoordSystem" );
2504 my $cmp_proj = $cmp_slice->project( $asm_cs->name, $asm_cs->version );
2505 if( @$cmp_proj && $cmp_cs->name ne
'lrg' && $asm_cs->name ne
'lrg'){
2506 throw(
"Regions of the component slice are already assembled ".
2507 "into the assembled CoordSystem" );
2511 # Checks complete. Store the data
2514 (
"INSERT INTO assembly " .
2515 " ( asm_seq_region_id, " .
2516 " cmp_seq_region_id, " .
2522 "VALUES ( ?, ?, ?, ?, ?, ?, ? )");
2524 my $asm_seq_region_id = $self->get_seq_region_id( $asm_slice );
2525 my $cmp_seq_region_id = $self->get_seq_region_id( $cmp_slice );
2526 my $ori = $asm_slice->strand * $cmp_slice->strand;
2528 $sth->bind_param(1,$asm_seq_region_id,SQL_INTEGER);
2529 $sth->bind_param(2,$cmp_seq_region_id,SQL_INTEGER);
2530 $sth->bind_param(3,$asm_slice->start,SQL_INTEGER);
2531 $sth->bind_param(4,$asm_slice->end,SQL_INTEGER);
2532 $sth->bind_param(5,$cmp_slice->start,SQL_INTEGER);
2533 $sth->bind_param(6,$cmp_slice->end,SQL_INTEGER);
2534 $sth->bind_param(7,$ori,SQL_INTEGER);
2538 #use Data::Dumper qw( Dumper );
2539 #warn Dumper( $self->db->{seq_region_cache} );
2540 #$self->db->{seq_region_cache} = undef;
2541 #$self->_cache_seq_regions();
2543 my $ama = $self->db->get_AssemblyMapperAdaptor();
2544 $ama->delete_cache();
2547 return $asm_slice->name .
"<>" . $cmp_slice->name;
2554 Arg [1] : String $sql
2555 Example : ( optional )
2556 Description: overrides the
default adaptor prepare method.
2557 All slice sql will usually use the dna_db.
2558 Returntype : DBD::sth
2560 Caller :
internal, convenience method
2566 my ( $self, $sql ) = @_;
2567 return $self->db()->dnadb()->dbc->prepare($sql);
2570 sub _build_exception_cache {
2573 # build up a cache of the entire assembly exception table
2574 # it should be small anyway
2576 $self->prepare(
'SELECT ae.seq_region_id, ae.seq_region_start, '
2577 .
'ae.seq_region_end, ae.exc_type, ae.exc_seq_region_id, '
2578 .
'ae.exc_seq_region_start, ae.exc_seq_region_end '
2579 .
'FROM assembly_exception ae, '
2580 .
'seq_region sr, coord_system cs '
2581 .
'WHERE sr.seq_region_id = ae.seq_region_id '
2582 .
'AND sr.coord_system_id = cs.coord_system_id '
2583 .
'AND cs.species_id = ?' );
2585 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2589 $self->{
'asm_exc_cache'} = \%hash;
2592 while ( $row = $sth->fetchrow_arrayref() ) {
2594 $hash{ $result[0] } ||= [];
2595 push( @{ $hash{ $result[0] } }, \@result );
2598 } ## end sub _build_exception_cache
2600 =head2 cache_toplevel_seq_mappings
2603 Example : $slice_adaptor->cache_toplevel_seq_mappings();
2604 Description: caches all the assembly mappings needed
for genes
2609 : New experimental code
2613 sub cache_toplevel_seq_mappings {
2616 # Get the sequence level to map too
2621 WHERE attrib like
"%sequence_level%"
2625 my $sth = $self->prepare($sql);
2626 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2629 my $sequence_level = $sth->fetchrow_array();
2633 my $csa = $self->db->get_CoordSystemAdaptor();
2634 my $ama = $self->db->get_AssemblyMapperAdaptor();
2636 my $cs1 = $csa->fetch_by_name($sequence_level);
2638 #get level to map too.
2641 SELECT DISTINCT(cs.name)
2643 seq_region_attrib sra,
2646 WHERE sra.seq_region_id = sr.seq_region_id
2647 AND sra.attrib_type_id = at.attrib_type_id
2648 AND at.code = "toplevel"
2649 AND cs.coord_system_id = sr.coord_system_id
2650 AND cs.species_id = ?
2653 $sth = $self->prepare($sql);
2654 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2657 while ( my $csn = $sth->fetchrow_array() ) {
2658 if ( $csn eq $sequence_level ) { next }
2659 my $cs2 = $csa->fetch_by_name($csn);
2660 my $am = $ama->fetch_by_CoordSystems( $cs1, $cs2 );
2661 $am->register_all();
2664 } ## end sub cache_toplevel_seq_mappings
2667 sub _build_circular_slice_cache {
2670 # build up a cache of circular sequence region ids
2672 $self->prepare(
"SELECT sra.seq_region_id FROM seq_region_attrib sra "
2673 .
"INNER JOIN attrib_type at ON sra.attrib_type_id = at.attrib_type_id "
2674 .
"INNER JOIN seq_region sr ON sra.seq_region_id = sr.seq_region_id "
2675 .
"INNER JOIN coord_system cs ON sr.coord_system_id = cs.coord_system_id "
2676 .
"WHERE code = 'circular_seq' and cs.species_id = ?");
2678 $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
2683 if ( ($id) = $sth->fetchrow_array() ) {
2684 $self->{
'circular_sr_id_cache'} = \%hash;
2685 $self->{
'is_circular'} = 1;
2687 while ( ($id) = $sth->fetchrow_array() ) {
2691 $self->{
'is_circular'} = 0;
2694 } ## end _build_circular_slice_cache
2696 =head2 _create_chromosome_alias
2699 Example : $self->create_chromosome_alias();
2700 Description: create chromosome alias in coordinate system with karyotype attributes
2708 sub _create_chromosome_alias {
2710 my $csa = $self->db->get_CoordSystemAdaptor();
2712 # to store reformatted db query results
2713 my $karyotype_seq_regions;
2715 my $karyotype_rank_string =
"karyotype_rank";
2717 # to store coord_system_id to alias
2721 # check whether seq region cache exists, otherwise run database query
2722 if ( $self->{
'karyotype_cache'} ) {
2723 $karyotype_seq_regions = $self->{
'karyotype_cache'};
2725 # get candidate coordSystem(s) to alias
2726 foreach my $seq_region_name ( keys %{$karyotype_seq_regions} ) {
2727 foreach my $coord_system_id ( keys %{ $karyotype_seq_regions->{ $seq_region_name } } ) {
2728 my $rank = $karyotype_seq_regions->{ $seq_region_name }->{ $coord_system_id }->{ rank };
2729 $cs_id_rank{ $coord_system_id } = $rank;
2733 # cannot find a coordssystem called chromosome
2734 # look through attribs to find seq_regions with karyotype
2737 $self->prepare(
"SELECT sr.seq_region_id, sr.name, sr.coord_system_id, sr.length, a.code, cs.rank "
2738 .
"FROM seq_region sr, seq_region_attrib sra, attrib_type a, coord_system cs "
2739 .
"WHERE sr.seq_region_id = sra.seq_region_id "
2740 .
"AND a.attrib_type_id = sra.attrib_type_id "
2741 .
"AND cs.coord_system_id = sr.coord_system_id "
2742 .
"AND a.code = ?" );
2744 $sth->bind_param( 1, $karyotype_rank_string );
2747 # fetch SQL results and identify coord_system_id(s) that
2748 # has(have) karyotype attribs
2749 while ( my $hashref = $sth->fetchrow_hashref() ) {
2750 my $seq_region_id = $hashref->{ seq_region_id };
2751 my $seq_region_name = $hashref->{ name };
2752 my $coord_system_id = $hashref->{ coord_system_id };
2753 my $length = $hashref->{ length };
2754 my $code = $hashref->{ code };
2755 my $rank = $hashref->{ rank };
2757 # account for possibility of multiple coord_system_ids returned from db query
2758 $karyotype_seq_regions->{ $seq_region_name }->{ $coord_system_id } = {
2761 seq_region_id => $seq_region_id,
2765 # hash to help decide which cs id to use, if multiple
2766 $cs_id_rank{ $coord_system_id } = $rank;
2771 my $cs_id_count = keys %cs_id_rank;
2772 # if number of coordSystem ids retrieved is one, simply use this coordSystem id
2773 # otherwise, choose the coordSystem with the best-rank (i.e. lowest number)
2774 if ( $cs_id_count == 1 ) {
2775 $cs_id = (keys %cs_id_rank)[0]; # get only key name in hash
2777 foreach my $id (sort { $cs_id_rank{$a} <=> $cs_id_rank{$b} } keys %cs_id_rank) {
2779 last; # exit loop after getting lowest-ranked coordSystem
id
2784 throw(
"No coordinate system to create a chromosome slice from (because there is no suitable coordinate system to create an alias to).\n");
2787 # use appropriate 'coord_system_id' to set 'alias_to' variable
2788 # first check that a chromosome object does not already exist
2789 my $cs = $csa->fetch_by_dbID( $cs_id );
2791 throw(
"Unable to retrieve CoordSystem object using dbID: $cs_id");
2793 if ( $cs->name eq
"chromosome" ) {
2794 throw(
"A chromosome CoordSystem object already exists. Cannot create chromosome alias.");
2798 # create karyotype cache in retrieved coordsystem
2799 $cs->{
'karyotype_cache'} = $karyotype_seq_regions;
2803 } ## end create_chromosome_alias
2806 =head2 _fetch_by_seq_region_synonym
2807 Args : $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz
2808 Example : $self->fetch_by_seq_region_synonym( $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz );
2809 Description: fetches all the seq region synonyms (or uses wildcard) when requested
2810 Returntype : Bio::EnsEMBL::SliceAdaptor, or none
2813 Status : (refactored from fetch_by_region)
2816 sub _fetch_by_seq_region_synonym {
2818 my ( $self, $cs, $seq_region_name, $start, $end, $strand, $version, $no_fuzz ) = @_;
2821 assert_integer($start,
'start') if defined $start;
2822 assert_integer($end, 'end') if defined $end;
2824 if ( !defined($start) ) { $start = 1 }
2825 if ( !defined($strand) ) { $strand = 1 }
2827 if ( !defined($seq_region_name) ) {
2828 throw(
'seq_region_name argument is required');
2831 my $coord_system_name;
2833 my $syn_sql =
"select s.name, cs.name, cs.version from seq_region s join seq_region_synonym ss using (seq_region_id) join coord_system cs using (coord_system_id) where ss.synonym like ? and cs.species_id =? ";
2835 $coord_system_name = $cs->
name;
2836 $syn_sql .=
"AND cs.name = '" . $coord_system_name .
"' ";
2838 if (defined $version) {
2839 $syn_sql .=
"AND cs.version = '" . $version .
"' ";
2841 $syn_sql .=
"ORDER BY cs.rank ASC";
2842 my $syn_sql_sth = $self->prepare($syn_sql);
2843 $syn_sql_sth->bind_param(1, $seq_region_name, SQL_VARCHAR);
2844 $syn_sql_sth->bind_param(2, $self->species_id(), SQL_INTEGER);
2845 $syn_sql_sth->execute();
2846 my ($new_name, $new_coord_system, $new_version);
2847 $syn_sql_sth->bind_columns( \$new_name, \$new_coord_system, \$new_version);
2849 # we want to see if there was an exact match for the seq_region_synonym.
2850 # The overall logic is:
2851 # Case 1 - A synonym exactly matches the query, and either:
2852 # a - the coord_system matches
2854 # b - the user is looking for toplevel
2855 # In this case, we now have the real seq_region name, so we can re-run
2856 # fetch_by_region using that name, coord_system, and version
2857 # Case 2 - A synonym exactly matches, but none of the coord_systems match
2858 # and the user is not looking for toplevel.
2859 # In this case, we return nothing, and move on to try fuzzy-matching
2861 # Case 3 - No synonyms match, so repeat the query using wildcards
2862 my $matched_synonym = 0;
2863 while ($syn_sql_sth->fetch){
2864 $matched_synonym = 1;
2865 if ((not defined($cs)) || ($cs->name eq $new_coord_system && $cs->version eq $new_version)) {
2866 return $self->fetch_by_region($new_coord_system, $new_name, $start, $end, $strand, $new_version, $no_fuzz);
2870 if ($matched_synonym == 0) {
2871 # Try wildcard searching if no exact synonym was found
2872 $syn_sql_sth = $self->prepare($syn_sql);
2873 my $escaped_seq_region_name = $seq_region_name;
2874 my $escape_char = $self->dbc->db_handle->get_info(14);
2875 $escaped_seq_region_name =~ s/([_%])/$escape_char$1/g;
2876 $syn_sql_sth->bind_param(1,
"$escaped_seq_region_name%", SQL_VARCHAR);
2877 $syn_sql_sth->bind_param(2, $self->species_id(), SQL_INTEGER);
2878 $syn_sql_sth->execute();
2879 $syn_sql_sth->bind_columns( \$new_name, \$new_coord_system, \$new_version);
2881 my @matched_syn_wrong_cs;
2882 while ($syn_sql_sth->fetch){
2883 if ((not defined($cs)) || ($cs->name eq $new_coord_system && $cs->version eq $new_version)) {
2884 return $self->fetch_by_region($new_coord_system, $new_name, $start, $end, $strand, $new_version, $no_fuzz);
2886 push(@matched_syn_wrong_cs, [$new_coord_system, $new_version]);
2890 if (scalar(@matched_syn_wrong_cs) > 0) {
2891 my $cs_warning_text =
"Searched for a known feature on coordniate system: " .
2893 " but found it on " .
2894 join(
", ",
map {
if ($_->[1]) {
2895 $_->[0] .
":" . $_->[1]
2899 } @matched_syn_wrong_cs) .
2900 "\n No result returned, consider searching without coordinate system or use toplevel.";
2901 warning($cs_warning_text);
2905 $syn_sql_sth->finish;
2910 =head2 _fetch_by_fuzzy_matching
2911 Args : $cs, $seq_region_name, $sql, $constraint, $bind_params
2912 Example : my $fuzzy_matched_name = $self->fetch_by_fuzzy_matching( $cs, $seq_region_name, $sql, $constraint, $bind_params );
2913 Description: fetches all the fuzzy matches
for a given seq_region_name when requested
2917 Status : (refactored from fetch_by_region)
2920 sub _fetch_by_fuzzy_matching {
2922 my ($self, $cs, $seq_region_name, $sql, $constraint, $bind_params) = @_;
2924 my $csa = $self->db->get_CoordSystemAdaptor();
2927 # Do fuzzy matching, assuming that we are just missing a version
2928 # on the end of the seq_region name.
2931 $self->prepare( $sql .
" WHERE sr.name LIKE ? " . $constraint );
2933 @$bind_params[0] = [ sprintf(
'%s.%%', $seq_region_name ), SQL_VARCHAR ];
2936 foreach my $param (@$bind_params) {
2937 $sth->bind_param( ++$pos, $param->[0], $param->[1] );
2942 my $prefix_len = length($seq_region_name) + 1;
2943 my $high_ver = undef;
2946 # Find the fuzzy-matched seq_region with the highest postfix
2947 # (which ought to be a version).
2949 my ( $tmp_name, $id, $tmp_length, $cs_id );
2950 $sth->bind_columns( \( $tmp_name, $id, $tmp_length, $cs_id ) );
2954 while ( $sth->fetch ) {
2956 ( defined($cs) ? $cs : $csa->fetch_by_dbID($cs_id) );
2958 # cache values for future reference
2959 my $arr = [ $id, $tmp_name, $cs_id, $tmp_length ];
2960 $self->{
'sr_name_cache'}->{
"$tmp_name:$cs_id"} = $arr;
2961 $self->{
'sr_id_cache'}->{
"$id"} = $arr;
2963 my $tmp_ver = substr( $tmp_name, $prefix_len );
2965 # skip versions which are non-numeric and apparently not
2967 if ( $tmp_ver !~ /^\d+$/ ) { next }
2969 # take version with highest num, if two versions match take one
2970 # with highest ranked coord system (lowest num)
2971 if ( !defined($high_ver)
2972 || $tmp_ver > $high_ver
2973 || ( $tmp_ver == $high_ver && $tmp_cs->rank < $high_cs->rank )
2976 $seq_region_name = $tmp_name;
2977 $length = $tmp_length;
2978 $high_ver = $tmp_ver;
2983 } ## end
while ( $sth->fetch )
2986 # warn if fuzzy matching found more than one result
2990 "Fuzzy matching of seq_region_name "
2991 .
"returned more than one result.\n"
2992 .
"You might want to check whether the returned seq_region\n"
2993 .
"(%s:%s) is the one you intended to fetch.\n",
2994 $high_cs->name(), $seq_region_name ) );
2999 # return if we did not find any appropriate match:
3000 if ( !defined($high_ver) ) {
return; }
3002 return ($seq_region_name, $cs);