3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 Questions may also be sent to the Ensembl help desk at
34 Performs database interaction related to PredictionTranscripts
38 # get a prediction transcript adaptor from the database
39 $pta = $database_adaptor->get_PredictionTranscriptAdaptor();
41 # get a slice on a region of chromosome X
42 $sa = $database_adaptor->get_SliceAdaptor();
44 $slice = $sa->fetch_by_region(
'chromosome',
'x', 100000, 200000 );
46 # get all the prediction transcripts from the slice region
53 package Bio::EnsEMBL::DBSQL::PredictionTranscriptAdaptor;
70 # Description: Implements abstract superclass method to define the table used
71 # to retrieve prediction transcripts from the database
74 # Caller : generic_fetch
# Two-element array reference: the table name followed by the short name that
# other code in this file reads via _tables()->[1] to qualify column names.
79 return [
'prediction_transcript',
'pt'];
87 # Description: Implements abstract superclass method to define the columns
88 # retrieved in database queries used to create prediction
90 # Returntype : list of strings
92 # Caller : generic_fetch
98 return qw( pt.prediction_transcript_id
108 =head2 fetch_by_stable_id
110 Arg [1] :
string $stable_id
112 Example : $trans = $trans_adptr->fetch_by_stable_id(
'GENSCAN00000001234');
113 Description: Retrieves a prediction
transcript via its display_label.
114 This method is called fetch_by_stable_id
for polymorphism with
115 the TranscriptAdaptor. Prediction
transcript display_labels are
116 not necessarily stable in that the same identifier may be reused
117 for a completely different prediction
transcript in a subsequent
# Looks up a prediction transcript whose display_label equals $stable_id and
# returns the first match, or undef when nothing matches.
125 sub fetch_by_stable_id {
127 my $stable_id = shift;
# Any false value (undef, empty string, 0) is rejected, not only undef.
129 throw(
'Stable_id argument expected')
if(!$stable_id);
# Second element of _tables() is used to qualify the column in the constraint.
131 my $syn = $self->_tables()->[1];
# The identifier is bound as a parameter rather than interpolated into the
# constraint string.
133 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
134 my $pts = $self->generic_fetch(
"$syn.display_label = ?");
# Only the first element is returned even if several rows share the label.
136 return (@$pts) ? $pts->[0] : undef;
141 =head2 fetch_all_by_Slice
144 The slice to fetch transcripts on.
145 Arg [3] : (optional)
boolean $load_exons
146 if true, exons will be loaded immediately rather than
148 Example : $transcripts = $pta->fetch_all_by_Slice($slice, $logic_name, 1);
149 Description: Overrides superclass method to optionally load exons
150 immediately rather than lazy-loading them later. This
151 is more efficient when there are a lot of transcripts whose
152 exons are going to be used.
153 Returntype : reference to list of transcripts
155 Caller : Slice::get_all_Transcripts
# Fetches prediction transcripts on $slice through the superclass. When
# $load_exons is true and at least one transcript was found, their exons are
# attached here using one exon-link query plus one exon fetch.
160 sub fetch_all_by_Slice {
163 my $logic_name = shift;
164 my $load_exons = shift;
166 my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
168 # if there are 0 transcripts still do lazy-loading
169 if(!$load_exons || @$transcripts < 1) {
173 # preload all of the exons now, instead of lazy loading later
174 # faster than 1 query per transcript
176 # get extent of region spanned by transcripts
177 my ($min_start, $max_end);
# Linear (non-circular) slice: the extent is the smallest seq_region_start and
# the largest seq_region_end over all transcripts.
180 unless ($slice->is_circular()) {
181 foreach my $t (@$transcripts) {
182 if (!defined($min_start) || $t->seq_region_start() < $min_start) {
183 $min_start = $t->seq_region_start();
185 if (!defined($max_end) || $t->seq_region_end() > $max_end) {
186 $max_end = $t->seq_region_end();
# Tests whether every transcript lies inside the requested slice.
# NOTE(review): the body of this branch is not visible here; presumably the
# requested slice is reused as $ext_slice - confirm.
190 if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
# Fetch a slice covering the full transcript extent, keeping the requested
# slice's coord system name/version, seq_region name and strand.
193 my $sa = $self->db()->get_SliceAdaptor();
194 $ext_slice = $sa->fetch_by_region($slice->coord_system->name(), $slice->seq_region_name(), $min_start, $max_end, $slice->strand(), $slice->coord_system->version());
198 # feature might be crossing the origin of replication (i.e. seq_region_start > seq_region_end)
199 # the computation of min_start|end based on seq_region_start|end is not safe
200 # use feature start/end relative to the slice instead
201 my ($min_start_feature, $max_end_feature);
# The first transcript always seeds the extremes; after that, negative
# slice-relative starts/ends are ignored when tracking them.
202 foreach my $t (@$transcripts) {
203 if (!defined($min_start) || ($t->start() >= 0 && $t->start() < $min_start)) {
204 $min_start = $t->start();
205 $min_start_feature = $t;
207 if (!defined($max_end) || ($t->end() >= 0 && $t->end() > $max_end)) {
208 $max_end = $t->end();
209 $max_end_feature = $t;
213 # now we can reassign min_start|end to seq_region_start|end of
214 # the feature which spans the largest region
215 $min_start = $min_start_feature->seq_region_start();
216 $max_end = $max_end_feature->seq_region_end();
218 my $sa = $self->db()->get_SliceAdaptor();
220 $sa->fetch_by_region($slice->coord_system->name(),
221 $slice->seq_region_name(),
225 $slice->coord_system->version());
229 # associate exon identifiers with transcripts
# dbID -> transcript object, so rows from the link query can be matched back.
231 my %tr_hash =
map {$_->dbID => $_} @$transcripts;
# The IN list is built from the dbIDs of the transcripts fetched above.
233 my $tr_id_str =
'(' . join(
',', keys %tr_hash) .
')';
235 my $sth = $self->prepare
236 (
"SELECT prediction_transcript_id, prediction_exon_id, exon_rank " .
237 "FROM prediction_exon " .
238 "WHERE prediction_transcript_id IN $tr_id_str");
# Bound in SELECT column order: transcript id, exon id, rank.
242 my ($ex_id, $tr_id, $rank);
243 $sth->bind_columns(\$tr_id, \$ex_id, \$rank);
# Each exon id collects a list of [transcript, rank] pairs.
247 while($sth->fetch()) {
248 $ex_tr_hash{$ex_id} ||= [];
249 push @{$ex_tr_hash{$ex_id}}, [$tr_hash{$tr_id}, $rank];
# All exons on the extended slice are fetched with a single adaptor call.
254 my $ea = $self->db()->get_PredictionExonAdaptor();
255 my $exons = $ea->fetch_all_by_Slice($ext_slice);
257 # move exons onto transcript slice, and add them to transcripts
258 foreach my $ex (@$exons) {
# Transfer only when the extended slice is a different object from the
# requested slice (the comparison is on the references themselves).
259 $ex = $ex->transfer($slice)
if($slice != $ext_slice);
262 throw(
"Unexpected. PredictionExon could not be transfered onto " .
263 "PredictionTranscript slice.");
# Attach the exon, at its recorded rank, to every transcript linked to it.
266 foreach my $row (@{$ex_tr_hash{$ex->dbID()}}) {
267 my ($tr, $rank) = @$row;
268 $tr->add_Exon($ex, $rank);
276 =head2 fetch_by_prediction_exon_id
278 Arg [1] : Int $prediction_exon_id
279 Unique database identifier
for the prediction
exon
280 whose prediction
transcript should be retrieved.
281 Example : $prediction_transcript = $prediction_transcript_adaptor->fetch_by_prediction_exon_id(1241);
282 Description: Retrieves a prediction
transcript from the database via the database identifier
# Returns the prediction transcript that owns the prediction exon with the
# given database id, or undef when no such exon row exists.
291 sub fetch_by_prediction_exon_id {
292 my ($self, $prediction_exon_id) = @_;
294 # this is a cheap SQL call
295 my $sth = $self->prepare(
297 SELECT pe.prediction_transcript_id
298 FROM prediction_exon pe
299 WHERE pe.prediction_exon_id = ?
# The exon id is bound as an integer parameter.
302 $sth->bind_param(1, $prediction_exon_id, SQL_INTEGER);
# Only the first row of the result is read.
305 my ($prediction_transcript_id) = $sth->fetchrow_array();
# No matching row: return undef explicitly.
309 return undef
if (!defined $prediction_transcript_id);
# Resolve the transcript id to a full object through fetch_by_dbID.
311 my $prediction_transcript = $self->fetch_by_dbID($prediction_transcript_id);
312 return $prediction_transcript;
319 =head2 _objs_from_sth
321 Arg [1] : DBI:st $sth
322 An executed DBI statement handle
324 A mapper to be used to convert contig coordinates
325 to assembly coordinates.
328 Example : $p_transcripts = $self->_objs_from_sth($sth);
329 Description: Creates a list of Prediction transcripts from an executed DBI
330 statement handle. The columns retrieved via the statement
331 handle must be in the same order as the columns defined by the
332 _columns method. If the slice argument is provided then
333 the prediction transcripts will be returned in the coordinate
334 system of the $slice argument. Otherwise the prediction
335 transcripts will be returned in the RawContig coordinate system.
336 Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
338 Caller : superclass generic_fetch
# Builds PredictionTranscript objects from the rows of an executed statement
# handle, optionally remapping coordinates through $mapper and converting them
# to be relative to $dest_slice. Returns a reference to the resulting list.
344 my ($self, $sth, $mapper, $dest_slice) = @_;
347 # This code is ugly because an attempt has been made to remove as many
348 # function calls as possible for speed purposes. Thus many caches and
349 # a fair bit of gymnastics are used.
352 my $sa = $self->db()->get_SliceAdaptor();
353 my $aa = $self->db()->get_AnalysisAdaptor();
362 $prediction_transcript_id, $seq_region_id, $seq_region_start,
363 $seq_region_end, $seq_region_strand, $analysis_id,
366 $sth->bind_columns(\(
367 $prediction_transcript_id, $seq_region_id, $seq_region_start,
368 $seq_region_end, $seq_region_strand, $analysis_id,
# Properties of the destination slice, read once before the row loop so the
# loop itself does not have to call the accessors for every feature.
371 my $dest_slice_start;
373 my $dest_slice_strand;
374 my $dest_slice_length;
376 my $dest_slice_sr_name;
377 my $dest_slice_sr_id;
381 $dest_slice_start = $dest_slice->start();
382 $dest_slice_end = $dest_slice->end();
383 $dest_slice_strand = $dest_slice->strand();
384 $dest_slice_length = $dest_slice->length();
385 $dest_slice_cs = $dest_slice->coord_system();
386 $dest_slice_sr_name = $dest_slice->seq_region_name();
387 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
388 $asma = $self->db->get_AssemblyMapperAdaptor();
# One iteration per fetched row; "next FEATURE" skips rows that cannot be
# placed.
391 FEATURE:
while($sth->fetch()) {
393 #get the analysis object
# Analysis objects are cached by analysis_id across rows.
394 my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
395 $analysis_hash{$analysis_id} = $analysis;
397 #need to get the internal_seq_region, if present
398 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
# Slices are cached under the key "ID:<seq_region_id>", alongside their
# seq_region name and coord system.
399 my $slice = $slice_hash{
"ID:".$seq_region_id};
402 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
403 $slice_hash{
"ID:".$seq_region_id} = $slice;
404 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
405 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
408 #obtain a mapper if none was defined, but a dest_seq_region was
# A mapper is only looked up when the feature's coord system differs from the
# destination slice's coord system.
409 if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
410 $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
413 my $sr_name = $sr_name_hash{$seq_region_id};
414 my $sr_cs = $sr_cs_hash{$seq_region_id};
417 # remap the feature coordinates to another coord system
418 # if a mapper was provided
# A ChainedAssemblyMapper is handed the destination slice through map(); the
# other call shown uses fastmap() without it.
423 if (defined $dest_slice && $mapper->isa(
'Bio::EnsEMBL::ChainedAssemblyMapper') ) {
424 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
425 $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
428 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
429 $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
432 #skip features that map to gaps or coord system boundaries
433 next FEATURE
if (!defined($seq_region_id));
435 #get a slice in the coord system we just mapped to
436 $slice = $slice_hash{
"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
440 # If a destination slice was provided convert the coords.
442 if (defined($dest_slice)) {
443 my $seq_region_len = $dest_slice->seq_region_length();
# Forward-strand destination: shift coordinates so that the slice start
# becomes position 1.
445 if ( $dest_slice_strand == 1 ) {
446 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
447 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
449 if ( $dest_slice->is_circular ) {
450 # Handle circular chromosomes.
452 if ( $seq_region_start > $seq_region_end ) {
453 # Looking at a feature overlapping the chromosome origin.
455 if ( $seq_region_end > $dest_slice_start ) {
456 # Looking at the region in the beginning of the chromosome
457 $seq_region_start -= $seq_region_len;
459 if ( $seq_region_end < 0 ) {
460 $seq_region_end += $seq_region_len;
463 if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
464 # Looking at the region overlapping the chromosome
465 # origin and a feature which is at the beginning of the
467 $seq_region_start += $seq_region_len;
468 $seq_region_end += $seq_region_len;
# Reverse-strand destination: coordinates are mirrored around the slice end;
# the strand is flipped further down once the circular adjustments are done.
474 my $start = $dest_slice_end - $seq_region_end + 1;
475 my $end = $dest_slice_end - $seq_region_start + 1;
477 if ($dest_slice->is_circular()) {
479 if ($dest_slice_start > $dest_slice_end) {
480 # slice spans origin of replication
482 if ($seq_region_start >= $dest_slice_start) {
483 $end += $seq_region_len;
484 $start += $seq_region_len
if $seq_region_end > $dest_slice_start;
486 } elsif ($seq_region_start <= $dest_slice_end) {
488 } elsif ($seq_region_end >= $dest_slice_start) {
489 $start += $seq_region_len;
490 $end += $seq_region_len;
492 } elsif ($seq_region_end <= $dest_slice_end) {
493 $end += $seq_region_len
if $end < 0;
495 } elsif ($seq_region_start > $seq_region_end) {
496 $end += $seq_region_len;
501 if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
503 } elsif ($seq_region_start > $seq_region_end) {
504 if ($seq_region_start <= $dest_slice_end) {
505 $start -= $seq_region_len;
506 } elsif ($seq_region_end >= $dest_slice_start) {
507 $end += $seq_region_len;
513 $seq_region_start = $start;
514 $seq_region_end = $end;
515 $seq_region_strand *= -1;
517 } ## end
else [
if ( $dest_slice_strand...)]
519 # Throw away features off the end of the requested slice or on
520 # different seq_region.
521 if ($seq_region_end < 1
522 || $seq_region_start > $dest_slice_length
523 || ($dest_slice_sr_id != $seq_region_id)) {
# From here on the feature is attached to the destination slice.
526 $slice = $dest_slice;
529 # Finally, create the new PredictionTranscript.
531 $self->_create_feature(
'Bio::EnsEMBL::PredictionTranscript', {
532 '-start' => $seq_region_start,
533 '-end' => $seq_region_end,
534 '-strand' => $seq_region_strand,
537 '-analysis' => $analysis,
538 '-dbID' => $prediction_transcript_id,
539 '-display_label' => $display_label
544 return \@ptranscripts;
552 Example : $prediction_transcript_adaptor->store(@pre_transcripts);
553 Description: Stores a list of given prediction transcripts in database.
554 Puts dbID and adaptor into each stored object.
556 Exceptions : on wrong argument type
# Inserts each PredictionTranscript in @pre_transcripts into the
# prediction_transcript table, stores its exons, and writes an auto-generated
# display label when the transcript has none.
563 my ( $self, @pre_transcripts ) = @_;
# Both statements are prepared once, before the loop, and reused for every
# transcript.
565 my $ptstore_sth = $self->prepare
566 (qq{INSERT INTO prediction_transcript (seq_region_id, seq_region_start,
567 seq_region_end, seq_region_strand,
568 analysis_id, display_label)
569 VALUES( ?, ?, ?, ?, ?, ?)});
571 my $ptupdate_sth = $self->prepare
572 (qq{UPDATE prediction_transcript SET display_label = ?
573 WHERE prediction_transcript_id = ?});
575 my $db = $self->db();
576 my $analysis_adaptor = $db->get_AnalysisAdaptor();
577 my $pexon_adaptor = $db->get_PredictionExonAdaptor();
579 FEATURE:
foreach my $pt (@pre_transcripts) {
# Anything that is not a PredictionTranscript object is rejected.
580 if(!ref($pt) || !$pt->isa(
'Bio::EnsEMBL::PredictionTranscript')) {
581 throw(
'Expected PredictionTranscript argument not [' . ref($pt).
']');
584 #skip prediction transcripts that have already been stored
585 if($pt->is_stored($db)) {
586 warning(
'Not storing already stored prediction transcript '. $pt->dbID);
590 #get analysis and store it if it is not in the db
591 my $analysis = $pt->analysis();
593 throw(
'Prediction transcript must have analysis to be stored.');
595 if(!$analysis->is_stored($db)) {
596 $analysis_adaptor->store($analysis);
599 #ensure that the transcript coordinates are correct, they may not be,
600 #if somebody has done some exon coordinate juggling and not recalculated
601 #the transcript coords.
602 $pt->recalculate_coordinates();
# _pre_store() returns the transcript to store together with its seq_region id;
# $pt is overwritten with the returned transcript.
606 ($pt, $seq_region_id) = $self->_pre_store($pt);
608 #store the prediction transcript
609 $ptstore_sth->bind_param(1,$seq_region_id,SQL_INTEGER);
610 $ptstore_sth->bind_param(2,$pt->start,SQL_INTEGER);
611 $ptstore_sth->bind_param(3,$pt->end,SQL_INTEGER);
612 $ptstore_sth->bind_param(4,$pt->strand,SQL_TINYINT);
613 $ptstore_sth->bind_param(5,$analysis->dbID,SQL_INTEGER);
614 $ptstore_sth->bind_param(6,$pt->display_label,SQL_VARCHAR);
616 $ptstore_sth->execute();
# The new id and adaptor are written to $original, not $pt.
# NOTE(review): $original is assigned outside the lines shown here; presumably
# it is the caller's object as passed in, before _pre_store() - confirm.
618 my $pt_id = $self->last_insert_id(
'prediction_transcript_id', undef,
'prediction_transcript');
619 $original->dbID($pt_id);
620 $original->adaptor($self);
# Each exon is stored against the new transcript id with an incrementing rank.
624 foreach my $pexon (@{$original->get_all_Exons}) {
625 $pexon_adaptor->store($pexon, $pt_id, $rank++);
628 # if a display label was not defined autogenerate one
629 if(!defined($pt->display_label())) {
# Label is the upper-cased logic name followed by the id left-padded with
# zeros to 11 characters; ids longer than 11 characters get no padding.
630 my $zeros =
'0' x (11 - length($pt_id));
631 my $display_label = uc($analysis->logic_name()) . $zeros . $pt_id;
632 $ptupdate_sth->bind_param(1,$display_label,SQL_VARCHAR);
633 $ptupdate_sth->bind_param(2,$pt_id,SQL_INTEGER);
634 $ptupdate_sth->execute();
635 $original->display_label($display_label);
645 Example : $prediction_transcript_adaptor->remove($pt);
646 Description: removes given prediction
transcript $pt from database.
# Deletes a stored PredictionTranscript and its prediction exons from the
# database, then clears the object's dbID and adaptor.
656 my $pre_trans = shift;
# Anything that is not a PredictionTranscript object is rejected.
658 if(!ref($pre_trans)||!$pre_trans->isa(
'Bio::EnsEMBL::PredictionTranscript')){
659 throw(
'Expected PredictionTranscript argument.');
# An object that is not stored in this database only triggers a warning.
662 if(!$pre_trans->is_stored($self->db())) {
663 warning(
'PredictionTranscript is not stored in this DB - not removing.');
667 #remove all associated prediction exons
# Exons are removed one at a time through the PredictionExonAdaptor, before
# the transcript row itself is deleted.
668 my $pexon_adaptor = $self->db()->get_PredictionExonAdaptor();
669 foreach my $pexon (@{$pre_trans->get_all_Exons}) {
670 $pexon_adaptor->remove($pexon);
673 #remove the prediction transcript
674 my $sth = $self->prepare(
"DELETE FROM prediction_transcript
675 WHERE prediction_transcript_id = ?" );
676 $sth->bind_param(1,$pre_trans->dbID,SQL_INTEGER);
679 #unset the adaptor and internal id
680 $pre_trans->dbID(undef);
681 $pre_trans->adaptor(undef);
688 Example : @feature_ids = @{$prediction_transcript_adaptor->list_dbIDs()};
689 Description: Gets an array of
internal ids
for all prediction
transcript
690 features in the current db
691 Arg[1] : <optional>
int. not 0
for the ids to be sorted by the seq_region.
692 Returntype : reference to a list of ints
# Delegates to _list_dbIDs() for the prediction_transcript table. The second
# argument is left undef and $ordered is passed through unchanged.
700 my ($self, $ordered) = @_;
702 return $self->_list_dbIDs(
"prediction_transcript", undef, $ordered);