3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
34 interaction relating to the storage and retrieval of Transcripts
41 -host =>
'ensembldb.ensembl.org',
49 $transcript = $transcript_adaptor->fetch_by_dbID(1234);
52 $transcript_adaptor->fetch_by_stable_id(
'ENST00000201961');
55 $slice_adaptor->fetch_by_region(
'Chromosome',
'3', 1, 1000000 );
56 @transcripts = @{ $transcript_adaptor->fetch_all_by_Slice($slice) };
59 @{ $transcript_adaptor->fetch_all_by_external_name(
'NP_065811.1') };
63 This adaptor provides a means to retrieve and store information related
64 to Transcripts. Primarily
this involves the retrieval or storage of
71 package Bio::EnsEMBL::DBSQL::TranscriptAdaptor;
89 # Description: PROTECTED implementation of superclass abstract method.
90 # Returns the names, aliases of the tables to use for queries.
91 # Returntype : list of listrefs of strings
98 [
'transcript',
't' ],
100 [
'external_db',
'exdb' ] );
106 # Description: PROTECTED implementation of superclass abstract method.
107 # Returns a list of columns to use for queries.
108 # Returntype : list of strings
117 $self->db()->dbc()->from_date_to_seconds(
"created_date");
119 $self->db()->dbc()->from_date_to_seconds(
"modified_date");
123 't.transcript_id',
't.seq_region_id',
124 't.seq_region_start',
't.seq_region_end',
125 't.seq_region_strand',
't.analysis_id',
126 't.gene_id',
't.is_current',
127 't.stable_id',
't.version',
128 $created_date, $modified_date,
129 't.description',
't.biotype',
131 'exdb.status',
'exdb.db_display_name',
132 'x.xref_id',
'x.display_label',
133 'x.dbprimary_acc',
'x.version',
134 'x.description',
'x.info_type',
135 'x.info_text',
'exdb.db_release'
138 $self->schema_version > 74 and push @columns,
't.source';
145 [
'xref',
"x.xref_id = t.display_xref_id" ],
146 [
'external_db',
"exdb.external_db_id = x.external_db_id" ]
151 =head2 fetch_by_stable_id
153 Arg [1] : String $stable_id
155 Example : my $tr = $tr_adaptor->fetch_by_stable_id(
'ENST00000309301');
156 Description: Retrieves a
transcript via its stable
id.
164 sub fetch_by_stable_id {
165 my ($self, $stable_id) = @_;
167 my $constraint =
"t.stable_id = ? AND t.is_current = 1";
169 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
171 my ($transcript) = @{ $self->generic_fetch($constraint) };
173 # If we didn't get anything back, desperately try to see if there's
174 # a version number in the stable_id
175 if(!defined($transcript) && (my $vindex = rindex($stable_id,
'.'))) {
176 $transcript = $self->fetch_by_stable_id_version(substr($stable_id,0,$vindex),
177 substr($stable_id,$vindex+1));
184 =head2 fetch_by_stable_id_version
188 Arg [2] : Integer $version
189 The version of the stable_id to retrieve
190 Example : $tr = $tr_adaptor->fetch_by_stable_id_version(
'ENST00000309301', 3);
191 Description: Retrieves a
transcript object from the database via its
192 stable
id and version.
193 The
transcript will be retrieved in its native coordinate system (i.e.
194 in the coordinate system it is stored in the database). It may
195 be converted to a different coordinate system through a call to
196 transform() or transfer(). If the
transcript is not found
197 undef is returned instead.
198 Returntype :
Bio::
EnsEMBL::Transcript or undef
199 Exceptions : if we cant get the
transcript in given coord system
205 sub fetch_by_stable_id_version {
206 my ($self, $stable_id, $version) = @_;
208 # Enforce that version be numeric
209 return unless($version =~ /^\d+$/);
211 my $constraint =
"t.stable_id = ? AND t.version = ? AND t.is_current = 1";
212 $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
213 $self->bind_param_generic_fetch($version, SQL_INTEGER);
214 my ($transcript) = @{$self->generic_fetch($constraint)};
222 my $constraint =
't.biotype != "LRG_gene" and t.is_current = 1';
223 my @trans = @{ $self->generic_fetch($constraint) };
227 =head2 fetch_all_versions_by_stable_id
229 Arg [1] : String $stable_id
231 Example : my $tr = $tr_adaptor->fetch_all_versions_by_stable_id
233 Description : Similar to fetch_by_stable_id, but retrieves all versions of a
236 Exceptions :
if we cant get the gene in given coord system
242 sub fetch_all_versions_by_stable_id {
243 my ($self, $stable_id) = @_;
245 my $constraint =
"t.stable_id = ?";
247 $self->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
249 return $self->generic_fetch($constraint);
253 =head2 fetch_by_rnaproduct_id
256 The
internal identifier of the RNAProduct whose
transcript
258 Example : my $tr = $tr_adaptor->fetch_by_rnaproduct_id($rnap->dbID);
259 Description: Given the
internal identifier of a RNAProduct
this method
260 retrieves the
transcript associated with that RNAProduct.
261 If the
transcript cannot be found undef is returned instead.
269 sub fetch_by_rnaproduct_id {
270 my ($self, $p_dbID) = @_;
272 throw(
"dbID argument is required") unless defined($p_dbID);
274 my $sth = $self->prepare(
275 "SELECT transcript_id FROM rnaproduct WHERE rnaproduct_id = ?"
277 $sth->bind_param(1, $p_dbID, SQL_INTEGER);
280 my ($dbID) = $sth->fetchrow_array();
284 return $self->fetch_by_dbID($dbID);
291 =head2 fetch_by_translation_stable_id
293 Arg [1] : String $transl_stable_id
294 The stable identifier of the translation of the
transcript to
296 Example : my $tr = $tr_adaptor->fetch_by_translation_stable_id
298 Description: Retrieves a Transcript
object using the stable identifier of
307 sub fetch_by_translation_stable_id {
308 my ($self, $transl_stable_id ) = @_;
310 my $sth = $self->prepare(qq(
311 SELECT t.transcript_id
314 WHERE tl.stable_id = ?
315 AND tl.transcript_id = t.transcript_id
319 $sth->bind_param(1, $transl_stable_id, SQL_VARCHAR);
322 my ($id) = $sth->fetchrow_array;
325 return $self->fetch_by_dbID($id);
326 } elsif(my $vindex = rindex($transl_stable_id,
'.')) {
327 return $self->fetch_by_translation_stable_id_version(substr($transl_stable_id,0,$vindex),
328 substr($transl_stable_id,$vindex+1));
334 =head2 fetch_by_translation_stable_id_version
336 Arg [1] : String $transl_stable_id
337 The stable identifier of the translation of the
transcript to
339 Arg [2] : Integer $version
340 The version of the translation of the
transcript to retrieve
341 Example : my $tr = $tr_adaptor->fetch_by_translation_stable_id_version
342 (
'ENSP00000311007', 2);
343 Description: Retrieves a Transcript
object using the stable identifier and
344 version of its translation.
352 sub fetch_by_translation_stable_id_version {
353 my ($self, $transl_stable_id, $transl_version ) = @_;
355 # Enforce that version be numeric
356 return unless($transl_version =~ /^\d+$/);
358 my $sth = $self->prepare(qq(
359 SELECT t.transcript_id
362 WHERE tl.stable_id = ?
364 AND tl.transcript_id = t.transcript_id
368 $sth->bind_param(1, $transl_stable_id, SQL_VARCHAR);
369 $sth->bind_param(2, $transl_version, SQL_INTEGER);
372 my ($id) = $sth->fetchrow_array;
375 return $self->fetch_by_dbID($id);
381 =head2 fetch_by_translation_id
384 The
internal identifier of the translation whose
transcript
386 Example : my $tr = $tr_adaptor->fetch_by_translation_id($transl->dbID);
387 Description: Given the
internal identifier of a translation
this method
388 retrieves the
transcript associated with that translation.
389 If the
transcript cannot be found undef is returned instead.
397 sub fetch_by_translation_id {
398 my ( $self, $p_dbID ) = @_;
400 if ( !defined($p_dbID) ) {
401 throw(
"dbID argument is required");
405 $self->prepare(
"SELECT transcript_id "
406 .
"FROM translation "
407 .
"WHERE translation_id = ?" );
409 $sth->bind_param( 1, $p_dbID, SQL_INTEGER );
412 my ($dbID) = $sth->fetchrow_array();
416 return $self->fetch_by_dbID($dbID);
422 =head2 fetch_all_by_Gene
425 The gene to fetch transcripts of
426 Example : my $gene = $gene_adaptor->fetch_by_stable_id(
'ENSG0000123');
427 my @transcripts = @{ $tr_adaptor->fetch_all_by_Gene($gene) };
428 Description: Retrieves Transcript objects
for given gene. Puts Genes slice
432 Caller : Gene->get_all_Transcripts()
437 sub fetch_all_by_Gene {
438 my ( $self, $gene ) = @_;
440 my $constraint =
"t.gene_id = " . $gene->
dbID();
442 # Use the fetch_all_by_Slice_constraint method because it handles the
443 # difficult Haps/PARs and coordinate remapping.
445 # Get a slice that entirely overlaps the gene. This is because we
446 # want all transcripts to be retrieved, not just ones overlapping
447 # the slice the gene is on (the gene may only partially overlap the
448 # slice). For speed reasons, only use a different slice if necessary
451 my $gslice = $gene->slice();
453 if ( !defined($gslice) ) {
454 throw(
"Gene must have attached slice to retrieve transcripts.");
459 if ( $gene->start() < 1 || $gene->end() > $gslice->length() ) {
460 if ( $gslice->is_circular() ) {
463 $slice = $self->db->get_SliceAdaptor->fetch_by_Feature($gene);
470 $self->fetch_all_by_Slice_constraint( $slice, $constraint );
472 if ( $slice != $gslice ) {
474 foreach my $tr ( @{$transcripts} ) {
475 push( @out, $tr->transfer($gslice) );
477 $transcripts = \@out;
480 my $canonical_t = $gene->canonical_transcript();
482 foreach my $t ( @{$transcripts} ) {
483 if ( $t->equals($canonical_t) ) {
490 } ## end sub fetch_all_by_Gene
493 =head2 fetch_all_by_Slice
496 The slice to fetch transcripts on
497 Arg [2] : (optional) Boolean $load_exons
498 If
true, exons will be loaded immediately rather than
500 Arg [3] : (optional) String $logic_name
501 The logic name of the type of features to obtain
502 Arg [4] : (optional) String $constraint
504 Example : my @transcripts = @{ $tr_adaptor->fetch_all_by_Slice($slice) };
505 Description: Overrides superclass method to optionally load exons
506 immediately rather than lazy-loading them later. This
507 is more efficient when there are a lot of transcripts whose
508 exons are going to be used.
511 Caller : Slice::get_all_Transcripts
516 sub fetch_all_by_Slice {
517 my ( $self, $slice, $load_exons, $logic_name, $constraint, $source, $biotype ) = @_;
519 if (defined $constraint and $constraint ne
'') {
520 $constraint .=
' AND t.is_current = 1';
522 $constraint .=
't.is_current = 1';
524 if (defined($source)) {
525 $constraint .=
" and t.source = '$source'";
527 if (defined($biotype)) {
528 my $inline_variables = 1;
529 $constraint .=
" and ".$self->generate_in_constraint($biotype,
't.biotype', SQL_VARCHAR, $inline_variables);
532 my $transcripts = $self->SUPER::fetch_all_by_Slice_constraint( $slice, $constraint, $logic_name);
534 # if there are 0 transcripts still do lazy-loading
535 if ( !$load_exons || @$transcripts < 1 ) {
539 # preload all of the exons now, instead of lazy loading later
540 # faster than 1 query per transcript
542 # first check if the exons are already preloaded
543 # @todo FIXME: Should test all exons.
544 if ( exists( $transcripts->[0]->{
'_trans_exon_array'} ) ) {
548 # get extent of region spanned by transcripts
549 my ($min_start, $max_end);
552 unless ($slice->is_circular()) {
553 foreach my $t (@$transcripts) {
554 if (!defined($min_start) || $t->seq_region_start() < $min_start) {
555 $min_start = $t->seq_region_start();
557 if (!defined($max_end) || $t->seq_region_end() > $max_end) {
558 $max_end = $t->seq_region_end();
562 if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
565 my $sa = $self->db()->get_SliceAdaptor();
566 $ext_slice = $sa->fetch_by_region($slice->coord_system->name(), $slice->seq_region_name(), $min_start, $max_end, $slice->strand(), $slice->coord_system->version());
570 # feature might be crossing the origin of replication (i.e. seq_region_start > seq_region_end)
571 # the computation of min_start|end based on seq_region_start|end is not safe
572 # use feature start/end relative to the slice instead
573 my ($min_start_feature, $max_end_feature);
574 foreach my $t (@$transcripts) {
575 if (!defined($min_start) || ($t->start >= 0 && $t->start() < $min_start)) {
576 $min_start = $t->start();
577 $min_start_feature = $t;
579 if (!defined($max_end) || ($t->end() >= 0 && $t->end() > $max_end)) {
580 $max_end = $t->end();
581 $max_end_feature = $t;
585 # now we can reassign min_start|end to seq_region_start|end of
586 # the feature which spans the largest region
587 $min_start = $min_start_feature->seq_region_start();
588 $max_end = $max_end_feature->seq_region_end();
590 my $sa = $self->db()->get_SliceAdaptor();
592 $sa->fetch_by_region($slice->coord_system->name(),
593 $slice->seq_region_name(),
597 $slice->coord_system->version());
602 # associate exon identifiers with transcripts
604 my %tr_hash =
map { $_->dbID => $_ } @{$transcripts};
606 my $tr_id_str = join(
',', keys(%tr_hash) );
609 $self->prepare(
"SELECT `transcript_id`, `exon_id`, `rank` "
610 .
"FROM exon_transcript "
611 .
"WHERE transcript_id IN ($tr_id_str)" );
615 my ( $tr_id, $ex_id, $rank );
616 $sth->bind_columns( \( $tr_id, $ex_id, $rank ) );
620 while ( $sth->fetch() ) {
621 $ex_tr_hash{$ex_id} ||= [];
622 push( @{ $ex_tr_hash{$ex_id} }, [ $tr_hash{$tr_id}, $rank ] );
625 my $ea = $self->db()->get_ExonAdaptor();
626 my $exons = $ea->fetch_all_by_Slice_constraint(
628 sprintf(
"e.exon_id IN (%s)",
629 join(
',', sort { $a <=> $b } keys(%ex_tr_hash) ) ) );
631 # move exons onto transcript slice, and add them to transcripts
632 foreach my $ex ( @{$exons} ) {
634 if ( $slice != $ext_slice ) {
635 $new_ex = $ex->transfer($slice);
636 if ( !defined($new_ex) ) {
638 .
"Exon could not be transfered onto Transcript slice." );
644 foreach my $row ( @{ $ex_tr_hash{ $new_ex->dbID() } } ) {
645 my ( $tr, $rank ) = @{$row};
646 $tr->add_Exon( $new_ex, $rank );
650 my $tla = $self->db()->get_TranslationAdaptor();
652 # load all of the translations at once
653 $tla->fetch_all_by_Transcript_list($transcripts);
656 } ## end sub fetch_all_by_Slice
659 =head2 fetch_all_by_external_name
661 Arg [1] : String $external_name
662 An external identifier of the
transcript to be obtained
663 Arg [2] : (optional) String $external_db_name
664 The name of the external database from which the
665 identifier originates.
666 Arg [3] : Boolean
override. Force SQL regex matching
for users
667 who really
do want to find all
'NM%'
668 Example : my @transcripts =
669 @{ $tr_adaptor->fetch_all_by_external_name(
'NP_065811.1') };
670 my @more_transcripts =
671 @{$tr_adaptor->fetch_all_by_external_name(
'NP_0658__._')};
672 Description: Retrieves all transcripts which are associated with
673 an external identifier such as a GO term, Swissprot
674 identifier, etc. Usually there will only be a single
675 transcript returned in the list reference, but not
676 always. Transcripts are returned in their native
677 coordinate system, i.e. the coordinate system in which
678 they are stored in the database. If they are required
679 in another coordinate system the Transcript::transfer or
680 Transcript::transform method can be used to convert them.
681 If no transcripts with the external identifier are found,
682 a reference to an empty list is returned.
683 SQL wildcards % and _ are supported in the $external_name
684 but their use is somewhat restricted
for performance reasons.
685 Users that really
do want % and _ in the first three characters
686 should use argument 3 to prevent optimisations
694 sub fetch_all_by_external_name {
695 my ( $self, $external_name, $external_db_name, $override) = @_;
697 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
700 $entryAdaptor->list_transcript_ids_by_extids( $external_name,
701 $external_db_name, $override );
703 my @features = @{ $self->fetch_all_by_dbID_list( \@ids ) };
705 my @non_reference = grep { ! $_->slice()->is_reference() } @features;
706 return [ @reference, @non_reference ];
709 =head2 fetch_all_by_GOTerm
712 The GO term
for which transcripts should be fetched.
714 Example: @transcripts = @{
715 $transcript_adaptor->fetch_all_by_GOTerm(
716 $go_adaptor->fetch_by_accession(
'GO:0030326') ) };
718 Description : Retrieves a list of transcripts that are
719 associated with the given GO term, or with any of
720 its descendent GO terms. The transcripts returned
721 are in their native coordinate system, i.e. in
722 the coordinate system in which they are stored
723 in the database. If another coordinate system
724 is required then the Transcript::transfer or
725 Transcript::transform method can be used.
728 Exceptions : Throws if argument is not a GO term
734 sub fetch_all_by_GOTerm {
735 my ( $self, $term ) = @_;
737 assert_ref( $term,
'Bio::EnsEMBL::OntologyTerm' );
738 if ( $term->ontology() ne
'GO' ) {
739 throw(
'Argument is not a GO term');
742 my $entryAdaptor = $self->db->get_DBEntryAdaptor();
745 foreach my $accession (
map { $_->accession() }
746 ( $term, @{ $term->descendants() } ) )
749 $entryAdaptor->list_transcript_ids_by_extids( $accession,
'GO' );
750 foreach my $dbID (@ids) { $unique_dbIDs{$dbID} = 1 }
754 $self->fetch_all_by_dbID_list(
755 [ sort { $a <=> $b } keys(%unique_dbIDs) ]
759 } ## end sub fetch_all_by_GOTerm
761 =head2 fetch_all_by_GOTerm_accession
764 The GO term
accession for which transcripts should be
770 @{ $transcript_adaptor->fetch_all_by_GOTerm_accession(
773 Description : Retrieves a list of transcripts that are associated with
774 the given GO term, or with any of its descendent
775 GO terms. The transcripts returned are in their native
776 coordinate system, i.e. in the coordinate system
777 in which they are stored in the database. If
778 another coordinate system is required then the
779 Transcript::transfer or Transcript::transform method can be
783 Exceptions : Throws if argument is not a GO term
accession
789 sub fetch_all_by_GOTerm_accession {
790 my ( $self, $accession ) = @_;
792 if ( $accession !~ /^GO:/ ) {
793 throw(
'Argument is not a GO term accession');
800 my $term = $goAdaptor->fetch_by_accession($accession);
802 return $self->fetch_all_by_GOTerm($term);
805 =head2 fetch_by_display_label
807 Arg [1] : String $label - display label of
transcript to fetch
808 Example : my $tr = $tr_adaptor->fetch_by_display_label(
"BRCA2");
809 Description: Returns the
transcript which has the given display label or
810 undef
if there is none. If there are more than 1, only the first
819 sub fetch_by_display_label {
823 my $constraint =
"x.display_label = ? AND t.is_current = 1";
825 $self->bind_param_generic_fetch($label,SQL_VARCHAR);
827 my ($transcript) = @{ $self->generic_fetch($constraint) };
833 =head2 fetch_all_by_exon_stable_id
835 Arg [1] : String $stable_id
837 Example : my $tr = $tr_adaptor->fetch_all_by_exon_stable_id
839 Description: Retrieves a list of transcripts via an
exon stable
id.
847 sub fetch_all_by_exon_stable_id {
848 my ($self, $stable_id) = @_;
852 my $sth = $self->prepare(qq(
853 SELECT t.transcript_id
855 WHERE e.exon_id = et.exon_id
856 AND et.transcript_id = t.transcript_id
861 $sth->bind_param(1, $stable_id, SQL_VARCHAR);
864 while( my $id = $sth->fetchrow_array ) {
865 my $transcript = $self->fetch_by_dbID($id);
866 push(@trans, $transcript)
if $transcript;
876 =head2 fetch_all_by_source
878 Arg [1] : String $source
880 The source of the
transcript to retrieve. You can have as an argument a reference
882 Example : $transcripts = $transcript_adaptor->fetch_all_by_source(
'havana');
883 $transcripts = $transcript_adaptor->fetch_all_by_source([
'ensembl',
'vega']);
884 Description: Retrieves an array reference of
transcript objects from the database via its source or sources.
885 The
transcript will be retrieved in its native coordinate system (i.e.
886 in the coordinate system it is stored in the database). It may
887 be converted to a different coordinate system through a call to
888 transform() or transfer(). If no matching transcript is found
889 a reference to an empty list is returned instead.
890 Returntype : listref of
Bio::
EnsEMBL::Transcript
891 Exceptions : if we cant get the gene in given coord system
897 sub fetch_all_by_source {
898 my ($self, $source) = @_;
899 my @transcripts = @{$self->generic_fetch($self->source_constraint($source))};
900 return \@transcripts;
903 =head2 source_constraint
905 Arg [1] : String $source
907 The source of the
transcript to retrieve. You can have as an argument a reference
909 Description: Used internally to generate a SQL constraint to restrict a
transcript query by source
911 Exceptions : If source is not supplied
917 sub source_constraint {
918 my ($self, $sources, $inline_variables) = @_;
919 my $constraint =
"t.is_current = 1";
920 my $in_statement = $self->generate_in_constraint($sources,
't.source', SQL_VARCHAR, $inline_variables);
921 $constraint .=
" and $in_statement";
925 =head2 count_all_by_source
927 Arg [1] : String $source
929 The source of the
transcript to retrieve. You can have as an argument a reference
931 Example : $cnt = $transcript_adaptor->count_all_by_source(
'ensembl');
932 $cnt = $transcript_adaptor->count_all_by_source([
'havana',
'vega']);
933 Description : Retrieves count of
transcript objects from the database via its source or sources.
940 sub count_all_by_source {
941 my ($self, $source) = @_;
942 return $self->generic_count($self->source_constraint($source));
945 =head2 count_all_by_Slice
948 The slice to count transcripts on.
949 Arg [2] : (optional) biotype(s)
string or arrayref of strings
950 the biotype of the features to count.
951 Arg [3] : (optional)
string $source
952 the source name of the features to count.
953 Example : $cnt = $transcript_adaptor->count_all_by_Slice();
954 Description: Method to count transcripts on a given slice, filtering by biotype and source
961 sub count_all_by_Slice {
962 my ($self, $slice, $biotype, $source) = @_;
964 my $constraint =
't.is_current = 1';
965 if (defined($source)) {
966 $constraint .=
" and t.source = '$source'";
968 if (defined($biotype)) {
969 $constraint .=
" and " . $self->biotype_constraint($biotype);
972 return $self->count_by_Slice_constraint($slice, $constraint);
975 =head2 fetch_all_by_biotype
977 Arg [1] : String $biotype
979 The biotype of the
transcript to retrieve. You can have as an argument a reference
980 to a list of biotypes
981 Example : $gene = $transcript_adaptor->fetch_all_by_biotype(
'protein_coding');
982 $gene = $transcript_adaptor->fetch_all_by_biotype([
'protein_coding',
'sRNA',
'miRNA']);
983 Description: Retrieves an array reference of
transcript objects from the database via its biotype or biotypes.
984 The
transcript will be retrieved in its native coordinate system (i.e.
985 in the coordinate system it is stored in the database). It may
986 be converted to a different coordinate system through a call to
987 transform() or transfer(). If no matching transcript is found
988 a reference to an empty list is returned instead.
989 Returntype : listref of
Bio::
EnsEMBL::Transcript
990 Exceptions : if we cant get the gene in given coord system
996 sub fetch_all_by_biotype {
997 my ($self, $biotype) = @_;
998 my @transcripts = @{$self->generic_fetch($self->biotype_constraint($biotype))};
999 return \@transcripts;
1002 =head2 biotype_constraint
1004 Arg [1] : String $biotypes
1005 listref of $biotypes
1006 The biotype of the
transcript to retrieve. You can have as an argument a reference
1007 to a list of biotypes
1008 Description: Used internally to generate a SQL constraint to restrict a
transcript query by biotype
1010 Exceptions : If biotype is not supplied
1016 sub biotype_constraint {
1017 my ($self, $biotypes, $inline_variables) = @_;
1018 my $constraint =
"t.is_current = 1";
1019 my $in_statement = $self->generate_in_constraint($biotypes,
't.biotype', SQL_VARCHAR, $inline_variables);
1020 $constraint .=
" and $in_statement";
1024 =head2 count_all_by_biotype
1026 Arg [1] : String $biotype
1027 listref of $biotypes
1028 The biotype of the
transcript to retrieve. You can have as an argument a reference
1029 to a list of biotypes
1030 Example : $cnt = $transcript_adaptor->count_all_by_biotype(
'protein_coding');
1031 $cnt = $transcript_adaptor->count_all_by_biotype([
'protein_coding',
'sRNA',
'miRNA']);
1032 Description : Retrieves count of
transcript objects from the database via its biotype or biotypes.
1033 Returntype : integer
1039 sub count_all_by_biotype {
1040 my ($self, $biotype) = @_;
1041 return $self->generic_count($self->biotype_constraint($biotype));
1048 Arg [2] : Int $gene_dbID
1049 The identifier of the gene that
this transcript is associated
1051 Arg [3] : DEPRECATED (optional) Int $analysis_id
1052 The analysis_id to use when storing
this gene. This is
for
1053 backward compatibility only and used to fall back to the gene
1054 analysis_id
if no analysis
object is attached to the
transcript
1055 (which you should
do for new code).
1056 Arg [4] : prevent coordinate recalculation
if you are persisting
1057 transcripts with
this gene
1058 Example : $transID = $tr_adaptor->store($transcript, $gene->dbID);
1059 Description: Stores a
transcript in the database and returns the
new
1060 internal identifier
for the stored
transcript.
1069 my ( $self, $transcript, $gene_dbID, $analysis_id, $skip_recalculating_coordinates ) = @_;
1071 if ( !ref($transcript)
1072 || !$transcript->isa(
'Bio::EnsEMBL::Transcript') )
1074 throw(
"$transcript is not a EnsEMBL transcript - not storing");
1077 my $db = $self->db();
1079 if ( $transcript->is_stored($db) ) {
1080 return $transcript->dbID();
1083 # Force lazy-loading of exons and ensure coords are correct.
1084 # If we have been told not to do this then skip doing this
1085 # and we assume the user knows what they are doing. You have been
1087 if(! $skip_recalculating_coordinates) {
1088 $transcript->recalculate_coordinates();
1091 my $is_current = ( defined( $transcript->is_current() )
1092 ? $transcript->is_current()
1096 my $analysis = $transcript->analysis();
1097 my $new_analysis_id;
1100 if ( $analysis->is_stored($db) ) {
1101 $new_analysis_id = $analysis->dbID;
1103 $new_analysis_id = $db->get_AnalysisAdaptor->store($analysis);
1106 throw(
"Need an analysis_id to store the Transcript.");
1110 # Store exons - this needs to be done before the possible transfer
1111 # of the transcript to another slice (in _pre_store()). Transferring
1112 # results in copies being made of the exons and we need to preserve
1113 # the object identity of the exons so that they are not stored twice
1114 # by different transcripts.
1116 my $exons = $transcript->get_all_Exons();
1117 my $exonAdaptor = $db->get_ExonAdaptor();
1118 foreach my $exon ( @{$exons} ) {
1119 $exonAdaptor->store($exon);
1122 my $original_translation = $transcript->translation();
1123 my $original = $transcript;
1125 ( $transcript, $seq_region_id ) = $self->_pre_store($transcript);
1127 # First store the transcript without a display xref. The display xref
1128 # needs to be set after xrefs are stored which needs to happen after
1129 # transcript is stored.
1135 # my $store_transcript_sql =
1136 # sprintf "INSERT INTO transcript SET gene_id = ?, analysis_id = ?, seq_region_id = ?, seq_region_start = ?, seq_region_end = ?, seq_region_strand = ?,%s biotype = ?, description = ?, is_current = ?, canonical_translation_id = ?", ($self->schema_version > 74)?" source = ?,":'';
1147 push @columns,
'source' if ($self->schema_version > 74);
1153 canonical_translation_id
1159 if ( defined( $transcript->stable_id() ) ) {
1160 push @columns,
'stable_id',
'version';
1162 my $created = $self->db->dbc->from_seconds_to_date($transcript->created_date());
1163 my $modified = $self->db->dbc->from_seconds_to_date($transcript->modified_date());
1166 push @canned_columns,
'created_date';
1167 push @canned_values, $created;
1170 push @canned_columns,
'modified_date';
1171 push @canned_values, $modified;
1176 my $columns = join(
', ', @columns, @canned_columns);
1177 my $values = join(
', ', (
'?') x @columns, @canned_values);
1178 my $store_transcript_sql = qq(
1179 INSERT INTO
transcript ( $columns ) VALUES ( $values )
1182 my $tst = $self->prepare($store_transcript_sql);
1184 $tst->bind_param( ++$i, $gene_dbID, SQL_INTEGER );
1185 $tst->bind_param( ++$i, $new_analysis_id, SQL_INTEGER );
1186 $tst->bind_param( ++$i, $seq_region_id, SQL_INTEGER );
1187 $tst->bind_param( ++$i, $transcript->start(), SQL_INTEGER );
1188 $tst->bind_param( ++$i, $transcript->end(), SQL_INTEGER );
1189 $tst->bind_param( ++$i, $transcript->strand(), SQL_TINYINT );
1191 $self->schema_version > 74 and
1192 $tst->bind_param( ++$i, $transcript->source(), SQL_VARCHAR );
1194 $tst->bind_param( ++$i, $transcript->get_Biotype->name, SQL_VARCHAR );
1195 $tst->bind_param( ++$i, $transcript->description(), SQL_LONGVARCHAR );
1196 $tst->bind_param( ++$i, $is_current, SQL_TINYINT );
1198 # If the transcript has a translation, this is updated later:
1199 $tst->bind_param( ++$i, undef, SQL_INTEGER );
1201 if ( defined( $transcript->stable_id() ) ) {
1203 $tst->bind_param( ++$i, $transcript->stable_id(), SQL_VARCHAR );
1204 $tst->bind_param( ++$i, $transcript->version(), SQL_INTEGER );
1210 my $transc_dbID = $self->last_insert_id(
'transcript_id', undef,
'transcript');
1216 my $alt_translations =
1217 $transcript->get_all_alternative_translations();
1218 my $translation = $transcript->translation();
1220 if ( defined($translation) ) {
1221 # Make sure that the start and end exon are set correctly.
1222 my $start_exon = $translation->start_Exon();
1223 my $end_exon = $translation->end_Exon();
1225 if ( !defined($start_exon) ) {
1226 throw(
"Translation does not define a start exon.");
1229 if ( !defined($end_exon) ) {
1230 throw(
"Translation does not defined an end exon.");
1233 # If the dbID is not set, this means the exon must have been a
1234 # different object in memory than the exons of the transcript.
1235 # Try to find the matching exon in all of the exons we just stored.
1236 if ( !defined( $start_exon->dbID() ) ) {
1237 my $key = $start_exon->hashkey();
1238 ($start_exon) = grep { $_->hashkey() eq $key } @$exons;
1240 if ( defined($start_exon) ) {
1241 $translation->start_Exon($start_exon);
1243 throw(
"Translation's start_Exon does not appear "
1244 .
"to be one of the exons in "
1245 .
"its associated Transcript" );
1249 if ( !defined( $end_exon->dbID() ) ) {
1250 my $key = $end_exon->hashkey();
1251 ($end_exon) = grep { $_->hashkey() eq $key } @$exons;
1253 if ( defined($end_exon) ) {
1254 $translation->end_Exon($end_exon);
1256 throw(
"Translation's end_Exon does not appear "
1257 .
"to be one of the exons in "
1258 .
"its associated Transcript." );
1262 my $old_dbid = $translation->dbID();
1263 $db->get_TranslationAdaptor()->store( $translation, $transc_dbID );
1265 # Need to update the canonical_translation_id for this transcript.
1267 my $sth = $self->prepare(
1270 SET canonical_translation_id = ?
1271 WHERE transcript_id = ?)
1274 $sth->bind_param( 1, $translation->dbID(), SQL_INTEGER );
1275 $sth->bind_param( 2, $transc_dbID, SQL_INTEGER );
1279 # Set values of the original translation, we may have copied it when
1280 # we transformed the transcript.
1281 $original_translation->dbID( $translation->dbID() );
1282 $original_translation->adaptor( $translation->adaptor() );
1283 } ## end
if ( defined($translation...))
1286 # Store the alternative translations,
if there are any.
1289 if ( defined($alt_translations)
1290 && scalar( @{$alt_translations} ) > 0 )
1292 foreach my $alt_translation ( @{$alt_translations} ) {
1293 my $start_exon = $alt_translation->start_Exon();
1294 my $end_exon = $alt_translation->end_Exon();
1296 if ( !defined($start_exon) ) {
1297 throw(
"Translation does not define a start exon.");
1298 } elsif ( !defined($end_exon) ) {
1299 throw(
"Translation does not defined an end exon.");
1302 if ( !defined( $start_exon->dbID() ) ) {
1303 my $key = $start_exon->hashkey();
1304 ($start_exon) = grep { $_->hashkey() eq $key } @{$exons};
1306 if ( defined($start_exon) ) {
1307 $alt_translation->start_Exon($start_exon);
1309 throw(
"Translation's start_Exon does not appear "
1310 .
"to be one of the exon in"
1311 .
"its associated Transcript" );
1314 if ( !defined( $end_exon->dbID() ) ) {
1315 my $key = $end_exon->hashkey();
1316 ($end_exon) = grep { $_->hashkey() eq $key } @$exons;
1318 if ( defined($end_exon) ) {
1319 $alt_translation->end_Exon($end_exon);
1321 throw(
"Translation's end_Exon does not appear "
1322 .
"to be one of the exons in "
1323 .
"its associated Transcript." );
1327 $db->get_TranslationAdaptor()
1328 ->store( $alt_translation, $transc_dbID );
1329 } ## end
foreach my $alt_translation...
1330 } ## end
if ( defined($alt_translations...))
1333 # Store the xrefs/
object xref mapping.
1335 my $dbEntryAdaptor = $db->get_DBEntryAdaptor();
1337 foreach my $dbe ( @{ $transcript->get_all_DBEntries() } ) {
1338 $dbEntryAdaptor->store( $dbe, $transc_dbID,
"Transcript", 1 );
1342 # Update transcript to point to display xref if it is set.
1344 if ( my $dxref = $transcript->display_xref() ) {
1347 if ( $dxref->is_stored($db) ) {
1348 $dxref_id = $dxref->dbID();
1350 $dxref_id = $dbEntryAdaptor->exists($dxref);
1353 if ( defined($dxref_id) ) {
1355 $self->prepare(
"UPDATE transcript "
1356 .
"SET display_xref_id = ? "
1357 .
"WHERE transcript_id = ?" );
1358 $sth->bind_param( 1, $dxref_id, SQL_INTEGER );
1359 $sth->bind_param( 2, $transc_dbID, SQL_INTEGER );
1361 $dxref->dbID($dxref_id);
1362 $dxref->adaptor($dbEntryAdaptor);
1366 "Display_xref %s:%s is not stored in database.\n"
1367 .
"Not storing relationship to this transcript.",
1368 $dxref->dbname(), $dxref->display_id() ) );
1369 $dxref->dbID(undef);
1370 $dxref->adaptor(undef);
1372 } ## end
if ( my $dxref = $transcript...)
1375 # Link transcript to exons in exon_transcript table
1377 my $etst = $self->prepare(
1378 "INSERT INTO exon_transcript (`exon_id`,`transcript_id`,`rank`) "
1379 .
"VALUES (?,?,?)" );
1381 foreach my $exon ( @{ $transcript->get_all_Exons } ) {
1382 $etst->bind_param( 1, $exon->dbID, SQL_INTEGER );
1383 $etst->bind_param( 2, $transc_dbID, SQL_INTEGER );
1384 $etst->bind_param( 3, $rank, SQL_INTEGER );
1391 # Now the supporting evidence
1392 my $tsf_adaptor = $db->get_TranscriptSupportingFeatureAdaptor();
1393 $tsf_adaptor->store( $transc_dbID,
1394 $transcript->get_all_supporting_features() );
1396 # store transcript attributes if there are any
1397 my $attr_adaptor = $db->get_AttributeAdaptor();
1399 $attr_adaptor->store_on_Transcript( $transc_dbID,
1400 $transcript->get_all_Attributes() );
1402 # Check if transcript is canonical
1403 if ($transcript->is_canonical()) {
1404 my $gene_adaptor = $self->db()->get_GeneAdaptor();
1405 my $gene = $gene_adaptor->fetch_by_dbID($gene_dbID);
1406 $transcript->dbID($transc_dbID);
1407 $gene->canonical_transcript($transcript);
1408 $gene_adaptor->update($gene);
1411 # store the IntronSupportingEvidence features
1412 my $ise_adaptor = $db->get_IntronSupportingEvidenceAdaptor();
1413 my $intron_supporting_evidence = $transcript->get_all_IntronSupportingEvidence();
1414 foreach my $ise (@{$intron_supporting_evidence}) {
1415 $ise_adaptor->store($ise);
1416 $ise_adaptor->store_transcript_linkage($ise, $transcript, $transc_dbID);
1419 # Update the original transcript object - not the transfered copy that
1420 # we might have created.
1421 $original->dbID($transc_dbID);
1422 $original->adaptor($self);
1424 return $transc_dbID;
=head2 get_Interpro_by_transid

  Arg [1]    : String $trans_stable_id
               Stable ID of the transcript to look up.
  Example    : @i = @{ $tr_adaptor->get_Interpro_by_transid($trans->stable_id()) };
  Description: Collects the InterPro accessions reachable from a current
               transcript through its translation's protein features,
               together with the description of the matching xref.
               A hack really - a much more structured approach would be
               preferable.
  Returntype : listref of strings (Interpro_acc:description), one entry per
               distinct InterPro accession. The description part is empty
               when the xref has no description.
  Caller     : domainview? , GeneView

=cut

sub get_Interpro_by_transid {
  my ($self, $trans_stable_id) = @_;

  # STRAIGHT_JOIN is only added when the adaptor reports that it can be used.
  my $straight_join = $self->_can_straight_join ? 'STRAIGHT_JOIN' : '';

  # NOTE(review): the table list follows the aliases used in the WHERE
  # clause (t, tl, pf, i, x) - confirm against the schema.
  my $sth = $self->prepare(qq(
    SELECT  ${straight_join} i.interpro_ac, x.description
    FROM    transcript t,
            translation tl,
            protein_feature pf,
            interpro i,
            xref x
    WHERE   t.stable_id = ?
    AND     tl.transcript_id = t.transcript_id
    AND     tl.translation_id = pf.translation_id
    AND     i.id = pf.hit_name
    AND     i.interpro_ac = x.dbprimary_acc
    AND     t.is_current = 1
  ));

  $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
  $sth->execute();

  my @out;
  my %seen;
  while ( my $row = $sth->fetchrow_arrayref() ) {
    my ($accession, $description) = @{$row};

    # Report each accession once, keeping the first description seen.
    next if $seen{$accession}++;

    # A NULL description would otherwise trigger an "uninitialized value"
    # warning in the concatenation below; the resulting string is the same.
    $description = '' if !defined($description);

    push @out, $accession . ":" . $description;
  }

  return \@out;
}
=head2 is_Transcript_canonical()

  Arg [1]     : Bio::EnsEMBL::Transcript $transcript
                Transcript whose dbID is looked up in the gene table.
  Example     : $tr_adaptor->is_Transcript_canonical($transcript);
  Description : Returns a boolean if the given transcript is considered
                canonical with respect to a gene. The value returned is the
                number of gene rows whose canonical_transcript_id equals the
                transcript's dbID, so it is 0 when no gene points at it.
  Returntype  : Boolean

=cut

sub is_Transcript_canonical {
  my ($self, $transcript) = @_;

  my $helper = $self->dbc()->sql_helper();

  # A single count(*) value is all that is needed, so ask the helper for
  # exactly one result.
  return $helper->execute_single_result(
    -SQL    => 'select count(*) from gene where canonical_transcript_id =?',
    -PARAMS => [ $transcript->dbID() ],
  );
}
1505 Arg [2] : Boolean, update Gene coordinates after removal. WARNING:
this does not alter any other copies of the
1506 gene currently in memory. Other copies will retain their original coordinates. Either refetch them
1507 or go directly through Gene->remove_Transcript first, then remove the Transcript here.
1508 Example : $tr_adaptor->remove($transcript);
1509 Description: Removes a
transcript completely from the database, and all
1510 associated information.
1511 This method is usually called by the GeneAdaptor::remove method
1512 because
this method will not perform the removal of genes
1513 which are associated with
this transcript. Do not call
this
1514 method directly unless you know there are no genes associated
1517 Exceptions :
throw on incorrect arguments
1518 warning
if transcript is not in
this database
1519 Caller : GeneAdaptor::remove
# Body of remove(): validates the argument, then deletes the transcript's
# supporting features, intron supporting evidence links, xref links,
# attributes, translation, exon links and finally the transcript row.
# The transcript that is to be removed.
1526 my $transcript = shift;
# Anything that is not a Bio::EnsEMBL::Transcript object is rejected.
1528 if(!ref($transcript) || !$transcript->isa(
'Bio::EnsEMBL::Transcript')) {
1529 throw(
"Bio::EnsEMBL::Transcript argument expected");
1532 # sanity check: make sure nobody tries to slip past a prediction transcript
1533 # which inherits from transcript but actually uses different tables
1534 if($transcript->isa(
'Bio::EnsEMBL::PredictionTranscript')) {
1535 throw(
"TranscriptAdaptor can only remove Transcripts " .
1536 "not PredictionTranscripts");
# A transcript that is not stored in this adaptor's database only
# produces a warning.
1539 if ( !$transcript->is_stored($self->db()) ) {
1540 warning(
"Cannot remove transcript ". $transcript->dbID .
". Is not stored ".
1541 "in this database.");
1545 # remove the supporting features of this transcript
# Adaptors used below to delete align features that nothing else references.
1547 my $prot_adp = $self->db->get_ProteinAlignFeatureAdaptor;
1548 my $dna_adp = $self->db->get_DnaAlignFeatureAdaptor;
# Every (feature_type, feature_id) pair linked to this transcript.
1550 my $sfsth = $self->prepare(
"SELECT feature_type, feature_id " .
1551 "FROM transcript_supporting_feature " .
1552 "WHERE transcript_id = ?");
1554 $sfsth->bind_param(1, $transcript->dbID, SQL_INTEGER);
1557 # statements to check for shared align_features
# $sth1 counts references in supporting_feature, $sth2 counts references
# in transcript_supporting_feature.
1558 my $sth1 = $self->prepare(
"SELECT count(*) FROM supporting_feature " .
1559 "WHERE feature_type = ? AND feature_id = ?");
1560 my $sth2 = $self->prepare(
"SELECT count(*) " .
1561 "FROM transcript_supporting_feature " .
1562 "WHERE feature_type = ? AND feature_id = ?");
# One iteration per supporting feature of the transcript.
1565 while(my ($type, $feature_id) = $sfsth->fetchrow()){
1567 # only remove align_feature if this is the last reference to it
1568 $sth1->bind_param(1, $type, SQL_VARCHAR);
1569 $sth1->bind_param(2, $feature_id, SQL_INTEGER);
1571 $sth2->bind_param(1, $type, SQL_VARCHAR);
1572 $sth2->bind_param(2, $feature_id, SQL_INTEGER);
1574 my ($count1) = $sth1->fetchrow;
1575 my ($count2) = $sth2->fetchrow;
# More than one reference across both tables means the feature is shared
# and has to stay.
1576 if ($count1 + $count2 > 1) {
1577 #warn "transcript: shared feature, not removing $type|$feature_id\n";
1578 next SUPPORTING_FEATURE;
1581 #warn "transcript: removing $type|$feature_id\n";
# The feature type decides which adaptor performs the deletion; an
# unrecognised type is reported with a warning and left alone.
1583 if($type eq
'protein_align_feature'){
1584 my $f = $prot_adp->fetch_by_dbID($feature_id);
1585 $prot_adp->remove($f);
1587 elsif($type eq
'dna_align_feature'){
1588 my $f = $dna_adp->fetch_by_dbID($feature_id);
1589 $dna_adp->remove($f);
1592 warning(
"Unknown supporting feature type $type. Not removing feature.");
1599 # delete the association to supporting features
# $sfsth is reused for the DELETE statement.
1601 $sfsth = $self->prepare(
"DELETE FROM transcript_supporting_feature WHERE transcript_id = ?");
1602 $sfsth->bind_param(1, $transcript->dbID, SQL_INTEGER);
1606 # delete the associated IntronSupportingEvidence and if the ISE had no more
1607 # linked transcripts remove it
1608 my $ise_adaptor = $self->db->get_IntronSupportingEvidenceAdaptor();
1609 foreach my $ise (@{$transcript->get_all_IntronSupportingEvidence()}) {
1610 $ise_adaptor->remove_transcript_linkage($ise, $transcript);
1611 if(! $ise->has_linked_transcripts()) {
1612 $ise_adaptor->remove($ise);
1616 # remove all xref linkages to this transcript
1618 my $dbeAdaptor = $self->db->get_DBEntryAdaptor();
1619 foreach my $dbe (@{$transcript->get_all_DBEntries}) {
1620 $dbeAdaptor->remove_from_object($dbe, $transcript,
'Transcript');
1623 # remove the attributes associated with this transcript
1624 my $attrib_adp = $self->db->get_AttributeAdaptor;
1625 $attrib_adp->remove_from_Transcript($transcript);
1627 # remove the translation associated with this transcript
# Only attempted when the transcript actually has a translation.
1629 my $translationAdaptor = $self->db->get_TranslationAdaptor();
1630 if( defined($transcript->translation()) ) {
1631 $translationAdaptor->remove( $transcript->translation );
1634 # remove exon associations to this transcript
1636 my $exonAdaptor = $self->db->get_ExonAdaptor();
1637 foreach my $exon ( @{$transcript->get_all_Exons()} ) {
1638 # get the number of transcript references to this exon
1639 # only remove the exon if this is the last transcript to
1642 my $sth = $self->prepare(
"SELECT count(*)
1643 FROM exon_transcript
1644 WHERE exon_id = ?" );
1645 $sth->bind_param(1, $exon->dbID, SQL_INTEGER);
1647 my ($count) = $sth->fetchrow_array();
# NOTE(review): the condition guarding this removal is not visible here;
# per the comment above it should only run for the exon's last reference.
1651 $exonAdaptor->remove( $exon );
# Drop every exon link row of this transcript.
1655 my $sth = $self->prepare(
"DELETE FROM exon_transcript
1656 WHERE transcript_id = ?" );
1657 $sth->bind_param(1, $transcript->dbID, SQL_INTEGER);
# The gene is looked up before the transcript row itself is deleted.
1661 my $gene = $transcript->get_Gene;
1663 $sth = $self->prepare(
"DELETE FROM transcript
1664 WHERE transcript_id = ?" );
1665 $sth->bind_param(1, $transcript->dbID, SQL_INTEGER);
# NOTE(review): presumably only done when the caller asked for the gene
# to be updated (Arg [2] in the POD) - the guard is not visible here.
1670 $gene->remove_Transcript($transcript);
# The in-memory object no longer carries a dbID or an adaptor.
1673 $transcript->dbID(undef);
1674 $transcript->adaptor(undef);
=head2 update

  Arg [1]    : Bio::EnsEMBL::Transcript $transcript
               The transcript to update
  Example    : $tr_adaptor->update($transcript);
  Description: Updates a transcript in the database. The columns written
               are stable_id, analysis_id, display_xref_id, description,
               biotype, is_current, canonical_translation_id and version,
               plus source when the schema version is above 74.
               If the transcript reports itself as canonical, it is also
               set as the canonical transcript of its gene and the gene is
               updated through the GeneAdaptor.
  Exceptions : thrown if $transcript is not a Bio::EnsEMBL::Transcript.
               warn if the method is called on a transcript that does not exist
               in the database (NOTE: no such check is performed by the
               code below).
               Should warn if trying to update the number of attached exons, but
               this is a far more complex process and is not yet implemented.

=cut

sub update {
  my ( $self, $transcript ) = @_;

  if ( !defined($transcript)
    || !ref($transcript)
    || !$transcript->isa('Bio::EnsEMBL::Transcript') )
  {
    # Name undef explicitly rather than interpolating it, which would
    # raise an "uninitialized value" warning and leave the message blank.
    my $got = defined($transcript) ? $transcript : 'undef';
    throw("Must update a transcript object, not a $got");
  }

  # The source column is only written for schema versions above 74. The
  # same flag drives both the SQL text and the bind below, so the
  # placeholder count and the bound values cannot drift apart.
  my $has_source = ( $self->schema_version > 74 ) ? 1 : 0;

  my $update_transcript_sql =
    sprintf "UPDATE transcript SET stable_id = ?, analysis_id = ?, display_xref_id = ?, description = ?,%s biotype = ?, is_current = ?, canonical_translation_id = ?, version = ? WHERE transcript_id = ?",
    $has_source ? " source = ?," : '';

  # A display xref without a dbID cannot be referenced; NULL is stored.
  my $display_xref    = $transcript->display_xref();
  my $display_xref_id = undef;
  if ( defined($display_xref) && $display_xref->dbID() ) {
    $display_xref_id = $display_xref->dbID();
  }

  my $sth = $self->prepare($update_transcript_sql);

  # Values are bound in the order the placeholders appear in the SQL.
  my $i = 0;
  $sth->bind_param( ++$i, $transcript->stable_id(),        SQL_VARCHAR );
  $sth->bind_param( ++$i, $transcript->analysis()->dbID(), SQL_INTEGER );
  $sth->bind_param( ++$i, $display_xref_id,                SQL_INTEGER );
  $sth->bind_param( ++$i, $transcript->description(),      SQL_LONGVARCHAR );

  if ($has_source) {
    $sth->bind_param( ++$i, $transcript->source(), SQL_VARCHAR );
  }

  $sth->bind_param( ++$i, $transcript->get_Biotype->name, SQL_VARCHAR );
  $sth->bind_param( ++$i, $transcript->is_current(),      SQL_TINYINT );

  # canonical_translation_id is NULL for transcripts without a translation.
  my $translation = $transcript->translation();
  $sth->bind_param( ++$i,
                    ( defined($translation) ? $translation->dbID() : undef ),
                    SQL_INTEGER );

  $sth->bind_param( ++$i, $transcript->version(), SQL_INTEGER );
  $sth->bind_param( ++$i, $transcript->dbID(),    SQL_INTEGER );

  $sth->execute();

  # Check if transcript is canonical
  if ( $transcript->is_canonical() ) {
    my $gene         = $transcript->get_Gene();
    my $gene_adaptor = $self->db()->get_GeneAdaptor();
    $gene->canonical_transcript($transcript);
    $gene_adaptor->update($gene);
  }
}
=head2 list_dbIDs

  Arg [1]    : (optional) Int $ordered
               Not 0 for the ids to be sorted by the seq_region.
  Example    : @transcript_ids = @{ $t_adaptor->list_dbIDs };
  Description: Gets a list of internal ids for all transcripts in the db.
  Returntype : Listref of Ints

=cut

sub list_dbIDs {
  my ($self, $ordered) = @_;

  # The second argument is deliberately undef here; only the table name
  # and the ordering flag are handed to _list_dbIDs.
  return $self->_list_dbIDs("transcript", undef, $ordered);
}
=head2 list_stable_ids

  Example    : @stable_trans_ids = @{ $transcript_adaptor->list_stable_ids };
  Description: Gets a list of stable ids for all transcripts in the current
               database.
  Returntype : Listref of Strings

=cut

sub list_stable_ids {
  my ($self) = @_;

  # Same lookup as for internal ids, but reading the stable_id column.
  return $self->_list_dbIDs("transcript", "stable_id");
}
# _objs_from_sth: turns the rows of a statement handle into Transcript
# objects. Coordinates are optionally remapped through $mapper and made
# relative to $dest_slice; a reference to the list of transcripts is returned.
1791 # Arg [1] : StatementHandle $sth
1792 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper
1793 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice
1794 # Description: PROTECTED implementation of abstract superclass method.
1795 # Responsible for the creation of Transcripts.
1796 # Returntype : Listref of Bio::EnsEMBL::Transcripts in target coord system
1801 sub _objs_from_sth {
1802 my ($self, $sth, $mapper, $dest_slice) = @_;
1805 # This code is ugly because an attempt has been made to remove as many
1806 # function calls as possible for speed purposes. Thus many caches and
1807 # a fair bit of gymnastics is used.
# Adaptors are fetched once, outside the row loop.
1810 my $sa = $self->db()->get_SliceAdaptor();
1811 my $aa = $self->db()->get_AnalysisAdaptor();
1812 my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();
# Per-row column variables. $source is the last one and is only part of
# the list used when the schema version is above 74.
1821 $transcript_id, $seq_region_id, $seq_region_start,
1822 $seq_region_end, $seq_region_strand, $analysis_id,
1823 $gene_id, $is_current, $stable_id,
1824 $version, $created_date, $modified_date,
1825 $description, $biotype,
1826 $external_db, $external_status, $external_db_name,
1827 $display_xref_id, $xref_display_label, $xref_primary_acc,
1828 $xref_version, $xref_description, $xref_info_type,
1829 $xref_info_text, $external_release, $source
# Schema versions above 74: the variable list includes $source.
1832 if ($self->schema_version() > 74) {
1835 $transcript_id, $seq_region_id, $seq_region_start,
1836 $seq_region_end, $seq_region_strand, $analysis_id,
1837 $gene_id, $is_current, $stable_id,
1838 $version, $created_date, $modified_date,
1839 $description, $biotype,
1840 $external_db, $external_status, $external_db_name,
1841 $display_xref_id, $xref_display_label, $xref_primary_acc,
1842 $xref_version, $xref_description, $xref_info_type,
1843 $xref_info_text, $external_release, $source
# Otherwise: the same list without $source.
1848 $transcript_id, $seq_region_id, $seq_region_start,
1849 $seq_region_end, $seq_region_strand, $analysis_id,
1850 $gene_id, $is_current, $stable_id,
1851 $version, $created_date, $modified_date,
1852 $description, $biotype,
1853 $external_db, $external_status, $external_db_name,
1854 $display_xref_id, $xref_display_label, $xref_primary_acc,
1855 $xref_version, $xref_description, $xref_info_type,
1856 $xref_info_text, $external_release
# Properties of the destination slice, read once so the row loop does not
# have to call the slice accessors repeatedly.
1860 my $dest_slice_start;
1862 my $dest_slice_strand;
1863 my $dest_slice_length;
1865 my $dest_slice_sr_name;
1866 my $dest_slice_sr_id;
1870 $dest_slice_start = $dest_slice->start();
1871 $dest_slice_end = $dest_slice->end();
1872 $dest_slice_strand = $dest_slice->strand();
1873 $dest_slice_length = $dest_slice->length();
1874 $dest_slice_cs = $dest_slice->coord_system();
1875 $dest_slice_sr_name = $dest_slice->seq_region_name();
1876 $dest_slice_sr_id = $dest_slice->get_seq_region_id();
1877 $asma = $self->db->get_AssemblyMapperAdaptor();
# One iteration per fetched row; "next FEATURE" discards the current row.
1880 FEATURE:
while($sth->fetch()) {
1882 #get the analysis object
# Analyses are cached by analysis_id. The second assignment repeats what
# the ||= on the previous line has already stored.
1883 my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
1884 $analysis_hash{$analysis_id} = $analysis;
1886 #need to get the internal_seq_region, if present
1887 $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
# Slices are cached under the key "ID:<seq_region_id>".
1888 my $slice = $slice_hash{
"ID:".$seq_region_id};
# On a cache miss the slice is fetched and its name and coordinate system
# are cached alongside it.
1891 $slice = $sa->fetch_by_seq_region_id($seq_region_id);
1892 $slice_hash{
"ID:".$seq_region_id} = $slice;
1893 $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
1894 $sr_cs_hash{$seq_region_id} = $slice->coord_system();
1897 #obtain a mapper if none was defined, but a dest_seq_region was
# Only needed when the destination slice's coordinate system differs from
# the one the feature is stored in. Once set, $mapper stays set for the
# remaining rows.
1898 if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
1899 $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
1902 my $sr_name = $sr_name_hash{$seq_region_id};
1903 my $sr_cs = $sr_cs_hash{$seq_region_id};
1906 # remap the feature coordinates to another coord system
1907 # if a mapper was provided
# A ChainedAssemblyMapper is given the destination slice through map();
# any other mapper goes through fastmap().
1912 if (defined $dest_slice && $mapper->isa(
'Bio::EnsEMBL::ChainedAssemblyMapper') ) {
1913 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
1914 $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
1917 ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
1918 $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
1921 #skip features that map to gaps or coord system boundaries
1922 next FEATURE
if (!defined($seq_region_id));
1924 #get a slice in the coord system we just mapped to
1925 $slice = $slice_hash{
"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
1929 # If a destination slice was provided convert the coords.
1931 if (defined($dest_slice)) {
1932 my $seq_region_len = $dest_slice->seq_region_length();
# Forward-strand slice: shift the coordinates so that the slice start
# becomes position 1.
1934 if ( $dest_slice_strand == 1 ) {
1935 $seq_region_start = $seq_region_start - $dest_slice_start + 1;
1936 $seq_region_end = $seq_region_end - $dest_slice_start + 1;
1938 if ( $dest_slice->is_circular ) {
1939 # Handle circular chromosomes.
1941 if ( $seq_region_start > $seq_region_end ) {
1942 # Looking at a feature overlapping the chromosome origin.
1944 if ( $seq_region_end > $dest_slice_start ) {
1945 # Looking at the region in the beginning of the chromosome
1946 $seq_region_start -= $seq_region_len;
1948 if ( $seq_region_end < 0 ) {
1949 $seq_region_end += $seq_region_len;
1952 if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
1953 # Looking at the region overlapping the chromosome
1954 # origin and a feature which is at the beginning of the
1956 $seq_region_start += $seq_region_len;
1957 $seq_region_end += $seq_region_len;
# Slice not on the forward strand: coordinates are measured back from the
# slice end, so start and end swap roles.
1963 my $start = $dest_slice_end - $seq_region_end + 1;
1964 my $end = $dest_slice_end - $seq_region_start + 1;
1966 if ($dest_slice->is_circular()) {
1968 if ($dest_slice_start > $dest_slice_end) {
1969 # slice spans origin or replication
1971 if ($seq_region_start >= $dest_slice_start) {
1972 $end += $seq_region_len;
1973 $start += $seq_region_len
if $seq_region_end > $dest_slice_start;
1975 } elsif ($seq_region_start <= $dest_slice_end) {
1977 } elsif ($seq_region_end >= $dest_slice_start) {
1978 $start += $seq_region_len;
1979 $end += $seq_region_len;
1981 } elsif ($seq_region_end <= $dest_slice_end) {
1982 $end += $seq_region_len
if $end < 0;
1984 } elsif ($seq_region_start > $seq_region_end) {
1985 $end += $seq_region_len;
1990 if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
1992 } elsif ($seq_region_start > $seq_region_end) {
1993 if ($seq_region_start <= $dest_slice_end) {
1994 $start -= $seq_region_len;
1995 } elsif ($seq_region_end >= $dest_slice_start) {
1996 $end += $seq_region_len;
# Commit the flipped coordinates and invert the strand.
2002 $seq_region_start = $start;
2003 $seq_region_end = $end;
2004 $seq_region_strand *= -1;
2006 } ## end
else [
if ( $dest_slice_strand...)]
2008 # Throw away features off the end of the requested slice or on
2009 # different seq_region.
2010 if ($seq_region_end < 1
2011 || $seq_region_start > $dest_slice_length
2012 || ($dest_slice_sr_id != $seq_region_id)) {
# Features that are kept are attached to the destination slice itself.
2015 $slice = $dest_slice;
# A display xref is only built when the row carries a display_xref_id.
2020 if ($display_xref_id) {
2022 'dbID' => $display_xref_id,
2023 'adaptor' => $dbEntryAdaptor,
2024 'display_id' => $xref_display_label,
2025 'primary_id' => $xref_primary_acc,
2026 'version' => $xref_version,
2027 'description' => $xref_description,
2028 'release' => $external_release,
2029 'dbname' => $external_db,
2030 'db_display_name' => $external_db_name,
2031 'info_type' => $xref_info_type,
2032 'info_text' => $xref_info_text
2034 $display_xref->status($external_status);
2037 # Finally, create the new Transcript.
# Empty or zero date values are passed on as undef.
2040 'analysis' => $analysis,
2041 'biotype' => $biotype,
2042 'start' => $seq_region_start,
2043 'end' => $seq_region_end,
2044 'strand' => $seq_region_strand,
2047 'dbID' => $transcript_id,
2048 'stable_id' => $stable_id,
2049 'version' => $version,
2050 'created_date' => $created_date || undef,
2051 'modified_date' => $modified_date || undef,
2052 'description' => $description,
2053 'external_name' => $xref_display_label,
2055 'external_status' => $external_status,
2056 'external_display_name' => $external_db_name,
2057 'external_db' => $external_db,
2058 'display_xref' => $display_xref,
2059 'is_current' => $is_current,
2060 'edits_enabled' => 1
# The source value is only set for schema versions above 74.
2063 $self->schema_version > 74 and $params->{
'source'} = $source;
2065 $self->_create_feature_fast(
2066 'Bio::EnsEMBL::Transcript',$params) );
2070 return \@transcripts;
=head2 fetch_all_by_exon_supporting_evidence

  Arg [1]    : String $hit_name
               Name of supporting feature
  Arg [2]    : String $feature_type
               one of "dna_align_feature" or "protein_align_feature"
  Arg [3]    : (optional) Bio::EnsEMBL::Analysis
  Example    : $tr = $tr_adaptor->fetch_all_by_exon_supporting_evidence
               ('XYZ', 'dna_align_feature');
  Description: Gets all the transcripts with exons which have a specified hit
               on a particular type of feature. Optionally filter by analysis.
               Only current transcripts are considered.
  Returntype : Listref of the transcripts returned by fetch_by_dbID for
               each matching transcript id
  Exceptions : If feature_type is not of correct type.

=cut

sub fetch_all_by_exon_supporting_evidence {
  my ($self, $hit_name, $feature_type, $analysis) = @_;

  # $feature_type is interpolated into the SQL below as a table name, so
  # it has to be exactly one of the two permitted values. The pattern is
  # anchored and grouped: an unanchored /(dna)|(protein)_align_feature/
  # accepts any string that merely contains "dna".
  if ( !defined($feature_type)
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/ )
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # The analysis table is only joined when an analysis filter was given.
  my $anal_from  = $analysis ? ", analysis a " : "";
  my $anal_where = $analysis
    ? "AND a.analysis_id = f.analysis_id AND a.analysis_id=? "
    : "";

  my $sql = qq(
      SELECT DISTINCT(t.transcript_id)
        FROM transcript t,
             exon_transcript et,
             supporting_feature sf,
             $feature_type f
             $anal_from
       WHERE t.transcript_id = et.transcript_id
         AND t.is_current = 1
         AND et.exon_id = sf.exon_id
         AND sf.feature_id = f.${feature_type}_id
         AND sf.feature_type = ?
         AND f.hit_name=?
         $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param(1, $feature_type, SQL_VARCHAR);
  $sth->bind_param(2, $hit_name, SQL_VARCHAR);
  $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

  $sth->execute();

  my @transcripts;

  # List-context fetch: the loop ends when no row is left, independent of
  # the truthiness of the id that was fetched.
  while ( my ($id) = $sth->fetchrow_array ) {
    my $transcript = $self->fetch_by_dbID($id);
    push(@transcripts, $transcript) if $transcript;
  }

  return \@transcripts;
}
=head2 fetch_all_by_transcript_supporting_evidence

  Arg [1]    : String $hit_name
               Name of supporting feature
  Arg [2]    : String $feature_type
               one of "dna_align_feature" or "protein_align_feature"
  Arg [3]    : (optional) Bio::EnsEMBL::Analysis
  Example    : $transcripts = $transcript_adaptor->fetch_all_by_transcript_supporting_evidence('XYZ', 'dna_align_feature');
  Description: Gets all the transcripts with evidence from a specified hit_name on a particular type of feature, stored in the
               transcript_supporting_feature table. Optionally filter by analysis. For hits stored in the supporting_feature
               table (linked to exons) use fetch_all_by_exon_supporting_evidence instead.
               Only current transcripts are considered.
  Returntype : Listref of the transcripts returned by fetch_by_dbID for
               each matching transcript id
  Exceptions : If feature_type is not of correct type.

=cut

sub fetch_all_by_transcript_supporting_evidence {
  my ($self, $hit_name, $feature_type, $analysis) = @_;

  # $feature_type is interpolated into the SQL below as a table name, so
  # it has to be exactly one of the two permitted values. The pattern is
  # anchored and grouped: an unanchored /(dna)|(protein)_align_feature/
  # accepts any string that merely contains "dna".
  if ( !defined($feature_type)
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/ )
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # The analysis table is only joined when an analysis filter was given.
  my $anal_from  = $analysis ? ", analysis a " : "";
  my $anal_where = $analysis
    ? "AND a.analysis_id = f.analysis_id AND a.analysis_id=? "
    : "";

  my $sql = qq(
      SELECT DISTINCT(t.transcript_id)
        FROM transcript t,
             transcript_supporting_feature sf,
             $feature_type f
             $anal_from
       WHERE t.transcript_id = sf.transcript_id
         AND t.is_current = 1
         AND sf.feature_id = f.${feature_type}_id
         AND sf.feature_type = ?
         AND f.hit_name=?
         $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param(1, $feature_type, SQL_VARCHAR);
  $sth->bind_param(2, $hit_name, SQL_VARCHAR);
  $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

  $sth->execute();

  my @transcripts;

  # List-context fetch: the loop ends when no row is left, independent of
  # the truthiness of the id that was fetched.
  while ( my ($id) = $sth->fetchrow_array ) {
    my $transcript = $self->fetch_by_dbID($id);
    push(@transcripts, $transcript) if $transcript;
  }

  return \@transcripts;
}
# Returns the SQL fragment that orders results by t.transcript_id.
# NOTE(review): the enclosing sub header is not visible here; presumably
# this is the body of _final_clause - confirm.
2205 return ' ORDER BY t.transcript_id'
# update_canonical_attribute: writes the 'is_canonical' attribute (value
# '1') for $transcript_id in transcript_attrib and deletes that attribute
# from $old_transcript_id when one is given and differs.
# Throws when no 'is_canonical' attribute type can be found.
2208 sub update_canonical_attribute {
2209 my ($self, $transcript_id, $old_transcript_id) = @_;
2211 # Get canonical attribute id
2212 my $db = $self->db();
2213 my $attr_adaptor = $db->get_AttributeAdaptor();
# Only the first element of what fetch_by_code returns is used; per the
# error message below it is treated as the attrib_type_id.
2214 my $canonical_attrib_id = @{$attr_adaptor->fetch_by_code(
'is_canonical')}[0];
2215 throw(
"No attrib_type_id found for 'is_canonical' attribute in attrib_type table.")
if (!defined($canonical_attrib_id));
2217 # Check if new canonical transcript attribute exists
2218 my $sth = $self->prepare(
"SELECT value FROM transcript_attrib WHERE transcript_id=? AND attrib_type_id=?");
2219 $sth->execute($transcript_id, $canonical_attrib_id);
# A fetched row means the attribute already exists: set its value to '1'.
2220 if (my ($exists) = $sth->fetchrow_array()) {
2223 $sth = $self->prepare(
"UPDATE transcript_attrib SET value=? WHERE transcript_id=? AND attrib_type_id=?");
2224 $sth->execute(
'1', $transcript_id, $canonical_attrib_id);
# NOTE(review): presumably the branch taken when no row exists - the
# branch keyword is not visible here. Inserts the attribute with value '1'.
2228 $sth = $self->prepare(
"INSERT INTO transcript_attrib (transcript_id, attrib_type_id, value) values(?,?,?)");
2229 $sth->execute($transcript_id, $canonical_attrib_id,
'1');
2233 # Delete old canonical transcript attribute
# The ids are compared as strings (ne); nothing is deleted when the old
# id is undefined or equal to the new one.
2234 if (defined($old_transcript_id) && $old_transcript_id ne $transcript_id) {
2235 $sth = $self->prepare(
"DELETE FROM transcript_attrib WHERE transcript_id=? AND attrib_type_id=?");
2236 $sth->execute($old_transcript_id, $canonical_attrib_id);