ensembl-hive  2.7.0
GeneAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 
22 =head1 CONTACT
23 
24  Please email comments or questions to the public Ensembl
25  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 
27  Questions may also be sent to the Ensembl help desk at
28  <http://www.ensembl.org/Help/Contact>.
29 
30 =cut
31 
32 =head1 NAME
33 Bio::EnsEMBL::DBSQL::GeneAdaptor - Database adaptor for the retrieval and
34 storage of Gene objects
35 
36 =head1 SYNOPSIS
37 
38  use Bio::EnsEMBL::Registry;
39 
40  Bio::EnsEMBL::Registry->load_registry_from_db(
41  -host => 'ensembldb.ensembl.org',
42  -user => 'anonymous',
43  );
44 
45  $gene_adaptor =
46  Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "gene" );
47 
48  $gene = $gene_adaptor->fetch_by_dbID(1234);
49 
50  $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000184129');
51 
52  @genes = @{ $gene_adaptor->fetch_all_by_external_name('BRCA2') };
53 
54  $slice_adaptor =
55  Bio::EnsEMBL::Registry->get_adaptor( "human", "core", "slice" );
56 
57  $slice =
58  $slice_adaptor->fetch_by_region( 'chromosome', '1', 1, 1000000 );
59 
60  @genes = @{ $gene_adaptor->fetch_all_by_Slice($slice) };
61 
62 =head1 DESCRIPTION
63 
64 This is a database aware adaptor for the retrieval and storage of gene
65 objects.
66 
67 =head1 METHODS
68 
69 =cut
70 
71 package Bio::EnsEMBL::DBSQL::GeneAdaptor;
72 
73 use strict;
74 
75 use Bio::EnsEMBL::Utils::Exception qw( throw warning );
76 use Bio::EnsEMBL::Utils::Scalar qw( assert_ref );
81 
82 use vars '@ISA';
84 
85 # _tables
86 # Arg [1] : none
87 # Description: PROTECTED implementation of superclass abstract method.
88 # Returns the names, aliases of the tables to use for queries.
89 # Returntype : list of listrefs of strings
90 # Exceptions : none
91 # Caller : internal
92 # Status : Stable
93 
sub _tables {
  # Each entry is a [ table name, alias ] pair; the aliases (g, x, exdb) are
  # the ones referenced by the column list and the left-join conditions.
  return (
    [ 'gene',        'g'    ],
    [ 'xref',        'x'    ],
    [ 'external_db', 'exdb' ],
  );
}
97 
98 # _columns
99 # Arg [1] : none
100 # Example : none
101 # Description: PROTECTED implementation of superclass abstract method.
102 # Returns a list of columns to use for queries.
103 # Returntype : list of strings
104 # Exceptions : none
105 # Caller : internal
106 # Status : Stable
107 
sub _columns {
  my ($self) = @_;

  # The two date columns are wrapped by the connection's
  # from_date_to_seconds() helper; its result is used verbatim as a column
  # expression.
  my $created_date  = $self->db()->dbc()->from_date_to_seconds("g.created_date");
  my $modified_date = $self->db()->dbc()->from_date_to_seconds("g.modified_date");

  # The order of this list is significant to whoever unpacks the result rows.
  return (
    'g.gene_id',
    'g.seq_region_id',
    'g.seq_region_start',
    'g.seq_region_end',
    'g.seq_region_strand',
    'g.analysis_id',
    'g.biotype',
    'g.display_xref_id',
    'g.description',
    'g.source',
    'g.is_current',
    'g.canonical_transcript_id',
    'g.stable_id',
    'g.version',
    $created_date,
    $modified_date,
    'x.display_label',
    'x.dbprimary_acc',
    'x.description',
    'x.version',
    'exdb.db_name',
    'exdb.status',
    'exdb.db_release',
    'exdb.db_display_name',
    'x.info_type',
    'x.info_text',
  );
}
116 
# _left_join
# Returns [ table name, join condition ] pairs for the xref and external_db
# tables, joined through the gene's display xref.
sub _left_join {
  return (
    [ 'xref',        "x.xref_id = g.display_xref_id" ],
    [ 'external_db', "exdb.external_db_id = x.external_db_id" ],
  );
}
120 
121 =head2 list_dbIDs
122 
123  Example : @gene_ids = @{$gene_adaptor->list_dbIDs()};
124  Description: Gets an array of internal ids for all genes in the current db
125  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
126  Returntype : Listref of Ints
127  Exceptions : none
128  Caller : general
129  Status : Stable
130 
131 =cut
132 
sub list_dbIDs {
  my $self    = shift;
  my $ordered = shift;

  # Delegates to _list_dbIDs() for the "gene" table; the second argument
  # (a column name) is left undefined here.
  return $self->_list_dbIDs("gene", undef, $ordered);
}
138 
139 =head2 list_stable_ids
140 
141  Example : @stable_gene_ids = @{$gene_adaptor->list_stable_ids()};
142  Description: Gets a listref of stable ids for all genes in the current db
143  Returntype : reference to a list of strings
144  Exceptions : none
145  Caller : general
146  Status : Stable
147 
148 =cut
149 
sub list_stable_ids {
  my $self = shift;

  # Same helper as list_dbIDs(), but asking for the stable_id column.
  return $self->_list_dbIDs("gene", "stable_id");
}
155 
# list_seq_region_ids
# Forwards to _list_seq_region_ids() for the gene table and returns whatever
# that helper returns.
sub list_seq_region_ids {
  my ($self) = @_;

  return $self->_list_seq_region_ids('gene');
}
161 
162 =head2 fetch_by_display_label
163 
164  Arg [1] : String $label - display label of gene to fetch
165  Example : my $gene = $geneAdaptor->fetch_by_display_label("BRCA2");
166  Description: Returns the gene which has the given display label or undef if
167  there is none. If there are more than 1, the gene on the
168  reference slice is reported or if none are on the reference,
169  the first one is reported.
170  Returntype : Bio::EnsEMBL::Gene
171  Exceptions : none
172  Caller : general
173  Status : Stable
174 
175 =cut
176 
sub fetch_by_display_label {
  my ($self, $label) = @_;

  $self->bind_param_generic_fetch($label, SQL_VARCHAR);
  my $candidates = $self->generic_fetch("x.display_label = ? AND g.is_current = 1");

  my $chosen;

  # With several hits, prefer the first gene that sits on a reference slice.
  if (scalar(@{$candidates}) > 1) {
    for my $candidate (@{$candidates}) {
      next unless $candidate->slice->is_reference;
      $chosen = $candidate;
      last;
    }
  }

  # A single hit, or no reference hit among several: take the first gene.
  if (!$chosen && scalar(@{$candidates}) >= 1) {
    $chosen = $candidates->[0];
  }

  return $chosen;
} ## end sub fetch_by_display_label
202 
203 =head2 fetch_all_by_display_label
204 
205  Arg [1] : String $label - display label of genes to fetch
206  Example : my @genes = @{$geneAdaptor->fetch_all_by_display_label("PPP1R2P1")};
207  Description: Returns all genes which have the given display label, or a
208  reference to an empty list if there are none.
209  Returntype : listref of Bio::EnsEMBL::Gene objects
210  Exceptions : none
211  Caller : general
212  Status : Stable
213 
214 =cut
215 
sub fetch_all_by_display_label {
  my ($self, $label) = @_;

  # Only current genes are considered; the label is passed as a bound
  # parameter rather than being placed in the SQL text.
  $self->bind_param_generic_fetch($label, SQL_VARCHAR);
  my $matching_genes = $self->generic_fetch("x.display_label = ? AND g.is_current = 1");

  return $matching_genes;
}
226 
227 =head2 fetch_by_stable_id
228 
229  Arg [1] : String $id
230  The stable ID of the gene to retrieve
231  Example : $gene = $gene_adaptor->fetch_by_stable_id('ENSG00000148944');
232  Description: Retrieves a gene object from the database via its stable id.
233  The gene will be retrieved in its native coordinate system (i.e.
234  in the coordinate system it is stored in the database). It may
235  be converted to a different coordinate system through a call to
236  transform() or transfer(). If the gene or exon is not found
237  undef is returned instead.
238  Returntype : Bio::EnsEMBL::Gene or undef
239  Exceptions : if we cant get the gene in given coord system
240  Caller : general
241  Status : Stable
242 
243 =cut
244 
sub fetch_by_stable_id {
  my ($self, $stable_id) = @_;

  my $constraint = "g.stable_id = ? AND g.is_current = 1";
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
  my ($gene) = @{$self->generic_fetch($constraint)};

  return $gene if defined($gene);

  # Nothing found: see whether the identifier carries a ".version" suffix.
  # rindex() returns -1 when there is no '.', and -1 is true in Perl, so the
  # position has to be compared explicitly.  A '.' at position 0 would leave
  # an empty stable id and is not treated as a version separator either.
  my $vindex = defined($stable_id) ? rindex($stable_id, '.') : -1;
  if ($vindex > 0) {
    $gene = $self->fetch_by_stable_id_version(substr($stable_id, 0, $vindex),
                                              substr($stable_id, $vindex + 1));
  }

  return $gene;
}
261 
262 =head2 fetch_by_stable_id_version
263 
264  Arg [1] : String $id
265  The stable ID of the gene to retrieve
266  Arg [2] : Integer $version
267  The version of the stable_id to retrieve
268  Example : $gene = $gene_adaptor->fetch_by_stable_id_version('ENSG00000148944', 14);
269  Description: Retrieves a gene object from the database via its stable id and version.
270  The gene will be retrieved in its native coordinate system (i.e.
271  in the coordinate system it is stored in the database). It may
272  be converted to a different coordinate system through a call to
273  transform() or transfer(). If the gene or exon is not found
274  undef is returned instead.
275  Returntype : Bio::EnsEMBL::Gene or undef
276  Exceptions : if we cant get the gene in given coord system
277  Caller : general
278  Status : Stable
279 
280 =cut
281 
sub fetch_by_stable_id_version {
  my ($self, $stable_id, $version) = @_;

  # A non-numeric version is never looked up; the caller gets nothing back.
  if ($version !~ /^\d+$/) {
    return;
  }

  # Parameters are bound in the order of the placeholders in the constraint.
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);
  $self->bind_param_generic_fetch($version,   SQL_INTEGER);

  my ($versioned_gene) =
    @{$self->generic_fetch("g.stable_id = ? AND g.version = ? AND g.is_current = 1")};

  return $versioned_gene;
}
295 
296 =head2 fetch_all_by_source
297 
298  Arg [1] : String $source
299  listref of $sources
300  The source of the gene to retrieve. You can have as an argument a reference
301  to a list of sources
302  Example : $genes = $gene_adaptor->fetch_all_by_source('havana');
303  $genes = $gene_adaptor->fetch_all_by_source(['ensembl', 'vega']);
304  Description: Retrieves an array reference of gene objects from the database via its source or sources.
305  The gene will be retrieved in its native coordinate system (i.e.
306  in the coordinate system it is stored in the database). It may
307  be converted to a different coordinate system through a call to
308  transform() or transfer(). If the gene or exon is not found
309  undef is returned instead.
310  Returntype : listref of Bio::EnsEMBL::Gene
311  Exceptions : if we cant get the gene in given coord system
312  Caller : general
313  Status : Stable
314 
315 =cut
316 
sub fetch_all_by_source {
  my ($self, $source) = @_;

  my $fetched = $self->generic_fetch($self->source_constraint($source));

  # Hand back a fresh array reference holding the fetched genes.
  return [ @{$fetched} ];
}
322 
323 =head2 source_constraint
324 
325  Arg [1] : String $source
326  listref of $sources
327  The source of the gene to retrieve. You can have as an argument a reference
328  to a list of sources
329  Description: Used internally to generate a SQL constraint to restrict a gene query by source
330  Returntype : String
331  Exceptions : If source is not supplied
332  Caller : general
333  Status : Stable
334 
335 =cut
336 
sub source_constraint {
  my ($self, $sources, $inline_variables) = @_;

  # The source filter itself is produced by generate_in_constraint(); this
  # method only prefixes the "current genes" restriction.
  my $source_filter =
    $self->generate_in_constraint($sources, 'g.source', SQL_VARCHAR, $inline_variables);

  return "g.is_current = 1" . " and $source_filter";
}
344 
345 =head2 count_all_by_source
346 
347  Arg [1] : String $source
348  listref of $source
349  The source of the gene to retrieve. You can have as an argument a reference
350  to a list of sources
351  Example : $cnt = $gene_adaptor->count_all_by_source('ensembl');
352  $cnt = $gene_adaptor->count_all_by_source(['havana', 'vega']);
353  Description : Retrieves count of gene objects from the database via its source or sources.
354  Returntype : integer
355  Caller : general
356  Status : Stable
357 
358 =cut
359 
sub count_all_by_source {
  my $self   = shift;
  my $source = shift;

  # Count with the same constraint that fetch_all_by_source() fetches with.
  return $self->generic_count($self->source_constraint($source));
}
364 
365 =head2 fetch_all_by_biotype
366 
367  Arg [1] : String $biotype
368  listref of $biotypes
369  The biotype of the gene to retrieve. You can have as an argument a reference
370  to a list of biotypes
371  Example : $gene = $gene_adaptor->fetch_all_by_biotype('protein_coding');
372  $gene = $gene_adaptor->fetch_all_by_biotype(['protein_coding', 'sRNA', 'miRNA']);
373  Description: Retrieves an array reference of gene objects from the database via its biotype or biotypes.
374  The genes will be retrieved in its native coordinate system (i.e.
375  in the coordinate system it is stored in the database). It may
376  be converted to a different coordinate system through a call to
377  transform() or transfer(). If the gene or exon is not found
378  undef is returned instead.
379  Returntype : listref of Bio::EnsEMBL::Gene
380  Exceptions : if we cant get the gene in given coord system
381  Caller : general
382  Status : Stable
383 
384 =cut
385 
sub fetch_all_by_biotype {
  my ($self, $biotype) = @_;

  my $fetched = $self->generic_fetch($self->biotype_constraint($biotype));

  # Hand back a fresh array reference holding the fetched genes.
  return [ @{$fetched} ];
}
391 
392 =head2 biotype_constraint
393 
394  Arg [1] : String $biotypes
395  listref of $biotypes
396  The biotype of the gene to retrieve. You can have as an argument a reference
397  to a list of biotypes
398  Description: Used internally to generate a SQL constraint to restrict a gene query by biotype
399  Returntype : String
400  Exceptions : If biotype is not supplied
401  Caller : general
402  Status : Stable
403 
404 =cut
405 
sub biotype_constraint {
  my ($self, $biotypes, $inline_variables) = @_;

  # The biotype filter itself is produced by generate_in_constraint(); this
  # method only prefixes the "current genes" restriction.
  my $biotype_filter =
    $self->generate_in_constraint($biotypes, 'g.biotype', SQL_VARCHAR, $inline_variables);

  return "g.is_current = 1" . " and $biotype_filter";
}
413 
414 =head2 count_all_by_biotype
415 
416  Arg [1] : String $biotype
417  listref of $biotypes
418  The biotype of the gene to retrieve. You can have as an argument a reference
419  to a list of biotypes
420  Example : $cnt = $gene_adaptor->count_all_by_biotype('protein_coding');
421  $cnt = $gene_adaptor->count_all_by_biotype(['protein_coding', 'sRNA', 'miRNA']);
422  Description : Retrieves count of gene objects from the database via its biotype or biotypes.
423  Returntype : integer
424  Caller : general
425  Status : Stable
426 
427 =cut
428 
sub count_all_by_biotype {
  my $self    = shift;
  my $biotype = shift;

  # Count with the same constraint that fetch_all_by_biotype() fetches with.
  return $self->generic_count($self->biotype_constraint($biotype));
}
433 
# fetch_all
# Returns a reference to a new list of all current genes whose biotype is
# not "LRG_gene".
sub fetch_all {
  my $self = shift;

  my $fetched = $self->generic_fetch('g.biotype != "LRG_gene" and g.is_current = 1');

  return [ @{$fetched} ];
}
440 
441 =head2 fetch_all_versions_by_stable_id
442 
443  Arg [1] : String $stable_id
444  The stable ID of the gene to retrieve
445  Example : $gene = $gene_adaptor->fetch_all_versions_by_stable_id
446  ('ENSG00000148944');
447  Description : Similar to fetch_by_stable_id, but retrieves all versions of a
448  gene stored in the database.
449  Returntype : listref of Bio::EnsEMBL::Gene
450  Exceptions : if we cant get the gene in given coord system
451  Caller : general
452  Status : At Risk
453 
454 =cut
455 
sub fetch_all_versions_by_stable_id {
  my $self      = shift;
  my $stable_id = shift;

  # No is_current restriction here, so non-current versions are included.
  $self->bind_param_generic_fetch($stable_id, SQL_VARCHAR);

  return $self->generic_fetch("g.stable_id = ?");
}
463 
464 =head2 fetch_by_exon_stable_id
465 
466  Arg [1] : String $id
467  The stable id of an exon of the gene to retrieve
468  Example : $gene = $gene_adptr->fetch_by_exon_stable_id('ENSE00000148944');
469  Description: Retrieves a gene object from the database via an exon stable id.
470  The gene will be retrieved in its native coordinate system (i.e.
471  in the coordinate system it is stored in the database). It may
472  be converted to a different coordinate system through a call to
473  transform() or transfer(). If the gene or exon is not found
474  undef is returned instead.
475  Returntype : Bio::EnsEMBL::Gene or undef
476  Exceptions : none
477  Caller : general
478  Status : Stable
479 
480 =cut
481 
sub fetch_by_exon_stable_id {
  # $version is accepted in the argument list but is not used by the query.
  my ($self, $stable_id, $version) = @_;

  # Walk exon -> exon_transcript -> transcript to find the owning gene of a
  # current exon with the given stable id.
  my $sql = qq(
  SELECT t.gene_id
  FROM transcript as t,
  exon_transcript as et,
  exon as e
  WHERE t.transcript_id = et.transcript_id
  AND et.exon_id = e.exon_id
  AND e.stable_id = ?
  AND e.is_current = 1
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $stable_id, SQL_VARCHAR);
  $sth->execute();

  my ($dbID) = $sth->fetchrow_array();

  # Only the first row is read, so the handle may still have pending rows;
  # release it explicitly, as the other fetch_by_* lookups in this adaptor do.
  $sth->finish();

  return undef if (!defined($dbID));

  my $gene = $self->fetch_by_dbID($dbID);

  return $gene;
} ## end sub fetch_by_exon_stable_id
508 
509 =head2 fetch_all_by_domain
510 
511  Arg [1] : String $domain
512  The domain to fetch genes from
513  Example : my @genes = @{ $gene_adaptor->fetch_all_by_domain($domain) };
514  Description: Retrieves a listref of genes whose translation contain interpro
515  domain $domain. The genes are returned in their native coord
516  system (i.e. the coord_system they are stored in). If the coord
517  system needs to be changed, then transform or transfer should be
518  called on the individual objects returned.
519  Returntype : list of Bio::EnsEMBL::Genes
520  Exceptions : none
521  Caller : domainview
522  Status : Stable
523 
524 =cut
525 
sub fetch_all_by_domain {
  my ($self, $domain) = @_;

  if (!$domain) {
    throw("domain argument is required");
  }

  # Gene ids of current transcripts of this species whose translation has a
  # protein feature hit linked to the given InterPro accession.
  my $sql = qq(
  SELECT tr.gene_id
  FROM interpro i,
  protein_feature pf,
  transcript tr,
  translation tl,
  seq_region sr,
  coord_system cs
  WHERE cs.species_id = ?
  AND cs.coord_system_id = sr.coord_system_id
  AND sr.seq_region_id = tr.seq_region_id
  AND tr.is_current = 1
  AND tr.transcript_id = tl.transcript_id
  AND tl.translation_id = pf.translation_id
  AND pf.hit_name = i.id
  AND i.interpro_ac = ?
  GROUP BY tr.gene_id);

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $self->species_id(), SQL_VARCHAR);
  $sth->bind_param(2, $domain, SQL_VARCHAR);
  $sth->execute();

  # Each row has a single column: the gene id.
  my @gene_ids = map { $_->[0] } @{$sth->fetchall_arrayref()};
  $sth->finish();

  return $self->fetch_all_by_dbID_list(\@gene_ids);
} ## end sub fetch_all_by_domain
562 
563 =head2 fetch_all_by_Slice_and_external_dbname_link
564 
565  Arg [1] : Bio::EnsEMBL::Slice $slice
566  The slice to fetch genes on.
567  Arg [2] : (optional) string $logic_name
568  the logic name of the type of features to obtain
569  Arg [3] : (optional) boolean $load_transcripts
570  if true, transcripts will be loaded immediately
571  rather than lazy loaded later.
572  Arg [4] : String
573  Name of the external database to fetch the Genes by
574  Example : @genes = @{
575  $ga->fetch_all_by_Slice_and_external_dbname_link(
576  $slice, undef, undef, "HGNC" ) };
577  Description: Overrides superclass method to optionally load
578  transcripts immediately rather than lazy-loading them
579  later. This is more efficient when there are a lot
580  of genes whose transcripts are going to be used. The
581  genes are then filtered to return only those with
582  external database links of the type specified
583  Returntype : reference to list of genes
584  Exceptions : thrown if exon cannot be placed on transcript slice
585  Caller :
586  Status : Stable
587 
588 =cut
589 
sub fetch_all_by_Slice_and_external_dbname_link {
  my ($self, $slice, $logic_name, $load_transcripts, $db_name) = @_;

  # Resolve the external database name to its id(s).
  my $dbentry_adaptor = $self->db()->get_DBEntryAdaptor();
  my $external_db_ids = $dbentry_adaptor->get_external_db_ids($db_name, undef, 'ignore release');

  # Unknown database name: warn with the list of known names, return nothing.
  if (!@{$external_db_ids}) {
    my $known_names = $dbentry_adaptor->get_distinct_external_dbs();
    my $available   = join("\n", map { "\t$_" } @{$known_names});
    warning(sprintf("Could not find external database '%s' in the external_db table\nAvailable are:\n%s",
                    $db_name, $available));
    return [];
  }

  # Collect the ids of all genes linked to any of those external databases.
  my %is_linked;
  for my $external_db_id (@{$external_db_ids}) {
    for my $gene_id ($dbentry_adaptor->list_gene_ids_by_external_db_id($external_db_id)) {
      $is_linked{$gene_id} = 1;
    }
  }

  # Fetch every gene on the slice and keep only the linked ones.
  my $slice_genes = $self->fetch_all_by_Slice($slice, $logic_name, $load_transcripts);
  my @linked      = grep { exists $is_linked{$_->dbID()} } @{$slice_genes};

  return \@linked;
} ## end sub fetch_all_by_Slice_and_external_dbname_link
617 
618 =head2 fetch_all_by_Slice
619 
620  Arg [1] : Bio::EnsEMBL::Slice $slice
621  The slice to fetch genes on.
622  Arg [2] : (optional) string $logic_name
623  the logic name of the type of features to obtain
624  Arg [3] : (optional) boolean $load_transcripts
625  if true, transcripts will be loaded immediately rather than
626  lazy loaded later.
627  Arg [4] : (optional) string $source
628  the source name of the features to obtain.
629  Arg [5] : (optional) string biotype
630  the biotype of the features to obtain.
631  Example : @genes = @{$gene_adaptor->fetch_all_by_Slice()};
632  Description: Overrides superclass method to optionally load transcripts
633  immediately rather than lazy-loading them later. This
634  is more efficient when there are a lot of genes whose
635  transcripts are going to be used.
636  Returntype : reference to list of genes
637  Exceptions : thrown if exon cannot be placed on transcript slice
638  Caller : Slice::get_all_Genes
639  Status : Stable
640 
641 =cut
642 
sub fetch_all_by_Slice {
  my ($self, $slice, $logic_name, $load_transcripts, $source, $biotype) = @_;

  my $constraint = 'g.is_current = 1';

  # Both optional filters go through generate_in_constraint() in "inline"
  # mode, so the caller-supplied values are not pasted raw into the SQL text.
  # NOTE(review): this relies on the superclass helper quoting inlined
  # values; confirm against its implementation.
  my $inline_variables = 1;
  if (defined($source)) {
    $constraint .= " and ".$self->generate_in_constraint($source, 'g.source', SQL_VARCHAR, $inline_variables);
  }
  if (defined($biotype)) {
    $constraint .= " and ".$self->generate_in_constraint($biotype, 'g.biotype', SQL_VARCHAR, $inline_variables);
  }

  my $genes = $self->SUPER::fetch_all_by_Slice_constraint($slice, $constraint, $logic_name);

  # Nothing to preload when the caller did not ask for it or no gene was
  # found; transcripts are then left to be loaded later.
  if (!$load_transcripts || @$genes < 1) {
    return $genes;
  }

  # Preload all of the transcripts now, instead of lazy loading later,
  # faster than one query per transcript.

  # First check if transcripts are already preloaded.
  # FIXME: Should check all transcripts.
  if (exists($genes->[0]->{'_transcript_array'})) {
    return $genes;
  }

  # Get extent of region spanned by the genes.
  my ($min_start, $max_end);
  foreach my $g (@$genes) {
    if (!defined($min_start) || $g->seq_region_start() < $min_start) {
      $min_start = $g->seq_region_start();
    }
    if (!defined($max_end) || $g->seq_region_end() > $max_end) {
      $max_end = $g->seq_region_end();
    }
  }

  # Use the requested slice when it already covers that extent; otherwise
  # fetch a slice over the same region that spans all of the genes.
  my $ext_slice;

  if ($min_start >= $slice->start() && $max_end <= $slice->end()) {
    $ext_slice = $slice;
  } else {
    my $sa = $self->db()->get_SliceAdaptor();
    $ext_slice = $sa->fetch_by_region($slice->coord_system->name(), $slice->seq_region_name(), $min_start, $max_end, $slice->strand(), $slice->coord_system->version());
  }

  # Associate transcript identifiers with genes.

  my %g_hash = map { $_->dbID => $_ } @{$genes};

  my $g_id_str = join(',', keys(%g_hash));

  my $sth = $self->prepare("SELECT gene_id, transcript_id " . "FROM transcript " . "WHERE gene_id IN ($g_id_str)");

  $sth->execute();

  my ($g_id, $tr_id);
  $sth->bind_columns(\($g_id, $tr_id));

  my %tr_g_hash;

  while ($sth->fetch()) {
    $tr_g_hash{$tr_id} = $g_hash{$g_id};
  }

  # No transcript rows at all: an "IN ()" list would be invalid SQL, and
  # there is nothing to attach anyway.
  if (!%tr_g_hash) {
    return $genes;
  }

  my $ta = $self->db()->get_TranscriptAdaptor();
  my $transcripts = $ta->fetch_all_by_Slice($ext_slice, 1, undef, sprintf("t.transcript_id IN (%s)", join(',', sort { $a <=> $b } keys(%tr_g_hash))));

  # Move transcripts onto gene slice, and add them to genes.
  foreach my $tr (@{$transcripts}) {
    if (!exists($tr_g_hash{$tr->dbID()})) { next }

    my $new_tr;
    if ($slice != $ext_slice) {
      $new_tr = $tr->transfer($slice);
      if (!defined($new_tr)) {
        throw("Unexpected. " . "Transcript could not be transfered onto Gene slice.");
      }
    } else {
      $new_tr = $tr;
    }

    $tr_g_hash{$tr->dbID()}->add_Transcript($new_tr);
  }

  return $genes;
} ## end sub fetch_all_by_Slice
733 
734 =head2 count_all_by_Slice
735 
736  Arg [1] : Bio::EnsEMBL::Slice $slice
737  The slice to count genes on.
738  Arg [2] : (optional) biotype(s) string or arrayref of strings
739  the biotype of the features to count.
740  Arg [3] : (optional) string $source
741  the source name of the features to count.
742  Example : $cnt = $gene_adaptor->count_all_by_Slice();
743  Description: Method to count genes on a given slice, filtering by biotype and source
744  Returntype : integer
745  Exceptions : thrown if exon cannot be placed on transcript slice
746  Status : Stable
747  Caller : general
748 =cut
749 
sub count_all_by_Slice {
  my ($self, $slice, $biotype, $source) = @_;

  my $constraint = 'g.is_current = 1';

  # The source value goes through generate_in_constraint() in "inline" mode
  # instead of being pasted raw between quotes in the SQL text.
  # NOTE(review): this relies on the superclass helper quoting inlined
  # values; confirm against its implementation.
  if (defined($source)) {
    $constraint .= " and " . $self->generate_in_constraint($source, 'g.source', SQL_VARCHAR, 1);
  }

  # biotype_constraint() repeats the is_current restriction; that is
  # redundant but harmless.
  if (defined($biotype)) {
    $constraint .= " and " . $self->biotype_constraint($biotype);
  }

  return $self->count_by_Slice_constraint($slice, $constraint);
}
763 
764 =head2 fetch_by_transcript_id
765 
766  Arg [1] : Int $trans_id
767  Unique database identifier for the transcript whose gene should
768  be retrieved. The gene is returned in its native coord
769  system (i.e. the coord_system it is stored in). If the coord
770  system needs to be changed, then transform or transfer should
771  be called on the returned object. undef is returned if the
772  gene or transcript is not found in the database.
773  Example : $gene = $gene_adaptor->fetch_by_transcript_id(1241);
774  Description: Retrieves a gene from the database via the database identifier
775  of one of its transcripts.
776  Returntype : Bio::EnsEMBL::Gene
777  Exceptions : none
778  Caller : general
779  Status : Stable
780 
781 =cut
782 
sub fetch_by_transcript_id {
  my ($self, $trans_id) = @_;

  # Look up the owning gene id first, then fetch the gene by that id.
  my $sql = qq(
  SELECT tr.gene_id
  FROM transcript tr
  WHERE tr.transcript_id = ?
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $trans_id, SQL_INTEGER);
  $sth->execute();

  my ($gene_id) = $sth->fetchrow_array();
  $sth->finish();

  if (defined $gene_id) {
    my $gene = $self->fetch_by_dbID($gene_id);
    return $gene;
  }

  # No such transcript.
  return undef;
}
806 
807 =head2 fetch_by_transcript_stable_id
808 
809  Arg [1] : string $trans_stable_id
810  transcript stable ID whose gene should be retrieved
811  Example : my $gene = $gene_adaptor->fetch_by_transcript_stable_id
812  ('ENST0000234');
813  Description: Retrieves a gene from the database via the stable ID of one of
814  its transcripts
815  Returntype : Bio::EnsEMBL::Gene
816  Exceptions : none
817  Caller : general
818  Status : Stable
819 
820 =cut
821 
sub fetch_by_transcript_stable_id {
  my ($self, $trans_stable_id) = @_;

  # Only current transcripts are considered when resolving the gene id.
  my $sql = qq(
  SELECT gene_id
  FROM transcript
  WHERE stable_id = ?
  AND is_current = 1
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $trans_stable_id, SQL_VARCHAR);
  $sth->execute();

  my ($gene_id) = $sth->fetchrow_array();
  $sth->finish;

  if (defined $gene_id) {
    my $gene = $self->fetch_by_dbID($gene_id);
    return $gene;
  }

  # No current transcript carries that stable id.
  return undef;
}
844 
845 =head2 fetch_by_translation_stable_id
846 
847  Arg [1] : String $translation_stable_id
848  The stable id of a translation of the gene to be obtained
849  Example : my $gene = $gene_adaptor->fetch_by_translation_stable_id
850  ('ENSP00000278194');
851  Description: Retrieves a gene via the stable id of one of its translations.
852  Returntype : Bio::EnsEMBL::Gene
853  Exceptions : none
854  Caller : general
855  Status : Stable
856 
857 =cut
858 
sub fetch_by_translation_stable_id {
  my ($self, $translation_stable_id) = @_;

  # Resolve translation -> current transcript -> gene id.
  my $sql = qq(
  SELECT tr.gene_id
  FROM transcript tr,
  translation tl
  WHERE tl.stable_id = ?
  AND tr.transcript_id = tl.transcript_id
  AND tr.is_current = 1
  );

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $translation_stable_id, SQL_VARCHAR);
  $sth->execute();

  my ($gene_id) = $sth->fetchrow_array();
  $sth->finish;

  return undef unless defined $gene_id;

  return $self->fetch_by_dbID($gene_id);
}
882 
883 =head2 fetch_all_by_external_name
884 
885  Arg [1] : String $external_name
886  The external identifier for the gene to be obtained
887  Arg [2] : (optional) String $external_db_name
888  The name of the external database from which the
889  identifier originates.
890  Arg [3] : Boolean override. Force SQL regex matching for users
891  who really do want to find all 'NM%'
892  Example : @genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA2')}
893  @many_genes = @{$gene_adaptor->fetch_all_by_external_name('BRCA%')}
894  Description: Retrieves a list of genes with an external database
895  identifier $external_name. The genes returned are in
896  their native coordinate system, i.e. in the coordinate
897  system they are stored in the database in. If another
898  coordinate system is required then the Gene::transfer or
899  Gene::transform method can be used.
900  SQL wildcards % and _ are supported in the $external_name,
901  but their use is somewhat restricted for performance reasons.
902  Users that really do want % and _ in the first three characters
903  should use argument 3 to prevent optimisations
904  Returntype : listref of Bio::EnsEMBL::Gene
905  Exceptions : none
906  Caller : goview, general
907  Status : Stable
908 
909 =cut
910 
sub fetch_all_by_external_name {
  my ($self, $external_name, $external_db_name, $override) = @_;

  my $entryAdaptor = $self->db->get_DBEntryAdaptor();

  my @ids = $entryAdaptor->list_gene_ids_by_extids($external_name, $external_db_name, $override);

  # Index the fetched genes by dbID so the result can follow the order of @ids.
  my %gene_for;
  for my $gene (@{$self->fetch_all_by_dbID_list(\@ids)}) {
    $gene_for{$gene->dbID()} = $gene;
  }

  # Genes on reference slices come first, the rest after; within each group
  # the order of @ids is kept.
  my (@reference, @non_reference);
  for my $id (@ids) {
    my $gene = $gene_for{$id};

    # An id for which no gene came back would otherwise put undef in the
    # list and make the slice() call below die.
    next if !defined($gene);

    if ($gene->slice()->is_reference()) {
      push @reference, $gene;
    } else {
      push @non_reference, $gene;
    }
  }

  return [ @reference, @non_reference ];
}
925 
926 =head2 fetch_all_by_description
927 
928  Arg [1] : String of description
929  Example : $gene_list = $gene_adaptor->fetch_all_by_description('RNA%');
930  Description: Fetches genes by their textual description. Fully supports SQL
931  wildcards, since getting an exact hit is unlikely.
932  Returntype : listref of Bio::EnsEMBL::Gene
933 
934 =cut
935 
sub fetch_all_by_description {
  my $self        = shift;
  my $description = shift;

  # The description is bound to a LIKE placeholder, so SQL wildcards in it
  # are interpreted by the database.
  $self->bind_param_generic_fetch($description, SQL_VARCHAR);

  return $self->generic_fetch("g.description LIKE ?");
}
943 
944 =head2 fetch_all_by_GOTerm
945 
946  Arg [1] : Bio::EnsEMBL::OntologyTerm
947  The GO term for which genes should be fetched.
948 
949  Example: @genes = @{
950  $gene_adaptor->fetch_all_by_GOTerm(
951  $go_adaptor->fetch_by_accession('GO:0030326') ) };
952 
953  Description : Retrieves a list of genes that are associated with
954  the given GO term, or with any of its descendent
955  GO terms. The genes returned are in their native
956  coordinate system, i.e. in the coordinate system
957  in which they are stored in the database. If
958  another coordinate system is required then the
959  Gene::transfer or Gene::transform method can be
960  used.
961 
962  Return type : listref of Bio::EnsEMBL::Gene
963  Exceptions : Throws if argument is not a GO term
964  Caller : general
965  Status : Stable
966 
967 =cut
968 
# Fetch every gene annotated with the given GO term or any of its
# descendants. Throws if $term is not a GO ontology term. The gene dbIDs are
# de-duplicated and fetched in ascending numeric order.
sub fetch_all_by_GOTerm {
  my ($self, $term) = @_;

  assert_ref($term, 'Bio::EnsEMBL::OntologyTerm');
  throw('Argument is not a GO term') unless $term->ontology() eq 'GO';

  my $xref_adaptor = $self->db->get_DBEntryAdaptor();

  # Accessions of the term itself followed by all of its descendants.
  my @accessions = map { $_->accession() } ($term, @{$term->descendants()});

  my %seen_gene_id;
  for my $accession (@accessions) {
    $seen_gene_id{$_} = 1 for $xref_adaptor->list_gene_ids_by_extids($accession, 'GO');
  }

  my @sorted_ids = sort { $a <=> $b } keys(%seen_gene_id);

  # Hand back a fresh list rather than the adaptor's own array ref.
  return [ @{$self->fetch_all_by_dbID_list(\@sorted_ids)} ];
}
989 
990 =head2 fetch_all_by_ontology_linkage_type
991 
992  Arg [1] : (optional) string $db_name
993  The database name to search for. Defaults to GO
994  Arg [2] : string $linkage_type
995  Linkage type to search for e.g. IMP
996 
997  Example: my $genes = $gene_adaptor->fetch_all_by_ontology_linkage_type('GO', 'IMP');
998  my $genes = $gene_adaptor->fetch_all_by_ontology_linkage_type(undef, 'IMP');
999 
1000  Description : Retrieves a list of genes that are associated with
1001  the given ontology linkage type. The genes returned
1002  are in their native coordinate system, i.e. in the
1003  coordinate system in which they are stored in the database.
1004  Return type : listref of Bio::EnsEMBL::Gene
1005  Exceptions : Throws if a linkage type is not given
1006  Caller : general
1007  Status : Stable
1008 
1009 =cut
1010 
# Fetch genes that have an ontology xref with the given linkage type (e.g.
# 'IMP') in the named external database. $db_name defaults to 'GO'.
# Throws when no linkage type is given. Warns and returns an empty listref
# when the external database is unknown.
sub fetch_all_by_ontology_linkage_type {
  my ($self, $db_name, $linkage_type) = @_;

  $db_name = 'GO' unless defined $db_name;
  unless (defined $linkage_type) {
    throw("No linkage type given");
  }

  my $xref_adaptor = $self->db->get_DBEntryAdaptor();
  my $db_ids = $xref_adaptor->get_external_db_ids($db_name, undef, 'ignore release');
  if (!@{$db_ids}) {
    warning(sprintf("Could not find external database '%s' in the external_db table", $db_name));
    return [];
  }

  # Collect gene ids across every matching external_db row, without repeats.
  my %seen_gene_id;
  for my $db_id (@{$db_ids}) {
    my @linked = $xref_adaptor->list_gene_ids_by_external_db_id($db_id, $linkage_type);
    @seen_gene_id{@linked} = (1) x @linked;
  }

  my @gene_ids = keys %seen_gene_id;
  return $self->fetch_all_by_dbID_list(\@gene_ids);
}
1033 
1034 =head2 fetch_all_by_GOTerm_accession
1035 
1036  Arg [1] : String
1037  The GO term accession for which genes should be
1038  fetched.
1039 
1040  Example :
1041 
1042  @genes =
1043  @{ $gene_adaptor->fetch_all_by_GOTerm_accession(
1044  'GO:0030326') };
1045 
1046  Description : Retrieves a list of genes that are associated with
1047  the given GO term, or with any of its descendent
1048  GO terms. The genes returned are in their native
1049  coordinate system, i.e. in the coordinate system
1050  in which they are stored in the database. If
1051  another coordinate system is required then the
1052  Gene::transfer or Gene::transform method can be
1053  used.
1054 
1055  Return type : listref of Bio::EnsEMBL::Gene
1056  Exceptions : Throws if argument is not a GO term accession
1057  Caller : general
1058  Status : Stable
1059 
1060 =cut
1061 
# Convenience wrapper: look up the ontology term for a 'GO:...' accession via
# the registry's Multi/Ontology OntologyTerm adaptor and delegate to
# fetch_all_by_GOTerm(). Throws if the accession does not start with 'GO:'.
sub fetch_all_by_GOTerm_accession {
  my ($self, $accession) = @_;

  throw('Argument is not a GO term accession') unless $accession =~ /^GO:/;

  my $ontology_term_adaptor = Bio::EnsEMBL::Registry->get_adaptor('Multi', 'Ontology', 'OntologyTerm');
  my $go_term = $ontology_term_adaptor->fetch_by_accession($accession);

  return $self->fetch_all_by_GOTerm($go_term);
}
1075 
1076 =head2 fetch_all_alt_alleles
1077 
1078  Arg [1] : Bio::EnsEMBL::Gene $gene
1079  The gene to fetch alternative alleles for
1080  Arg [2] : Boolean (optional)
1081  Ask the method to warn about any gene without an alt allele
1082  group. Defaults to false
1083  Example : my @alt_genes = @{ $gene_adaptor->fetch_all_alt_alleles($gene) };
1084  foreach my $alt_gene (@alt_genes) {
1085  print "Alternate allele: " . $alt_gene->stable_id() . "\n" ;
1086  }
1087  Description: Retrieves genes which are alternate alleles to a provided gene.
1088  Alternate alleles in Ensembl are genes which are similar and are
1089  on an alternative haplotype of the same region. There are not
1090  currently very many of these. This method will return a
1091  reference to an empty list if no alternative alleles are found.
1092  Returntype : ArrayRef of Bio::EnsEMBL::Gene objects
1093  Exceptions : throw if incorrect arg provided
1094  warning if gene arg does not have an entry in an alt allele and if
1095  the warn flag is true
1096  Caller : Gene::get_all_alt_alleles
1097  Status : Stable
1098 
1099 =cut
1100 
# Return the other genes in the alt allele group of $gene.
# Throws unless $gene is a Bio::EnsEMBL::Gene. Returns an empty listref when
# the gene has no dbID (with a warning) or belongs to no group (warning only
# if $warn is true).
sub fetch_all_alt_alleles {
  my ($self, $gene, $warn) = @_;

  unless (ref($gene) && $gene->isa('Bio::EnsEMBL::Gene')) {
    throw('Bio::EnsEMBL::Gene argument is required');
  }

  unless ($gene->dbID()) {
    warning('Cannot retrieve alternate alleles for gene without dbID');
    return [];
  }

  my $group_adaptor = $self->db->get_adaptor('AltAlleleGroup');
  my $group = $group_adaptor->fetch_by_gene_id($gene->dbID);

  if ($group) {
    # Ask for every gene in the group: the representative is not filtered
    # out, but the query gene itself is excluded.
    return $group->get_all_Genes(undef, [$gene]);
  }

  warning("Supplied gene has no alternative alleles") if $warn;
  return [];
} ## end sub fetch_all_alt_alleles
1129 
1130 =head2 is_ref
1131 
1132  Arg [1] : Gene dbID
1133  Description: Used to determine whether a given Gene is the representative
1134  Gene of an alt allele group. If it does not have an alternative
1135  allele that is more representative, then this ID will be said to
1136  be representative.
1137  Returntype : Boolean - true if the gene is the representative of its alt allele group or belongs to no group, false otherwise
1138 
1139 =cut
1140 
# Tell whether the gene with dbID $gene_id is the representative gene of its
# alt allele group.
#
# Returns 1 when the gene has no alt allele group, or when it is the group's
# representative gene; returns 0 otherwise.
sub is_ref {
  my ($self, $gene_id) = @_;
  my $aag = $self->db->get_adaptor('AltAlleleGroup')->fetch_by_gene_id($gene_id);

  # A gene outside any group has nothing more representative than itself.
  return 1 if !defined($aag);

  return ($aag->rep_Gene_id == $gene_id) ? 1 : 0;
}
1155 
1156 =head2 store_alt_alleles
1157 
1158 
1159  Arg [1] : reference to list of Bio::EnsEMBL::Genes $genes
1160  Example : $gene_adaptor->store_alt_alleles([$gene1, $gene2, $gene3]);
1161  Description: This method creates a group of alternative alleles (i.e. locus)
1162  from a set of genes. The genes should be genes from alternate
1163  haplotypes which are similar. The genes must already be stored
1164  in this database. WARNING - now that more fine-grained support
1165  for alt_alleles has been implemented, this method is rather coarse.
1166  Consider working directly with AltAlleleGroup and
1167  AltAlleleGroupAdaptor.
1168  Returntype : int alt_allele_group_id or undef if no alt_alleles were stored
1169  Exceptions : throw on incorrect arguments
1170  throw on sql error (e.g. duplicate unique id)
1171  Caller : general
1172  Status : Stable
1173 
1174 =cut
1175 
# Build an alt allele group from a list of stored genes and store it.
# Unsupported legacy entry point: it always emits a warning pointing at
# AltAlleleGroupAdaptor::store().
#
#   $genes : array ref of Bio::EnsEMBL::Gene objects
#
# Returns whatever AltAlleleGroupAdaptor::store() returns, or nothing when
# fewer than two genes are given or when the number of genes on a reference
# slice is not exactly one.
# Throws if $genes is not an array reference.
sub store_alt_alleles {
  my $self = shift;
  my $genes = shift;

  warning("Unsupported. Switch to using AltAlleleGroupAdaptor::store() and AltAlleleGroups");

  # '!' binds tighter than 'eq', so the former test "!ref($genes) eq 'ARRAY'"
  # could never be true; compare the reference type directly.
  if (ref($genes) ne 'ARRAY') {
    throw('List reference of Bio::EnsEMBL::Gene argument expected.');
  }

  my $num_genes = scalar(@$genes);
  if ($num_genes < 2) {
    # With an empty list there is no gene whose id can be reported.
    my $gene_id = $num_genes ? $genes->[0]->dbID() : 'none';
    warning('At least 2 genes must be provided to construct alternative alleles (gene id: ' . $gene_id . '). Ignoring.');
    return;
  }

  # Each member record is [ gene dbID, { type flags } ].
  my @allele_list;
  foreach my $gene (@$genes) {
    my $gene_dbID = $gene->dbID;

    # Start from a truly empty hash; assigning {} to a hash would add a
    # stringified-reference key.
    my %type;
    if ($gene->slice->is_reference()) {
      $type{'IS_REPRESENTATIVE'} = 1;
    }
    push @allele_list, [ $gene_dbID, \%type ];
  }

  my $aag = Bio::EnsEMBL::AltAlleleGroup->new(
    -MEMBERS => \@allele_list,
  );

  # Exactly one member must sit on the reference sequence.
  if (scalar( @{$aag->get_all_members_with_type('IS_REPRESENTATIVE')} ) != 1) {
    warning('Inappropriate number of alternative alleles on the reference sequence. Ignoring.');
    return;
  }

  my $aaga = $self->db->get_adaptor('AltAlleleGroup');
  return $aaga->store($aag);
} ## end sub store_alt_alleles
1215 
1216 =head2 store
1217 
1218  Arg [1] : Bio::EnsEMBL::Gene $gene
1219  The gene to store in the database
1220  Arg [2] : ignore_release in xrefs [default 1] set to 0 to use release info
1221  in external database references
1222  Arg [3] : prevent coordinate recalculation if you are persisting
1223  transcripts with this gene
1224  Arg [4] : prevent copying supporting features across exons
1225  increased speed for lost accuracy
1226  Example : $gene_adaptor->store($gene);
1227  Description: Stores a gene in the database.
1228  Returntype : the database identifier (dbID) of the newly stored gene
1229  Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene or if
1230  $gene does not have an analysis object
1231  Caller : general
1232  Status : Stable
1233 
1234 =cut
1235 
# Store a gene together with its xrefs, transcripts, display xref and
# attributes.
#
#   $gene                            : Bio::EnsEMBL::Gene to store
#   $ignore_release                  : passed to DBEntryAdaptor::store();
#                                      defaults to 1 when undefined
#   $skip_recalculating_coordinates  : forwarded to TranscriptAdaptor::store();
#                                      the gene's own recalculate_coordinates()
#                                      is always called here
#   $skip_exon_sf                    : forwarded to Transcript::swap_exons()
#
# Returns the gene's dbID (the existing one if the gene is already stored in
# this database). Throws if $gene is not a Bio::EnsEMBL::Gene or has no
# analysis.
sub store {
  my ($self, $gene, $ignore_release, $skip_recalculating_coordinates, $skip_exon_sf) = @_;

  if (!ref $gene || !$gene->isa('Bio::EnsEMBL::Gene')) {
    throw("Must store a gene object, not a $gene");
  }
  if (!defined($ignore_release)) {
    $ignore_release = 1;
  }
  my $db = $self->db();

  # Already stored in this database: nothing to do.
  if ($gene->is_stored($db)) {
    return $gene->dbID();
  }

  # ensure coords are correct before storing
  $gene->recalculate_coordinates();

  my $analysis = $gene->analysis();
  throw("Genes must have an analysis object.") if (!defined($analysis));

  my $analysis_id;
  if ($analysis->is_stored($db)) {
    $analysis_id = $analysis->dbID();
  } else {
    $analysis_id = $db->get_AnalysisAdaptor->store($analysis);
  }

  my $type = $gene->get_Biotype->name;

  # default to is_current = 1 if this attribute is not set
  my $is_current = $gene->is_current;
  $is_current = 1 unless (defined($is_current));

  # Keep hold of the caller's objects: _pre_store() may hand back a
  # different gene object, and the ids must end up on the originals.
  my $original = $gene;
  my $original_transcripts = $gene->get_all_Transcripts();

  my $seq_region_id;

  ($gene, $seq_region_id) = $self->_pre_store($gene);

  # Columns bound through placeholders, in bind_param() order.
  my @columns = qw(
    biotype
    analysis_id
    seq_region_id
    seq_region_start
    seq_region_end
    seq_region_strand
    description
    source
    is_current
    canonical_transcript_id
  );

  # Date columns: their values are SQL text produced by
  # from_seconds_to_date() and are written into the statement directly.
  my @canned_columns;
  my @canned_values;

  if (defined($gene->stable_id)) {
    push @columns, 'stable_id', 'version';

    my $created = $self->db->dbc->from_seconds_to_date($gene->created_date());
    my $modified = $self->db->dbc->from_seconds_to_date($gene->modified_date());

    if ($created) {
      push @canned_columns, 'created_date';
      push @canned_values, $created;
    }
    if ($modified) {
      push @canned_columns, 'modified_date';
      push @canned_values, $modified;
    }

  }

  my $columns = join(', ', @columns, @canned_columns);
  my $values = join(', ', ('?') x @columns, @canned_values);
  my $store_gene_sql = qq(
    INSERT INTO gene ( $columns ) VALUES ( $values )
  );

  my $sth = $self->prepare($store_gene_sql);
  $sth->bind_param(1, $type, SQL_VARCHAR);
  $sth->bind_param(2, $analysis_id, SQL_INTEGER);
  $sth->bind_param(3, $seq_region_id, SQL_INTEGER);
  $sth->bind_param(4, $gene->start(), SQL_INTEGER);
  $sth->bind_param(5, $gene->end(), SQL_INTEGER);
  $sth->bind_param(6, $gene->strand(), SQL_TINYINT);
  $sth->bind_param(7, $gene->description(), SQL_LONGVARCHAR);
  $sth->bind_param(8, $gene->source(), SQL_VARCHAR);
  $sth->bind_param(9, $is_current, SQL_TINYINT);

  # Canonical transcript ID will be updated later.
  # Set it to zero for now.
  $sth->bind_param(10, 0, SQL_TINYINT);

  if (defined($gene->stable_id)) {
    $sth->bind_param(11, $gene->stable_id, SQL_VARCHAR);
    $sth->bind_param(12, $gene->version, SQL_INTEGER);
  }

  $sth->execute();
  $sth->finish();

  my $gene_dbID = $self->last_insert_id('gene_id', undef, 'gene');

  # store the dbentries associated with this gene
  my $dbEntryAdaptor = $db->get_DBEntryAdaptor();

  foreach my $dbe (@{$gene->get_all_DBEntries}) {
    $dbEntryAdaptor->store($dbe, $gene_dbID, "Gene", $ignore_release);
  }

  # We allow transcripts not to share equal exons and instead have
  # copies. For the database we still want sharing though, to have
  # easier time with stable ids. So we need to have a step to merge
  # exons together before store.
  my %exons;

  foreach my $trans (@{$gene->get_all_Transcripts}) {
    foreach my $e (@{$trans->get_all_Exons}) {
      my $key = $e->hashkey();
      if (exists $exons{$key}) {
        $trans->swap_exons($e, $exons{$key}, $skip_exon_sf);
      } else {
        $exons{$key} = $e;
      }
    }
  }

  my $transcript_adaptor = $db->get_TranscriptAdaptor();

  my $transcripts = $gene->get_all_Transcripts();

  # Walk the stored and the original transcripts in parallel by index.
  my $new_canonical_transcript_id;
  for (my $i = 0; $i < @$transcripts; $i++) {
    my $new = $transcripts->[$i];
    my $old = $original_transcripts->[$i];

    my $new_dbID = $transcript_adaptor->store($new, $gene_dbID, $analysis_id, $skip_recalculating_coordinates);
    $new = $transcript_adaptor->fetch_by_dbID($new_dbID);

    if ($new) {
      # The first canonical transcript encountered wins.
      if (!defined($new_canonical_transcript_id) && $new->is_canonical()) {
        $new_canonical_transcript_id = $new->dbID();
      }

      # update the original transcripts since we may have made copies of
      # them by transforming the gene
      $old->dbID($new->dbID());
      $old->adaptor($new->adaptor());

      if ($new->translation) {
        $old->translation->dbID($new->translation()->dbID);
        $old->translation->adaptor($new->translation()->adaptor);
      }
    }
  }

  if (defined($new_canonical_transcript_id)) {
    # Now the canonical transcript has been stored, so update the
    # canonical_transcript_id of this gene with the new dbID.
    my $sth = $self->prepare(
      q(
        UPDATE gene
           SET canonical_transcript_id = ?
         WHERE gene_id = ?)
    );

    $sth->bind_param(1, $new_canonical_transcript_id, SQL_INTEGER);
    $sth->bind_param(2, $gene_dbID, SQL_INTEGER);

    $sth->execute();
    $sth->finish();

    # Reuse the transcript adaptor obtained above.
    $transcript_adaptor->update_canonical_attribute($new_canonical_transcript_id);
  }

  # update gene to point to display xref if it is set
  if (my $display_xref = $gene->display_xref) {
    my $dxref_id;
    if ($display_xref->is_stored($db)) {
      $dxref_id = $display_xref->dbID();
    } else {
      $dxref_id = $dbEntryAdaptor->exists($display_xref);
    }

    if (defined($dxref_id)) {
      my $sth = $self->prepare("UPDATE gene SET display_xref_id = ? WHERE gene_id = ?");
      $sth->bind_param(1, $dxref_id, SQL_INTEGER);
      $sth->bind_param(2, $gene_dbID, SQL_INTEGER);
      $sth->execute();
      $sth->finish();
      $display_xref->dbID($dxref_id);
      $display_xref->adaptor($dbEntryAdaptor);
    } else {
      warning("Display_xref " . $display_xref->dbname() . ":" . $display_xref->display_id() . " is not stored in database.\n" . "Not storing relationship to this gene.");
      $display_xref->dbID(undef);
      $display_xref->adaptor(undef);
    }
  }

  # store gene attributes if there are any
  my $attr_adaptor = $db->get_AttributeAdaptor();
  $attr_adaptor->store_on_Gene($gene_dbID, $gene->get_all_Attributes);

  # set the adaptor and dbID on the original passed in gene not the
  # transferred copy
  $original->adaptor($self);
  $original->dbID($gene_dbID);

  return $gene_dbID;
} ## end sub store
1453 
1454 =head2 remove
1455 
1456  Arg [1] : Bio::EnsEMBL::Gene $gene
1457  the gene to remove from the database
1458  Example : $gene_adaptor->remove($gene);
1459  Description: Removes a gene completely from the database. All associated
1460  transcripts, exons, stable_identifiers, descriptions, etc.
1461  are removed as well. Use with caution!
1462  Returntype : none
1463  Exceptions : throw on incorrect arguments
1464  warning if gene is not stored in this database
1465  Caller : general
1466  Status : Stable
1467 
1468 =cut
1469 
# Delete a gene and everything hanging off it: object xrefs, alt_allele rows,
# gene attributes, transcripts and finally the gene row itself. Afterwards
# the gene's dbID and adaptor are cleared so that it reads as unstored.
# Throws unless $gene is a Bio::EnsEMBL::Gene. Warns and returns when the
# gene is not stored in this database.
sub remove {
  my ($self, $gene) = @_;

  unless (ref($gene) && $gene->isa('Bio::EnsEMBL::Gene')) {
    throw("Bio::EnsEMBL::Gene argument expected.");
  }

  unless ($gene->is_stored($self->db())) {
    warning("Cannot remove gene " . $gene->dbID() . ". Is not stored in " . "this database.");
    return;
  }

  # Runs a single-placeholder DELETE keyed on this gene's dbID.
  my $delete_for_gene = sub {
    my ($delete_sql) = @_;
    my $handle = $self->prepare($delete_sql);
    $handle->bind_param(1, $gene->dbID, SQL_INTEGER);
    $handle->execute();
    $handle->finish();
  };

  # Object xrefs attached to the gene.
  my $xref_adaptor = $self->db()->get_DBEntryAdaptor();
  for my $xref (@{$gene->get_all_DBEntries()}) {
    $xref_adaptor->remove_from_object($xref, $gene, 'Gene');
  }

  # Alternative allele entries for the gene.
  $delete_for_gene->("DELETE FROM alt_allele WHERE gene_id = ?");

  # Attributes attached to the gene.
  my $attribute_adaptor = $self->db->get_AttributeAdaptor;
  $attribute_adaptor->remove_from_Gene($gene);

  # Every transcript belonging to the gene.
  my $transcript_adaptor = $self->db->get_TranscriptAdaptor();
  for my $transcript (@{$gene->get_all_Transcripts()}) {
    $transcript_adaptor->remove($transcript);
  }

  # The gene row itself.
  $delete_for_gene->("DELETE FROM gene WHERE gene_id = ? ");

  # Flag the object as unstored.
  $gene->dbID(undef);
  $gene->adaptor(undef);

  return;
} ## end sub remove
1520 
1521 =head2 get_Interpro_by_geneid
1522 
1523  Arg [1] : String $gene_stable_id
1524  The stable ID of the gene to obtain
1525  Example : @i = @{
1526  $gene_adaptor->get_Interpro_by_geneid(
1527  $gene->stable_id() ) };
1528  Description: Gets interpro accession numbers by gene stable id. A hack really
1529  - we should have a much more structured system than this.
1530  Returntype : listref of strings (Interpro_acc:description)
1531  Exceptions : none
1532  Caller : domainview
1533  Status : Stable
1534 
1535 =cut
1536 
# List the InterPro hits of a gene's current transcripts as
# "accession:description" strings, one per distinct accession, in the order
# the database returns them.
sub get_Interpro_by_geneid {
  my ($self, $gene_stable_id) = @_;

  my $sql = qq(
    SELECT i.interpro_ac,
           x.description
      FROM transcript t,
           translation tl,
           protein_feature pf,
           interpro i,
           xref x,
           gene g
     WHERE g.stable_id = ?
       AND t.gene_id = g.gene_id
       AND t.is_current = 1
       AND tl.transcript_id = t.transcript_id
       AND tl.translation_id = pf.translation_id
       AND i.id = pf.hit_name
       AND i.interpro_ac = x.dbprimary_acc);

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $gene_stable_id, SQL_VARCHAR);
  $sth->execute;

  my %seen_accession;
  my @labels;
  while (my $row = $sth->fetchrow_arrayref()) {
    my ($accession, $description) = @{$row}[0, 1];

    # Only the first row for each accession contributes a label.
    next if $seen_accession{$accession};
    $seen_accession{$accession} = 1;

    push(@labels, $accession . ":" . $description);
  }

  return \@labels;
} ## end sub get_Interpro_by_geneid
1574 
1575 =head2 update
1576 
1577  Arg [1] : Bio::EnsEMBL::Gene $gene
1578  The gene to update
1579  Example : $gene_adaptor->update($gene);
1580  Description: Updates the stable_id, version, biotype, analysis, display_xref,
1581  description, is_current and canonical_transcript_id of a gene in the database.
1582  Returntype : None
1583  Exceptions : thrown if the $gene is not a Bio::EnsEMBL::Gene
1584  Caller : general
1585  Status : Stable
1586 
1587 =cut
1588 
# Write the gene's stable_id, biotype, analysis, display xref, description,
# is_current flag, canonical transcript and version back to its row in the
# gene table, then let the TranscriptAdaptor refresh the canonical attribute
# if the gene has a canonical transcript.
#
#   $gene : Bio::EnsEMBL::Gene to update
#
# Returns nothing. Throws if $gene is not a Bio::EnsEMBL::Gene.
sub update {
  my ($self, $gene) = @_;

  if (!defined $gene || !ref $gene || !$gene->isa('Bio::EnsEMBL::Gene')) {
    throw("Must update a gene object, not a $gene");
  }

  # Get old canonical transcript id
  my $sth = $self->prepare("SELECT canonical_transcript_id FROM gene WHERE gene_id=?");
  $sth->execute($gene->dbID());
  my ($old_canonical_transcript_id) = $sth->fetchrow_array();
  $sth->finish();

  my $update_gene_sql = qq(
    UPDATE gene
       SET stable_id = ?,
           biotype = ?,
           analysis_id = ?,
           display_xref_id = ?,
           description = ?,
           is_current = ?,
           canonical_transcript_id = ?,
           version = ?
     WHERE gene_id = ?
  );

  # A display xref without a dbID is written as NULL.
  my $display_xref = $gene->display_xref();
  my $display_xref_id;

  if ($display_xref && $display_xref->dbID()) {
    $display_xref_id = $display_xref->dbID();
  } else {
    $display_xref_id = undef;
  }

  $sth = $self->prepare($update_gene_sql);

  # Bind types for description and version match those used by store().
  $sth->bind_param(1, $gene->stable_id(), SQL_VARCHAR);
  $sth->bind_param(2, $gene->get_Biotype->name, SQL_VARCHAR);
  $sth->bind_param(3, $gene->analysis->dbID(), SQL_INTEGER);
  $sth->bind_param(4, $display_xref_id, SQL_INTEGER);
  $sth->bind_param(5, $gene->description(), SQL_LONGVARCHAR);
  $sth->bind_param(6, $gene->is_current(), SQL_TINYINT);

  # No canonical transcript is recorded as 0, as store() does initially.
  if (defined($gene->canonical_transcript())) {
    $sth->bind_param(7, $gene->canonical_transcript()->dbID(), SQL_INTEGER);
  } else {
    $sth->bind_param(7, 0, SQL_INTEGER);
  }
  $sth->bind_param(8, $gene->version(), SQL_INTEGER);
  $sth->bind_param(9, $gene->dbID(), SQL_INTEGER);

  $sth->execute();
  $sth->finish();

  if (defined($gene->canonical_transcript())) {
    my $transcript_adaptor = $self->db()->get_TranscriptAdaptor();
    $transcript_adaptor->update_canonical_attribute($gene->canonical_transcript()->dbID(), $old_canonical_transcript_id);
  }

  return;
} ## end sub update
1650 
1651 
1652 =head2 update_coords
1653 
1654  Arg [1] : Bio::EnsEMBL::Gene $gene
1655  The gene to update
1656  Example : $gene_adaptor->update_coords($gene);
1657  Description: In the event of a transcript being removed, coordinates for the Gene
1658  need to be reset, but update() does not do this. update_coords
1659  fills this niche
1660  Returntype : None
1661  Exceptions : thrown if the $gene is not supplied
1662  Caller : general
1663 
1664 =cut
1665 
# Recalculate a gene's coordinates and write its seq_region_start and
# seq_region_end back to the gene table. Throws if no gene is supplied.
sub update_coords {
  my ($self, $gene) = @_;

  if (!$gene) {
    throw('Must have a gene to update in order to update it');
  }

  $gene->recalculate_coordinates;

  my $sth = $self->prepare(qq(
    UPDATE gene
       SET seq_region_start = ?,
           seq_region_end = ?
     WHERE gene_id = ?
  ));

  # Placeholders are filled in statement order from these gene accessors.
  my $placeholder = 0;
  for my $accessor (qw(seq_region_start seq_region_end dbID)) {
    $sth->bind_param(++$placeholder, $gene->$accessor);
  }

  return $sth->execute();
}
1682 
1683 # _objs_from_sth
1684 
1685 # Arg [1] : StatementHandle $sth
1686 # Arg [2] : Bio::EnsEMBL::AssemblyMapper $mapper
1687 # Arg [3] : Bio::EnsEMBL::Slice $dest_slice
1688 # Description: PROTECTED implementation of abstract superclass method.
1689 # responsible for the creation of Genes
1690 # Returntype : listref of Bio::EnsEMBL::Genes in target coordinate system
1691 # Exceptions : none
1692 # Caller : internal
1693 # Status : Stable
1694 
# Turn the rows of an executed gene statement handle into Gene objects.
#
#   $sth        : executed statement handle whose 26 columns are bound below
#   $mapper     : optional assembly mapper used to remap coordinates
#   $dest_slice : optional slice the genes should be expressed relative to
#
# Returns a listref of genes. Rows that map to nothing, or that fall off the
# destination slice or onto another seq_region, are skipped.
sub _objs_from_sth {
  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes. Thus many caches and
  # a fair bit of gymnastics is used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $aa = $self->db()->get_AnalysisAdaptor();
  my $dbEntryAdaptor = $self->db()->get_DBEntryAdaptor();

  my @genes;
  my %analysis_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my (
    $gene_id, $seq_region_id, $seq_region_start,
    $seq_region_end, $seq_region_strand, $analysis_id,
    $biotype, $display_xref_id, $gene_description,
    $source, $is_current,
    $canonical_transcript_id, $stable_id, $version,
    $created_date, $modified_date, $xref_display_label,
    $xref_primary_acc, $xref_description, $xref_version,
    $external_db, $external_status, $external_release,
    $external_db_name, $info_type, $info_text
  );

  $sth->bind_columns(\(
    $gene_id, $seq_region_id, $seq_region_start,
    $seq_region_end, $seq_region_strand, $analysis_id,
    $biotype, $display_xref_id, $gene_description,
    $source, $is_current,
    $canonical_transcript_id, $stable_id, $version,
    $created_date, $modified_date, $xref_display_label,
    $xref_primary_acc, $xref_description, $xref_version,
    $external_db, $external_status, $external_release,
    $external_db_name, $info_type, $info_text
  ) );

  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_cs;
  my $dest_slice_sr_name;
  my $dest_slice_sr_id;
  my $asma;

  if ($dest_slice) {
    $dest_slice_start = $dest_slice->start();
    $dest_slice_end = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
    $dest_slice_cs = $dest_slice->coord_system();
    $dest_slice_sr_name = $dest_slice->seq_region_name();
    $dest_slice_sr_id = $dest_slice->get_seq_region_id();
    $asma = $self->db->get_AssemblyMapperAdaptor();
  }

  # The destination slice does not change between rows, so read its
  # seq_region length once instead of once per fetched row.
  my $dest_slice_sr_len = defined($dest_slice) ? $dest_slice->seq_region_length() : undef;

  FEATURE: while($sth->fetch()) {

    #get the analysis object (||= already stores a fresh fetch in the cache)
    my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);

    #need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    my $slice = $slice_hash{"ID:".$seq_region_id};

    if (!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    #obtain a mapper if none was defined, but a dest_seq_region was
    if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
      $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs = $sr_cs_hash{$seq_region_id};

    #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #

    if ($mapper) {

      if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
        ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
          $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);

      } else {
        ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
          $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
      }

      #skip features that map to gaps or coord system boundaries
      next FEATURE if (!defined($seq_region_id));

      #get a slice in the coord system we just mapped to
      $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
    }

    #
    # If a destination slice was provided convert the coords.
    #
    if (defined($dest_slice)) {
      my $seq_region_len = $dest_slice_sr_len;

      if ( $dest_slice_strand == 1 ) {
        $seq_region_start = $seq_region_start - $dest_slice_start + 1;
        $seq_region_end = $seq_region_end - $dest_slice_start + 1;

        if ( $dest_slice->is_circular ) {
          # Handle circular chromosomes.

          if ( $seq_region_start > $seq_region_end ) {
            # Looking at a feature overlapping the chromosome origin.

            if ( $seq_region_end > $dest_slice_start ) {
              # Looking at the region in the beginning of the chromosome
              $seq_region_start -= $seq_region_len;
            }
            if ( $seq_region_end < 0 ) {
              $seq_region_end += $seq_region_len;
            }
          } else {
            if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
              # Looking at the region overlapping the chromosome
              # origin and a feature which is at the beginning of the
              # chromosome.
              $seq_region_start += $seq_region_len;
              $seq_region_end += $seq_region_len;
            }
          }
        }
      } else {

        my $start = $dest_slice_end - $seq_region_end + 1;
        my $end = $dest_slice_end - $seq_region_start + 1;

        if ($dest_slice->is_circular()) {

          if ($dest_slice_start > $dest_slice_end) {
            # slice spans origin or replication

            if ($seq_region_start >= $dest_slice_start) {
              $end += $seq_region_len;
              $start += $seq_region_len if $seq_region_end > $dest_slice_start;

            } elsif ($seq_region_start <= $dest_slice_end) {
              # do nothing
            } elsif ($seq_region_end >= $dest_slice_start) {
              $start += $seq_region_len;
              $end += $seq_region_len;

            } elsif ($seq_region_end <= $dest_slice_end) {
              $end += $seq_region_len if $end < 0;

            } elsif ($seq_region_start > $seq_region_end) {
              $end += $seq_region_len;
            }

          } else {

            if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
              # do nothing
            } elsif ($seq_region_start > $seq_region_end) {
              if ($seq_region_start <= $dest_slice_end) {
                $start -= $seq_region_len;
              } elsif ($seq_region_end >= $dest_slice_start) {
                $end += $seq_region_len;
              }
            }
          }
        }

        $seq_region_start = $start;
        $seq_region_end = $end;
        $seq_region_strand *= -1;

      } ## end else [ if ( $dest_slice_strand...)]

      # Throw away features off the end of the requested slice or on
      # different seq_region.
      if ($seq_region_end < 1
          || $seq_region_start > $dest_slice_length
          || ($dest_slice_sr_id != $seq_region_id)) {
        next FEATURE;
      }
      $slice = $dest_slice;
    }

    my $display_xref;

    # Only rows that carry an xref display label get a display xref object.
    if (defined $xref_display_label) {
      $display_xref = Bio::EnsEMBL::DBEntry->new_fast({
        'dbID' => $display_xref_id,
        'adaptor' => $dbEntryAdaptor,
        'display_id' => $xref_display_label,
        'primary_id' => $xref_primary_acc,
        'version' => $xref_version,
        'description' => $xref_description,
        'release' => $external_release,
        'dbname' => $external_db,
        'db_display_name' => $external_db_name,
        'info_type' => $info_type,
        'info_text' => $info_text
      });
      $display_xref->status($external_status);
    }

    # Finally, create the new Gene.
    push(
      @genes,
      $self->_create_feature_fast(
        'Bio::EnsEMBL::Gene', {
          'analysis' => $analysis,
          'biotype' => $biotype,
          'start' => $seq_region_start,
          'end' => $seq_region_end,
          'strand' => $seq_region_strand,
          'adaptor' => $self,
          'slice' => $slice,
          'dbID' => $gene_id,
          'stable_id' => $stable_id,
          'version' => $version,
          'created_date' => $created_date || undef,
          'modified_date' => $modified_date || undef,
          'description' => $gene_description,
          'external_name' => undef, # will use display_id
                                    # from display_xref
          'external_db' => $external_db,
          'external_status' => $external_status,
          'display_xref' => $display_xref,
          'source' => $source,
          'is_current' => $is_current,
          'canonical_transcript_id' => $canonical_transcript_id}));

  } ## end while ($sth->fetch())

  return \@genes;
} ## end sub _objs_from_sth
1946 
=head2 cache_gene_seq_mappings

  Example    : $gene_adaptor->cache_gene_seq_mappings();
  Description: Caches all the assembly mappings needed for genes.
               Looks up the name of the sequence-level coordinate system
               for this adaptor's species, then, for every coordinate
               system the MetaCoordContainer adaptor returns for the
               'gene' feature type, fetches the assembly mapper between
               the two and calls register_all() on it.
               Returns without doing anything when no sequence-level
               coordinate system is found; coordinate systems for which
               no mapper is returned are skipped.
  Returntype : None
  Exceptions : None
  Caller     : general
  Status     : At Risk
             : New experimental code

=cut

sub cache_gene_seq_mappings {
  my ($self) = @_;

  # Get the name of the sequence-level coordinate system to map to.
  # Each fragment ends with a space so the clauses do not run together
  # once concatenated.
  my $sql =
      'SELECT name '
    . 'FROM coord_system '
    . 'WHERE attrib like "%%sequence_level%%" '
    . 'AND species_id = ?';

  my $sth = $self->prepare($sql);
  $sth->bind_param(1, $self->species_id(), SQL_INTEGER);
  $sth->execute();

  # Fetch in list context and take the first (only) column explicitly.
  my ($sequence_level) = $sth->fetchrow_array();

  $sth->finish();

  # Nothing to cache when the query returned no row.
  return if !defined $sequence_level;

  my $csa = $self->db->get_CoordSystemAdaptor();
  my $ama = $self->db->get_AssemblyMapperAdaptor();

  my $cs1 = $csa->fetch_by_name($sequence_level);

  # Get the coordinate systems genes are stored on; these are the
  # second level of each mapping.
  my $mcc   = $self->db->get_MetaCoordContainerAdaptor();
  my $csnew = $mcc->fetch_all_CoordSystems_by_feature_type('gene');

  foreach my $cs2 (@$csnew) {
    my $am = $ama->fetch_by_CoordSystems($cs1, $cs2);

    # Guard: there is nothing to register if no mapper came back for
    # this pair of coordinate systems.
    next if !defined $am;

    $am->register_all();
  }

  return;
} ## end sub cache_gene_seq_mappings
1990 
=head2 fetch_all_by_exon_supporting_evidence

  Arg [1]    : String $hit_name
               Name of supporting feature
  Arg [2]    : String $feature_type
               one of "dna_align_feature" or "protein_align_feature"
  Arg [3]    : (optional) Bio::EnsEMBL::Analysis
  Example    : $genes = $gene_adaptor->fetch_all_by_exon_supporting_evidence(
                 'XYZ', 'dna_align_feature');
  Description: Gets all the current genes with transcripts with exons which
               have a specified hit on a particular type of feature.
               Optionally filter by analysis.
  Returntype : Listref of Bio::EnsEMBL::Gene
  Exceptions : If feature_type is not exactly "dna_align_feature" or
               "protein_align_feature".
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_exon_supporting_evidence {
  my ($self, $hit_name, $feature_type, $analysis) = @_;

  # $feature_type is interpolated into the SQL below as a table name and
  # as part of a column name, so it must be exactly one of the two allowed
  # names. The alternation is grouped and the pattern anchored so that
  # strings which merely contain "dna" are rejected.
  if (!defined $feature_type
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/)
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # Default to empty strings so the SQL template interpolates cleanly
  # when no analysis filter is requested.
  my ($anal_from, $anal_where) = ('', '');
  if ($analysis) {
    $anal_from  = ", analysis a ";
    $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? ";
  }

  my $sql = qq(
      SELECT DISTINCT(g.gene_id)
        FROM gene g,
             transcript t,
             exon_transcript et,
             supporting_feature sf,
             $feature_type f
             $anal_from
       WHERE g.gene_id = t.gene_id
         AND g.is_current = 1
         AND t.transcript_id = et.transcript_id
         AND et.exon_id = sf.exon_id
         AND sf.feature_id = f.${feature_type}_id
         AND sf.feature_type = ?
         AND f.hit_name=?
         $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param(1, $feature_type, SQL_VARCHAR);
  $sth->bind_param(2, $hit_name,     SQL_VARCHAR);
  # The third placeholder only exists when the analysis filter was added.
  $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

  $sth->execute();

  my @genes;

  # List assignment in the condition: the loop ends when no row is left,
  # not when the fetched value happens to be false.
  while (my ($id) = $sth->fetchrow_array()) {
    my $gene = $self->fetch_by_dbID($id);
    push(@genes, $gene) if $gene;
  }

  return \@genes;
} ## end sub fetch_all_by_exon_supporting_evidence
2058 
=head2 fetch_all_by_transcript_supporting_evidence

  Arg [1]    : String $hit_name
               Name of supporting feature
  Arg [2]    : String $feature_type
               one of "dna_align_feature" or "protein_align_feature"
  Arg [3]    : (optional) Bio::EnsEMBL::Analysis
  Example    : $genes = $gene_adaptor->fetch_all_by_transcript_supporting_evidence(
                 'XYZ', 'dna_align_feature');
  Description: Gets all the current genes with transcripts with evidence for
               a specified hit on a particular type of feature. Optionally
               filter by analysis.
  Returntype : Listref of Bio::EnsEMBL::Gene.
  Exceptions : If feature_type is not exactly "dna_align_feature" or
               "protein_align_feature".
  Caller     : general
  Status     : Stable

=cut

sub fetch_all_by_transcript_supporting_evidence {
  my ($self, $hit_name, $feature_type, $analysis) = @_;

  # $feature_type is interpolated into the SQL below as a table name and
  # as part of a column name, so it must be exactly one of the two allowed
  # names. The alternation is grouped and the pattern anchored so that
  # strings which merely contain "dna" are rejected.
  if (!defined $feature_type
    || $feature_type !~ /\A(?:dna|protein)_align_feature\z/)
  {
    throw("feature type must be dna_align_feature or protein_align_feature");
  }

  # Default to empty strings so the SQL template interpolates cleanly
  # when no analysis filter is requested.
  my ($anal_from, $anal_where) = ('', '');
  if ($analysis) {
    $anal_from  = ", analysis a ";
    $anal_where = "AND a.analysis_id = f.analysis_id AND a.analysis_id=? ";
  }

  my $sql = qq(
      SELECT DISTINCT(g.gene_id)
        FROM gene g,
             transcript t,
             transcript_supporting_feature sf,
             $feature_type f
             $anal_from
       WHERE g.gene_id = t.gene_id
         AND g.is_current = 1
         AND t.transcript_id = sf.transcript_id
         AND sf.feature_id = f.${feature_type}_id
         AND sf.feature_type = ?
         AND f.hit_name=?
         $anal_where
  );

  my $sth = $self->prepare($sql);

  $sth->bind_param(1, $feature_type, SQL_VARCHAR);
  $sth->bind_param(2, $hit_name,     SQL_VARCHAR);
  # The third placeholder only exists when the analysis filter was added.
  $sth->bind_param(3, $analysis->dbID(), SQL_INTEGER) if ($analysis);

  $sth->execute();

  my @genes;

  # List assignment in the condition: the loop ends when no row is left,
  # not when the fetched value happens to be false.
  while (my ($id) = $sth->fetchrow_array()) {
    my $gene = $self->fetch_by_dbID($id);
    push(@genes, $gene) if $gene;
  }

  return \@genes;
} ## end sub fetch_all_by_transcript_supporting_evidence
2123 
# Supplies the trailing SQL fragment that orders gene rows by their
# internal identifier (g.gene_id). Any arguments are ignored and the
# same string is returned on every call.
# NOTE(review): presumably appended to generated queries by the parent
# adaptor class - confirm against the caller.
sub _final_clause {
  my $ordering = ' ORDER BY g.gene_id';
  return $ordering;
}
2127 
2128 1;
2129 
transcript
public transcript()
Bio::EnsEMBL::Registry::get_adaptor
public Adaptor get_adaptor()
Bio::EnsEMBL::Storable::dbID
public Int dbID()
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
map
public map()
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Definition: BaseFeatureAdaptor.pm:24
Bio::EnsEMBL::AltAlleleGroup
Definition: AltAlleleGroup.pm:67
accession
public accession()
Bio::EnsEMBL::DBSQL::SliceAdaptor
Definition: SliceAdaptor.pm:78
Bio::EnsEMBL::Gene
Definition: Gene.pm:37
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
exon
public exon()
Bio::EnsEMBL::DBSQL::GeneAdaptor
Definition: GeneAdaptor.pm:44
about
public about()
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
Bio::EnsEMBL::Registry::load_registry_from_db
public Int load_registry_from_db()
Bio::EnsEMBL::DBEntry
Definition: DBEntry.pm:12
Bio::EnsEMBL::OntologyTerm
Definition: OntologyTerm.pm:10
Bio::EnsEMBL::AltAlleleGroup::new
public Bio::EnsEMBL::AltAlleleGroup new()
info
public info()
Bio::EnsEMBL::Storable::new_fast
public Instance new_fast()
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68