ensembl-hive  2.8.1
BaseAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 
22 =head1 CONTACT
23 
24  Please email comments or questions to the public Ensembl
25  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 
27  Questions may also be sent to the Ensembl help desk at
28  <http://www.ensembl.org/Help/Contact>.
29 
30 =cut
31 
32 =head1 NAME
33 
34 Bio::EnsEMBL::DBSQL::BaseAdaptor - Base Adaptor for DBSQL adaptors
35 
36 =head1 SYNOPSIS
37 
38  # base adaptor provides
39 
40  # SQL prepare function
41  $adaptor->prepare("sql statement");
42 
43  # get of root DBAdaptor object
44  $adaptor->db();
45 
46  # constructor, ok for inheritence
47  $adaptor = Bio::EnsEMBL::DBSQL::SubClassOfBaseAdaptor->new($dbobj);
48 
49 =head1 DESCRIPTION
50 
51 This is a true base class for Adaptors in the Ensembl DBSQL
52 system.
53 
54 Adaptors are expected to have the following functions
55 
56  $obj = $adaptor->fetch_by_dbID($internal_id);
57 
58 which builds the object from the primary key of the object. This
59 function is crucial because it allows adaptors to collaborate relatively
60 independently of each other - in other words, we can change the schema
61 under one adaptor without too many knock on changes through the other
62 adaptors.
63 
64 Most adaptors will also have
65 
66  $dbid = $adaptor->store($obj);
67 
68 which stores the object. Currently the storing of an object also causes
69 the objects to set
70 
71  $obj->dbID();
72 
73 correctly and attach the adaptor.
74 
75 Other fetch functions go by the convention of
76 
77  @object_array = @{ $adaptor->fetch_all_by_XXXX($arguments_for_XXXX) };
78 
79 sometimes it returns an array ref denoted by the 'all' in the name of
80 the method, sometimes an individual object. For example
81 
82  $gene = $gene_adaptor->fetch_by_stable_id($stable_id);
83 
84 or
85 
86  @fp = @{ $simple_feature_adaptor->fetch_all_by_Slice($slice) };
87 
88 Occassionally adaptors need to provide access to lists of ids. In this
89 case the convention is to go list_XXXX, such as
90 
91  @gene_ids = @{ $gene_adaptor->list_geneIds() };
92 
93 (note: this method is poorly named)
94 
95 =cut
96 
98 
99 require Exporter;
100 use vars qw(@ISA @EXPORT);
101 use strict;
102 
103 use Bio::EnsEMBL::Utils::Exception qw(throw);
104 use Bio::EnsEMBL::Utils::Scalar qw(wrap_array);
105 use DBI qw(:sql_types);
106 use Data::Dumper;
107 use Scalar::Util qw/looks_like_number/;
108 
109 @ISA = qw(Exporter);
110 @EXPORT = (@{$DBI::EXPORT_TAGS{'sql_types'}});
111 
112 =head2 new
113 
114  Arg [1] : Bio::EnsEMBL::DBSQL::DBConnection $dbobj
115  Example : $adaptor = new AdaptorInheritedFromBaseAdaptor($dbobj);
116  Description: Creates a new BaseAdaptor object. The intent is that this
117  constructor would be called by an inherited superclass either
118  automatically or through $self->SUPER::new in an overridden
119  new method.
121  Exceptions : none
123  Status : Stable
124 
125 =cut
126 
127 sub new {
128  my ( $class, $dbobj ) = @_;
129 
130  my $self = bless {}, $class;
131 
132  if ( !defined $dbobj || !ref $dbobj ) {
133  throw("Don't have a db [$dbobj] for new adaptor");
134  }
135 
136  if ( $dbobj->isa('Bio::EnsEMBL::DBSQL::DBAdaptor') ) {
137  $self->db($dbobj);
138  $self->dbc( $dbobj->dbc );
139  $self->species_id( $dbobj->species_id() );
140  $self->is_multispecies( $dbobj->is_multispecies() );
141  } elsif ( ref($dbobj) =~ /DBAdaptor$/ ) {
142  $self->db($dbobj);
143  $self->dbc( $dbobj->dbc );
144  } elsif ( ref($dbobj) =~ /DBConnection$/ ) {
145  $self->dbc($dbobj);
146  } else {
147  throw("Don't have a DBAdaptor [$dbobj] for new adaptor");
148  }
149 
150  return $self;
151 }
152 
153 
154 =head2 prepare
155 
156  Arg [1] : string $string
157  a SQL query to be prepared by this adaptors database
158  Example : $sth = $adaptor->prepare("select yadda from blabla")
159  Description: provides a DBI statement handle from the adaptor. A convenience
160  function so you dont have to write $adaptor->db->prepare all the
161  time
162  Returntype : DBI::StatementHandle
163  Exceptions : none
164  Caller : Adaptors inherited from BaseAdaptor
165  Status : Stable
166 
167 =cut
168 
169 sub prepare {
170  my ( $self, $string ) = @_;
171 
172  # Uncomment next line to cancel caching on the SQL side.
173  # Needed for timing comparisons etc.
174  #$string =~ s/SELECT/SELECT SQL_NO_CACHE/i;
175 
176  return $self->dbc->prepare($string);
177 }
178 
179 
180 =head2 db
181 
182  Arg [1] : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor $obj
183  the database this adaptor is using.
184  Example : $db = $adaptor->db();
185  Description: Getter/Setter for the DatabaseConnection that this adaptor is
186  using.
187  Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
188  Exceptions : none
189  Caller : Adaptors inherited from BaseAdaptor
190  Status : Stable
191 
192 =cut
193 
194 sub db {
195  my ( $self, $value ) = @_;
196 
197  if ( defined($value) ) {
198  $self->{'db'} = $value;
199  }
200 
201  return $self->{'db'};
202 }
203 
204 =head2 dbc
205 
206  Arg [1] : (optional) Bio::EnsEMBL::DBSQL::DBConnection $obj
207  the database this adaptor is using.
208  Example : $db = $adaptor->db();
209  Description: Getter/Setter for the DatabaseConnection that this adaptor is
210  using.
212  Exceptions : none
213  Caller : Adaptors inherited from BaseAdaptor
214  Status : Stable
215 
216 =cut
217 
218 sub dbc {
219  my ( $self, $value ) = @_;
220 
221  if ( defined($value) ) {
222  $self->{'dbc'} = $value;
223  }
224 
225  return $self->{'dbc'};
226 }
227 
228 =head2 is_multispecies
229 
230  Arg [1] : (optional) boolean $arg
231  Example : if ($adaptor->is_multispecies()) { }
232  Description: Getter/Setter for the is_multispecies boolean of
233  to use for this adaptor.
234  Returntype : boolean
235  Exceptions : none
236  Caller : general
237  Status : Stable
238 
239 =cut
240 
241 sub is_multispecies {
242  my ( $self, $arg ) = @_;
243 
244  if ( defined($arg) ) {
245  $self->{_is_multispecies} = $arg;
246  }
247 
248  return $self->{_is_multispecies};
249 }
250 
251 =head2 species_id
252 
253  Arg [1] : (optional) int $species_id
254  The internal ID of the species in a multi-species database.
255  Example : $db = $adaptor->db();
256  Description: Getter/Setter for the internal ID of the species in a
257  multi-species database. The default species ID is 1.
258  Returntype : Integer
259  Exceptions : none
260  Caller : Adaptors inherited from BaseAdaptor
261  Status : Stable
262 
263 =cut
264 
265 sub species_id {
266  my ( $self, $value ) = @_;
267 
268  if ( defined($value) ) {
269  $self->{'species_id'} = $value;
270  }
271 
272  return $self->{'species_id'} || 1;
273 }
274 
275 
276 # list primary keys for a particular table
277 # args are table name and primary key field
278 # if primary key field is not supplied, tablename_id is assumed
279 # returns listref of IDs
280 sub _list_dbIDs {
281  my ( $self, $table, $pk, $ordered ) = @_;
282 
283  if ( !defined($pk) ) { $pk = $table . "_id" }
284 
285  my $sql = sprintf( "SELECT `%s` FROM `%s`", $pk, $table );
286 
287  my $join_with_cs = 0;
288  if ( $self->is_multispecies()
289  && $self->isa('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor')
290  && !$self->isa('Bio::EnsEMBL::DBSQL::UnmappedObjectAdaptor') )
291  {
292 
293  $sql .= q(
294 JOIN seq_region USING (seq_region_id)
295 JOIN coord_system cs USING (coord_system_id)
296 WHERE cs.species_id = ?
297 );
298 
299  $join_with_cs = 1;
300  }
301 
302  if ( defined($ordered) && $ordered ) {
303  $sql .= " ORDER BY seq_region_id, seq_region_start";
304  }
305 
306  my $sth = $self->prepare($sql);
307 
308  if ($join_with_cs) {
309  $sth->bind_param( 1, $self->species_id(), SQL_INTEGER );
310  }
311 
312  eval { $sth->execute() };
313  if ($@) {
314  throw("Detected an error whilst executing SQL '${sql}': $@");
315  }
316 
317  my $id;
318  $sth->bind_col( 1, \$id );
319 
320  my @out;
321  while ( $sth->fetch() ) {
322  push( @out, $id );
323  }
324 
325  return \@out;
326 } ## end sub _list_dbIDs
327 
328 
329 # _straight_join
330 
331 # Arg [1] : (optional) boolean $new_val
332 # Example : $self->_straight_join(1);
333 # $self->generic_fetch($constraint);
334 # $self->_straight_join(0);
335 # Description: PROTECTED Getter/Setter that turns on/off the use of
336 # a straight join in queries.
337 # Returntype : boolean
338 # Exceptions : none
339 # Caller : general
340 
341 sub _straight_join {
342  my $self = shift;
343  if(@_) {
344  $self->{'_straight_join'} = shift;
345  }
346 
347  return $self->{'_straight_join'};
348 }
349 
350 sub _can_straight_join {
351  my $self = shift;
352  return $self->dbc->_driver_object->can_straight_join;
353 }
354 
355 =head2 bind_param_generic_fetch
356 
357  Arg [1] : (optional) scalar $param
358  This is the parameter to bind
359  Arg [2] : (optional) int $sql_type
360  Type of the parameter (from DBI (:sql_types))
361  Example : $adaptor->bind_param_generic_fetch($stable_id,SQL_VARCHAR);
362  $adaptor->generic_fetch();
363  Description: When using parameters for the query, will call the bind_param to avoid
364  some security issues. If there are no arguments, will return the bind_parameters
365  ReturnType : listref
366  Exceptions: if called with one argument
367 
368 =cut
369 
370 sub bind_param_generic_fetch{
371  my $self = shift;
372  my $param = shift;
373  my $sql_type = shift;
374 
375  if (defined $param && !defined $sql_type){
376  throw("Need to specify sql_type for parameter $param\n");
377  }
378  elsif (defined $param && defined $sql_type){
379  #check when there is a SQL_INTEGER type that the parameter is really a number
380  if ($sql_type eq SQL_INTEGER){
381  throw "Trying to assign a non numerical parameter to an integer value in the database" if ($param !~ /^[+-]{0,1}\d+$/);
382  }
383  #both paramters have been entered, push it to the bind_param array
384  push @{$self->{'_bind_param_generic_fetch'}},[$param,$sql_type];
385  }
386  elsif (!defined $param && !defined $sql_type){
387  #when there are no arguments, return the array
388  return $self->{'_bind_param_generic_fetch'};
389  }
390 
391 }
392 
393 # Used to reset the params without circumventing scope
394 sub _bind_param_generic_fetch {
395  my ($self, $_bind_param_generic_fetch) = @_;
396  $self->{'_bind_param_generic_fetch'} = $_bind_param_generic_fetch if $_bind_param_generic_fetch;
397  return $self->{_bind_param_generic_fetch};
398 }
399 
400 =head2 generate_in_constraint
401 
402  Arg [1] : ArrayRef or Scalar $list
403  List or a single value of items to be pushed into an IN statement
404  Arg [2] : Scalar $column
405  Column this IN statement is being applied to. Please fully resolve the
406  column.
407  Arg [3] : Scalar $param_type
408  Data type which should be used when binding. Please use DBI data type symbols
409  Arg [4] : Scalar boolean $inline_variables
410  Boolean to control if variables are inlined in the constraint. If
411  false values are bound via bind_param_generic_fetch() (the default behaviour).
412 
413  Description : Used internally to generate a SQL constraint to restrict a query by an IN statement.
414  The code generates the complete IN statement.
415  Returntype : String
416  Exceptions : If no list is supplied, the list of values is empty or no data type was given
417  Caller : general
418 
419 =cut
420 
421 sub generate_in_constraint {
422  my ($self, $list, $column, $param_type, $inline_variables) = @_;
423  throw("A list of values must be given") if ! defined $list;
424  $list = wrap_array($list); # homogenise into an array
425  throw "We should be given at least one value to insert" if scalar(@{$list}) == 0;
426  throw "Please supply the DBI param type" if ! defined $param_type;
427  #Figure out if we need to quote our values if we are asked to inline the variables
428  my $quote_values = 1;
429  if($param_type == SQL_INTEGER || $param_type == SQL_TINYINT || $param_type == SQL_DOUBLE ) {
430  $quote_values = 0;
431  }
432 
433  my $constraint = qq{${column} IN (};
434  if($inline_variables) {
435  if($quote_values) {
436  $constraint .= join(q{,}, map { qq{"${_}"} } @{$list});
437  }
438  else {
439  $constraint .= join(q{,}, @{$list});
440  }
441  }
442  else {
443  my @subs = ('?') x scalar(@{$list});
444  $constraint .= join(q{,}, @subs);
445  $self->bind_param_generic_fetch($_, $param_type) for @{$list};
446  }
447  $constraint .= q{)};
448  return $constraint;
449 }
450 
451 =head2 generic_fetch
452 
453  Arg [1] : (optional) string $constraint
454  An SQL query constraint (i.e. part of the WHERE clause)
455  Arg [2] : (optional) Bio::EnsEMBL::AssemblyMapper $mapper
456  A mapper object used to remap features
457  as they are retrieved from the database
458  Arg [3] : (optional) Bio::EnsEMBL::Slice $slice
459  A slice that features should be remapped to
460  Example : $fts = $a->generic_fetch('contig_id in (1234, 1235)');
461  Description: Performs a database fetch and returns feature objects in
462  contig coordinates.
463  Returntype : listref of Bio::EnsEMBL::SeqFeature in contig coordinates
464  Exceptions : Thrown if there is an issue with querying the data
465  Caller : BaseFeatureAdaptor, ProxyDnaAlignFeatureAdaptor::generic_fetch
466  Status : Stable
467 
468 =cut
469 
470 sub generic_fetch {
471  my ($self, $constraint, $mapper, $slice) = @_;
472  my $sql = $self->_generate_sql($constraint);
473  my $params = $self->bind_param_generic_fetch();
474  $params ||= [];
475  $self->{_bind_param_generic_fetch} = undef;
476  my $sth = $self->db()->dbc()->prepare($sql);
477  my $i = 1;
478  foreach my $param (@{$params}){
479  $sth->bind_param($i,$param->[0],$param->[1]);
480  $i++;
481  }
482  eval { $sth->execute() };
483  if ($@) {
484  throw("Detected an error whilst executing SQL '${sql}': $@");
485  }
486 
487  my $res = $self->_objs_from_sth($sth, $mapper, $slice);
488  $sth->finish();
489  return $res;
490 }
491 
492 =head2 generic_count
493 
494  Arg [1] : (optional) string $constraint
495  An SQL query constraint (i.e. part of the WHERE clause)
496  Example : $number_feats = $a->generic_count('contig_id in (1234, 1235)');
497  Description: Performs a database fetch and returns a count of those features
498  found. This is analagous to C<generic_fetch()>
499  Returntype : Integer count of the elements.
500  Exceptions : Thrown if there is an issue with querying the data
501 
502 =cut
503 
504 sub generic_count {
505  my ($self, $constraint) = @_;
506  my $sql = $self->_generate_sql($constraint, 'count(*)');
507  my $params = $self->bind_param_generic_fetch();
508  $params ||= [];
509  $self->{_bind_param_generic_fetch} = undef;
510  my $h = $self->db()->dbc()->sql_helper();
511  my $count = $h->execute_single_result(-SQL => $sql, -PARAMS => $params);
512  return $count;
513 }
514 
515 sub _generate_sql {
516  my ($self, $constraint, @input_columns) = @_;
517 
518  my @tabs = $self->_tables();
519 
520  my $extra_default_where;
521 
522  # Hack for feature types that needs to be restricted to species_id (in
523  # coord_system).
524  if ( $self->is_multispecies()
525  && $self->isa('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor')
526  && !$self->isa('Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor')
527  && !$self->isa('Bio::EnsEMBL::DBSQL::UnmappedObjectAdaptor') )
528  {
529  # We do a check to see if there is already seq_region
530  # and coord_system defined to ensure we get the right
531  # alias. We then do the extra query irrespectively of
532  # what has already been specified by the user.
533  my %thash = map { $_->[0] => $_->[1] } @tabs;
534 
535  my $sr_alias =
536  ( exists( $thash{seq_region} ) ? $thash{seq_region} : 'sr' );
537  my $cs_alias =
538  ( exists( $thash{coord_system} ) ? $thash{coord_system} : 'cs' );
539 
540  if ( !exists( $thash{seq_region} ) ) {
541  push( @tabs, [ 'seq_region', $sr_alias ] );
542  }
543  if ( !exists( $thash{coord_system} ) ) {
544  push( @tabs, [ 'coord_system', $cs_alias ] );
545  }
546 
547  $extra_default_where = sprintf(
548  '%s.seq_region_id = %s.seq_region_id '
549  . 'AND %s.coord_system_id = %s.coord_system_id '
550  . 'AND %s.species_id = ?',
551  $tabs[0]->[1], $sr_alias, $sr_alias,
552  $cs_alias, $cs_alias );
553 
554  $self->bind_param_generic_fetch( $self->species_id(), SQL_INTEGER );
555  } ## end if ( $self->is_multispecies...)
556 
557  @input_columns = $self->_columns() if ! @input_columns;
558  my $columns = join(', ', @input_columns);
559 
560  #
561  # Construct a left join statement if one was defined, and remove the
562  # left-joined table from the table list
563  #
564  my @left_join_list = $self->_left_join();
565  my $left_join_prefix = '';
566  my $left_join = '';
567  my @tables;
568  if(@left_join_list) {
569  my %left_join_hash = map { $_->[0] => $_->[1] } @left_join_list;
570  while(my $t = shift @tabs) {
571  my $t_alias = $t->[0] . " " . $t->[1];
572  if( exists $left_join_hash{ $t->[0] } || exists $left_join_hash{$t_alias}) {
573  my $condition = $left_join_hash{ $t->[0] };
574  $condition ||= $left_join_hash{$t_alias};
575  my $syn = $t->[1];
576  $left_join .=
577  "\n LEFT JOIN " . $t->[0] . " $syn ON $condition ) ";
578  $left_join_prefix .= '(';
579  } else {
580  push @tables, $t;
581  }
582  }
583  } else {
584  @tables = @tabs;
585  }
586 
587  my $straight_join = '';
588 
589  if($self->_straight_join() and $self->_can_straight_join) {
590  $straight_join = "STRAIGHT_JOIN";
591  }
592 
593  #construct a nice table string like 'table1 t1, table2 t2'
594  my $tablenames = join(', ', map({ join(' ', @$_) } @tables));
595 
596  my $sql =
597  "SELECT $straight_join $columns \n"
598  . "FROM $left_join_prefix ($tablenames) $left_join";
599 
600  my $default_where = $self->_default_where_clause();
601  my $final_clause = $self->_final_clause;
602 
603  if ($extra_default_where) {
604  if ($default_where) {
605  $default_where .= "\n AND $extra_default_where";
606  } else {
607  $default_where = $extra_default_where;
608  }
609  }
610 
611  #append a where clause if it was defined
612  if ($constraint) {
613  $sql .= "\n WHERE $constraint ";
614  if ($default_where) {
615  $sql .= " AND\n $default_where ";
616  }
617  } elsif ($default_where) {
618  $sql .= "\n WHERE $default_where ";
619  }
620 
621  #append additional clauses which may have been defined, don't append ORDER BY twice
622  if (index($final_clause, "ORDER") == -1 || index($sql, "ORDER") == -1) {
623  $sql .= "\n$final_clause";
624  }
625 
626  # FOR DEBUG:
627  #printf(STDERR "SQL:\n%s\n", $sql);
628 
629  return $sql;
630 }
631 
632 
633 =head2 fetch_by_dbID
634 
635  Arg [1] : int $id
636  The unique database identifier for the feature to be obtained
637  Example : $feat = $adaptor->fetch_by_dbID(1234));
638  $feat = $feat->transform('contig');
639  Description: Returns the feature created from the database defined by the
640  the id $id. The feature will be returned in its native
641  coordinate system. That is, the coordinate system in which it
642  is stored in the database. In order to convert it to a
643  particular coordinate system use the transfer() or transform()
644  method. If the feature is not found in the database then
645  undef is returned instead
646  Returntype : Bio::EnsEMBL::Feature or undef
647  Exceptions : thrown if $id arg is not provided
648  does not exist
649  Caller : general
650  Status : Stable
651 
652 =cut
653 
654 sub fetch_by_dbID {
655  my ($self, $id) = @_;
656  if ($self->_no_id_cache()) {
657  return $self->_uncached_fetch_by_dbID($id);
658  }
659  return $self->_id_cache()->get($id);
660 }
661 
662 # The actual implmenetation moved sideways to allow for uncached access
663 # otherwise we'd constantly loop
664 
665 sub _uncached_fetch_by_dbID{
666  my ($self,$id) = @_;
667 
668  throw("id argument is required") if(!defined $id);
669 
670  #construct a constraint like 't1.table1_id = 123'
671  my @tabs = $self->_tables;
672  my ($name, $syn) = @{$tabs[0]};
673  $self->bind_param_generic_fetch($id,SQL_INTEGER);
674  my $constraint = "${syn}.${name}_id = ?";
675 
676  #Should only be one
677  my ($feat) = @{$self->generic_fetch($constraint)};
678 
679  return if(!$feat);
680 
681  return $feat;
682 }
683 
684 
685 =head2 fetch_all_by_dbID_list
686 
687  Arg [1] : listref of integers $id_list
688  The unique database identifiers for the features to
689  be obtained.
690  Arg [2] : optional - Bio::EnsEMBL::Slice to map features onto.
691  Example : @feats = @{$adaptor->fetch_all_by_dbID_list([1234, 2131, 982]))};
692  Description: Returns the features created from the database
693  defined by the the IDs in contained in the provided
694  ID list $id_list. The features will be returned
695  in their native coordinate system. That is, the
696  coordinate system in which they are stored in the
697  database. In order to convert the features to a
698  particular coordinate system use the transfer() or
699  transform() method. If none of the features are
700  found in the database a reference to an empty list is
701  returned.
702  Returntype : listref of Bio::EnsEMBL::Features
703  Exceptions : thrown if $id arg is not provided
704  does not exist
705  Caller : general
706  Status : Stable
707 
708 =cut
709 
710 sub fetch_all_by_dbID_list {
711  my ($self, $id_list_ref, $slice) = @_;
712  if ($self->_no_id_cache()) {
713  return $self->_uncached_fetch_all_by_dbID_list($id_list_ref, $slice);
714  }
715  return $self->_id_cache()->get_by_list($id_list_ref, $slice);
716 }
717 
718 # The actual implmenetation moved sideways to allow for uncached access
719 # otherwise we'd constantly loop
720 sub _uncached_fetch_all_by_dbID_list {
721  my ( $self, $id_list_ref, $slice ) = @_;
722  return $self->_uncached_fetch_all_by_id_list($id_list_ref, $slice, "dbID", 1);
723 } ## end sub fetch_all_by_dbID_list
724 
725 =head2 _uncached_fetch_all_by_id_list
726 
727  Arg [1] : listref of IDs
728  Arg [2] : (optional) Bio::EnsEMBL::Slice $slice
729  A slice that features should be remapped to
730  Arg [3] : String describing the ID type.
731  Valid values include dbID and stable_id. dbID is an alias for
732  the primary key, while other names map directly to table columns
733  of the Feature this adaptor manages.
734  Arg [4] : Boolean $numeric
735  Indicates if the incoming data is to be processed as a numeric
736  or as a String. If arg [3] was set to dbID then we default this to
737  be true. If arg [3] was set to stable_id then we default this to
738  be false.
739  When not using a standard arg[3] the IDs are assumed to be Strings.
740  Arg [5] : Integer $max_size
741  Control the maximum number of IDs sent to a database in a single
742  query. Defaults to 2K for Strings and 16K for integers. Only
743  provide if you know *exactly* why you need to edit it.
744  Example : $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
745  [qw(ENSG00000101321 ENSG00000101346 ENSG00000101367)],
746  undef,
747  "stable_id", 0); #using strings
748 
749  # Numeric set to true because we are using numerics
750  $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
751  [1,2,3,4],
752  undef,
753  "dbID", 1);
754 
755  # Numeric defaults to true because we are querying using dbID
756  $list_of_features = $adaptor->_uncached_fetch_all_by_id_list(
757  [1,2,3,4],
758  undef,
759  "dbID");
760  Description: This is a generic method used to fetch lists of features by IDs.
761  It avoids caches, meaning it is best suited for block fetching.
762  See fetch_all_by_dbID_list() for more info.
763  Returntype : ArrayRef of Bio::EnsEMBL::Feature
764  Exceptions : Thrown if a list of IDs is not supplied.
765  Caller : BaseFeatureAdaptor, BaseAdaptor and derived classes.
766 
767 =cut
768 
769 sub _uncached_fetch_all_by_id_list {
770  my ( $self, $id_list_ref, $slice, $id_type, $numeric, $max_size ) = @_;
771 
772  if ( !defined($id_list_ref) || ref($id_list_ref) ne 'ARRAY' ) {
773  throw("id_list list reference argument is required");
774  }
775 
776  if ( !@{$id_list_ref} ) { return [] }
777 
778  # Construct a constraint like 't1.table1_id = 123'
779  my @tabs = $self->_tables();
780  my ( $name, $syn ) = @{ $tabs[0] };
781 
782  # prepare column name for query. If the id_type was
783  # set to dbID then we assume the column must be
784  # tablename_id e.g. gene_id. Otherwise we assume the id_type
785  # is the field/column name
786  my $field_name;
787  if($id_type eq 'dbID') {
788  $field_name = $name.'_id';
789  # If numeric was not set default it to 1 since this is an int
790  $numeric = 1 if ! defined $numeric;
791  }
792  elsif($id_type eq 'stable_id') {
793  # If numeric was not set default it to 0 since this is a string
794  $numeric = 0 if ! defined $numeric;
795  $field_name = $id_type;
796  }
797  else {
798  $field_name = $id_type;
799  }
800 
801  my $sql_data_type;
802 
803  # Ensuring we do not exceed MySQL's max_allowed_packet (defaults to 1MB)
804  # by splitting large queries into smaller queries of at most 256KB
805  # (262,144 8-bit characters)
806  # If we had a numeric then really we are talking about working with
807  # integers. Normal max ensembl id size is 12 plus 2 characters for
808  # commas in our IN statement comes to 14. Even bloating this to 16 gives
809  # a max number of 16,384 IDs (262114/16).
810  #
811  if($numeric) {
812  my $first_id = $id_list_ref->[0];
813  if(!looks_like_number($first_id)) {
814  throw "You specified that we are looking for numerics but $first_id is not a numeric";
815  }
816  $max_size = 16384 if ! defined $max_size;
817  $sql_data_type = SQL_INTEGER;
818  }
819  # However when dealing with Strings those can be very large (assuming
820  # 128 is the max length of a stable ID). 128 is 8x smaller than our
821  # previous max expected integer so we reduce the max ids by 8. This gives
822  # 2048 IDs (16384/8)
823  else {
824  $max_size = 2048 if ! defined $max_size;
825  $sql_data_type = SQL_VARCHAR;
826  }
827 
828  # build up unique id list, also validate on the way by
829  my %id_list;
830  for (@{$id_list_ref}) {
831  $id_list{$_}++;
832  }
833  my @id_list = keys %id_list;
834 
835  my @out;
836  my $inline = 1;
837  while (@id_list) {
838  my @ids;
839  my $id_str;
840 
841  if ( scalar(@id_list) > $max_size ) {
842  @ids = splice( @id_list, 0, $max_size );
843  }
844  else {
845  @ids = @id_list;
846  @id_list = ();
847  }
848  # Push off to our IN statement constructor for this work
849  my $constraint = $self->generate_in_constraint(\@ids, "${syn}.${field_name}", $sql_data_type, $inline);
850  push @out, @{ $self->generic_fetch($constraint, undef, $slice) };
851  }
852 
853  return \@out;
854 }
855 
856 # might not be a good idea, but for convenience
857 # shouldnt be called on the BIG tables though
858 
859 sub fetch_all {
860  my $self = shift;
861  return $self->generic_fetch();
862 }
863 
864 =head2 last_insert_id
865 
866  Arg [1] : (optional) $field the name of the field the inserted ID was pushed
867  into
868  Arg [2] : (optional) HashRef used to pass extra attributes through to the
869  DBD driver
870  Arg [3] : (optional) $table the name of the table to use if the adaptor
871  does not implement C<_tables()>
872  Description : Delegating method which uses DBI to extract the last inserted
873  identifier. If using MySQL we just call the DBI method
874  L<DBI::last_insert_id()> since MySQL ignores any extra
875  arguments. See L<DBI> for more information about this
876  delegated method.
877  Example : my $id = $self->last_insert_id('my_id'); my $other_id = $self->last_insert_id();
878  Returntype : Scalar or undef
879 
880 =cut
881 
882 sub last_insert_id {
883  my ($self, $field, $attributes, $table) = @_;
884  my $dbc = $self->dbc();
885  my $dbh = $dbc->db_handle();
886  my @args;
887  unless (@args = $dbc->_driver_object->last_insert_id_args($field, $table)) {
888  if(!$table) {
889  my ($table_entry) = $self->_tables(); # first table entry
890  $table = $table_entry->[0]; # table_entry is [ name, alias ]
891  }
892  @args = (undef, $dbc->dbname(), $table, $field);
893  }
894  $attributes ||= {};
895  return $dbh->last_insert_id(@args, $attributes);
896 }
897 
898 =head2 insert_ignore_clause
899 =cut
900 
901 sub insert_ignore_clause {
902  my $self = shift;
903  return $self->dbc->_driver_object->insert_ignore_clause;
904 }
905 
906 =head2 _id_cache
907 
908  Description : Used to return an instance of a support BaseCache module
909  which can be used to speed up object access. The method
910  also respects the DBAdaptor's no_cache() flag and will
911  return undef in those situations
912  Example : my $cache = $self->_id_cache();
913  Returntype : Bio::EnsEMBL::DBSQL::Support::BaseCache
914 
915 =cut
916 
917 sub _id_cache {
918  my ($self) = @_;
919  return if $self->db()->no_cache() && !$self->ignore_cache_override;
920  if(! exists $self->{_id_cache}) {
921  $self->{_id_cache} = $self->_build_id_cache();
922  }
923  return $self->{_id_cache};
924 }
925 
926 =head2 _no_id_cache
927 
928  Description : Flags if the ID based caching is active or not. This could be
929  due to the adaptor not wanting to cache or because of
930  a global no_cache() flag on the DBAdaptor instance
931  Returntype : Boolean
932 
933 =cut
934 
935 sub _no_id_cache {
936  my ($self) = @_;
937  return 1 if ! $self->_id_cache();
938  return 0;
939 }
940 
941 =head2 ignore_cache_override
942 
943  Description : Method to interfere with no_cache directive from Registry on
944  a per adaptor basis. This method should be called after new()
945  in order to trigger the _build_id_cache at first query.
946  Example : $adaptor->ignore_cache_override(1);
947  Returntype : Boolean
948 
949 =cut
950 
951 sub ignore_cache_override {
952  my $self = shift;
953  $self->{'_override'} = shift if(@_);
954  unless (defined($self->{'_override'})) {return}
955  return $self->{'_override'};
956 }
957 
958 =head2 schema_version
959 
960  Description : Returns the schema version of the currently connected
961  DBAdaptor. The subroutine also caches this value so
962  repeated calls continue to be speedy.
963  Example : $adaptor->schema_version();
964  Returntype : Integer
965 
966 =cut
967 
968 sub schema_version {
969  my ($self) = @_;
970  return $self->{_schema_version} if exists $self->{_schema_version};
971  my $mc = $self->db()->get_MetaContainer();
972  return $self->{_schema_version} = $mc->get_schema_version();
973 }
974 
975 #_tables
976 #
977 # Args : none
978 # Example : $tablename = $self->_table_name()
979 # Description: ABSTRACT PROTECTED
980 # Subclasses are responsible for implementing this
981 # method. It should list of [tablename, alias] pairs.
982 # Additionally the primary table (with the dbID,
983 # analysis_id, and score) should be the first table in
984 # the list. e.g:
985 # ( ['repeat_feature', 'rf'],
986 # ['repeat_consensus', 'rc']);
987 # used to obtain features.
988 # Returntype : list of [tablename, alias] pairs
989 # Exceptions : thrown if not implemented by subclass
990 # Caller : BaseFeatureAdaptor::generic_fetch
991 #
992 
993 sub _tables {
994  throw( "abstract method _tables not defined "
995  . "by implementing subclass of BaseAdaptor" );
996 }
997 
998 
999 #_columns
1000 #
1001 # Args : none
1002 # Example : $tablename = $self->_columns()
1003 # Description: ABSTRACT PROTECTED
1004 # Subclasses are responsible for implementing this
1005 # method. It should return a list of columns to be
1006 # used for feature creation.
1007 # Returntype : list of strings
1008 # Exceptions : thrown if not implemented by subclass
1009 # Caller : BaseFeatureAdaptor::generic_fetch
1010 #
1011 
1012 sub _columns {
1013  throw( "abstract method _columns not defined "
1014  . "by implementing subclass of BaseAdaptor" );
1015 }
1016 
1017 
1018 # _default_where_clause
1019 #
1020 # Arg [1] : none
1021 # Example : none
1022 # Description: May be overridden to provide an additional where
1023 # constraint to the SQL query which is generated to
1024 # fetch feature records. This constraint is always
1025 # appended to the end of the generated where clause
1026 # Returntype : string
1027 # Exceptions : none
1028 # Caller : generic_fetch
1029 #
1030 
1031 sub _default_where_clause { return '' }
1032 
1033 
1034 # _left_join
1035 
1036 # Arg [1] : none
1037 # Example : none
1038 # Description: Can be overridden by a subclass to specify any left
1039 # joins which should occur. The table name specigfied
1040 # in the join must still be present in the return
1041 # values of.
1042 # Returntype : a {'tablename' => 'join condition'} pair
1043 # Exceptions : none
1044 # Caller : general
1045 #
1046 
1047 sub _left_join { return () }
1048 
1049 
1050 #_final_clause
1051 
1052 # Arg [1] : none
1053 # Example : none
1054 # Description: May be overriden to provide an additional clause
1055 # to the end of the SQL query used to fetch feature
1056 # records. This is useful to add a required ORDER BY
1057 # clause to the query for example.
1058 # Returntype : string
1059 # Exceptions : none
1060 # Caller : generic_fetch
1061 
1062 sub _final_clause { return '' }
1063 
1064 
1065 #_objs_from_sth
1066 
1067 # Arg [1] : DBI::row_hashref $hashref containing key-value pairs
1068 # for each of the columns specified by the _columns method
1069 # Example : my @feats = $self->_obj_from_hashref
1070 # Description: ABSTRACT PROTECTED
1071 # The subclass is responsible for implementing this
1072 # method. It should take in a DBI row hash reference
1073 # and return a list of created features in contig
1074 # coordinates.
1075 # Returntype : list of Bio::EnsEMBL::*Features in contig coordinates
1076 # Exceptions : thrown if not implemented by subclass
1077 # Caller : BaseFeatureAdaptor::generic_fetch
1078 
1079 sub _objs_from_sth {
1080  throw( "abstract method _objs_from_sth not defined "
1081  . "by implementing subclass of BaseAdaptor" );
1082 }
1083 
1084 #_build_id_cache
1085 
1086 # Example : my $id_cache = $self->_build_id_cache
1087 # Description: ABSTRACT PROTECTED
1088 # The subclass is responsible for returning an instance
1089 # of the Bio::EnsEMBL::DBSQL::Support::BaseCache
1090 # which can be used to speed up ID based fetch operations
1091 # Returntype : Instance of Bio::EnsEMBL::DBSQL::Support::BaseCache
1092 # Exceptions : Could be thrown by the implementing sub-class
1093 # Caller : BaseAdaptor::_id_cache
1094 sub _build_id_cache {
1095  return;
1096 }
1097 
1098 #
1099 # Given a logic name and an existing constraint this will
1100 # add an analysis table constraint to the feature. Note that if no
1101 # analysis_id exists in the columns of the primary table then no
1102 # constraint is added at all
1103 #
1104 sub _logic_name_to_constraint {
1105  my $self = shift;
1106  my $constraint = shift;
1107  my $logic_name = shift;
1108 
1109  return $constraint if(!$logic_name);
1110 
1111  #make sure that an analysis_id exists in the primary table
1112  my ($prim_tab) = $self->_tables();
1113  my $prim_synonym = $prim_tab->[1];
1114 
1115  my $found_analysis=0;
1116  foreach my $col ($self->_columns) {
1117  my ($syn,$col_name) = split(/\./,$col);
1118  next if($syn ne $prim_synonym);
1119  if($col_name eq 'analysis_id') {
1120  $found_analysis = 1;
1121  last;
1122  }
1123  }
1124 
1125  if(!$found_analysis) {
1126  warning("This feature is not associated with an analysis.\n" .
1127  "Ignoring logic_name argument = [$logic_name].\n");
1128  return $constraint;
1129  }
1130 
1131  my $aa = $self->db->get_AnalysisAdaptor();
1132  my $an = $aa->fetch_by_logic_name($logic_name);
1133 
1134  if ( !defined($an) ) {
1135  return;
1136  }
1137 
1138  my $an_id = $an->dbID();
1139 
1140  $constraint .= ' AND' if($constraint);
1141  $constraint .= " ${prim_synonym}.analysis_id = $an_id";
1142  return $constraint;
1143 }
1144 
1145 
1146 1;
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
Bio::EnsEMBL::AssemblyMapper
Definition: AssemblyMapper.pm:49
map
public map()
Bio::EnsEMBL::SeqFeature
Definition: SeqFeature.pm:30
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::DBSQL::BaseAdaptor::prepare
public DBI::StatementHandle prepare()
Bio::EnsEMBL::DBSQL::DBConnection::prepare
public DBI prepare()
about
public about()
Bio::EnsEMBL::DBSQL::BaseAdaptor
Definition: BaseAdaptor.pm:71
Bio::EnsEMBL::DBSQL::DBConnection
Definition: DBConnection.pm:42
Bio::EnsEMBL::Utils::Scalar
Definition: Scalar.pm:66
info
public info()
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68