ensembl-hive  2.8.1
DnaAlignFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor - Adaptor for DnaAlignFeatures
34 
35 =head1 SYNOPSIS
36 
37  $dafa = $registry->get_adaptor( 'Human', 'Core', 'DnaAlignFeature' );
38 
39  my @features = @{ $dafa->fetch_all_by_Slice($slice) };
40 
41  $dafa->store(@features);
42 
43 =head1 DESCRIPTION
44 
45 This is an adaptor responsible for the retrieval and storage of
46 DnaDnaAlignFeatures from the database. This adaptor inherits most of its
47 functionality from the BaseAlignFeatureAdaptor and BaseFeatureAdaptor
48 superclasses.
49 
50 =head1 METHODS
51 
52 =cut
53 
54 
55 package Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor;
56 
57 use vars qw(@ISA);
58 use strict;
61 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
62 
64 
65 
66 =head2 _tables
67 
68  Args : none
69  Example : @tabs = $self->_tables
70  Description: PROTECTED implementation of the abstract method inherited from
71  BaseFeatureAdaptor. Returns list of [tablename, alias] pairs
72  Returntype : list of listrefs of strings
73  Exceptions : none
75  Status : Stable
76 
77 =cut
78 
79 sub _tables {
80  my $self = shift;
81 
82  return (['dna_align_feature', 'daf'],['external_db','exdb']);
83 }
84 
85 
86 sub _left_join{
87  return (['external_db',"exdb.external_db_id = daf.external_db_id"]);
88 }
89 
90 =head2 _columns
91 
92  Args : none
93  Example : @columns = $self->_columns
94  Description: PROTECTED implementation of abstract superclass method.
95  Returns a list of columns that are needed for object creation.
96  Returntype : list of strings
97  Exceptions : none
99  Status : Stable
100 
101 =cut
102 
103 sub _columns {
104  my $self = shift;
105 
106  #warning, implementation of _objs_from_sth method depends on order of list
107  return qw(daf.dna_align_feature_id
108  daf.seq_region_id
109  daf.analysis_id
110  daf.seq_region_start
111  daf.seq_region_end
112  daf.seq_region_strand
113  daf.hit_start
114  daf.hit_end
115  daf.hit_name
116  daf.hit_strand
117  daf.cigar_line
118  daf.evalue
119  daf.perc_ident
120  daf.score
121  daf.external_db_id
122  daf.hcoverage
123  daf.align_type
124  exdb.db_name
125  exdb.db_display_name);
126 }
127 
128 
129 =head2 store
130 
131  Arg [1] : list of Bio::EnsEMBL::DnaAlignFeatures @feats
132  the features to store in the database
133  Example : $dna_align_feature_adaptor->store(@features);
134  Description: Stores a list of DnaAlignFeatures in the database
135  Returntype : none
136  Exceptions : throw if any of the provided features cannot be stored
137  which may occur if:
138  * The feature does not have an associate Slice
139  * The feature does not have an associated analysis
140  * The Slice the feature is associated with is on a seq_region
141  unknown to this database
142  A warning is given if:
143  * The feature has already been stored in this db
144  Caller : Pipeline
145  Status : Stable
146 
147 =cut
148 
149 sub store {
150  my ( $self, @feats ) = @_;
151 
152  throw("Must call store with features") if ( scalar(@feats) == 0 );
153 
154  my @tabs = $self->_tables;
155  my ($tablename) = @{ $tabs[0] };
156 
157  my $db = $self->db();
158  my $analysis_adaptor = $db->get_AnalysisAdaptor();
159 
160  my $sth = $self->prepare(
161  "INSERT INTO $tablename (seq_region_id, seq_region_start,
162  seq_region_end, seq_region_strand,
163  hit_start, hit_end, hit_strand, hit_name,
164  cigar_line, analysis_id, score, evalue,
165  perc_ident, external_db_id, hcoverage, align_type)
166  VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" # 16 arguments, external_data removed and align_type added
167  );
168 
169 FEATURE:
170  foreach my $feat (@feats) {
171  if ( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaDnaAlignFeature") )
172  {
173  throw("feature must be a Bio::EnsEMBL::DnaDnaAlignFeature, not a [". ref($feat). "]." );
174  }
175 
176  if ( $feat->is_stored($db) ) {
177  warning( "DnaDnaAlignFeature [".$feat->dbID()."] is already stored in this database." );
178  next FEATURE;
179  }
180 
181  my $hstart = $feat->hstart();
182  my $hend = $feat->hend();
183  my $hstrand = $feat->hstrand();
184  $self->_check_start_end_strand( $hstart, $hend, $hstrand, $feat->hslice );
185 
186  my $cigar_string = $feat->cigar_string();
187  if ( !$cigar_string ) {
188  $cigar_string = $feat->length() . 'M';
189  warning( "DnaDnaAlignFeature does not define a cigar_string.\n"
190  . "Assuming ungapped block with cigar_line=$cigar_string ." );
191  }
192  my $align_type = $feat->align_type();
193 
194  my $hseqname = $feat->hseqname();
195  if ( !$hseqname ) {
196  throw("DnaDnaAlignFeature must define an hseqname.");
197  }
198 
199  if ( !defined( $feat->analysis ) ) {
200  throw(
201  "An analysis must be attached to the features to be stored.");
202  }
203 
204  #store the analysis if it has not been stored yet
205  if ( !$feat->analysis->is_stored($db) ) {
206  $analysis_adaptor->store( $feat->analysis() );
207  }
208 
209  my $original = $feat;
210  my $seq_region_id;
211  ( $feat, $seq_region_id ) = $self->_pre_store($feat);
212 
213  $sth->bind_param( 1, $seq_region_id, SQL_INTEGER );
214  $sth->bind_param( 2, $feat->start, SQL_INTEGER );
215  $sth->bind_param( 3, $feat->end, SQL_INTEGER );
216  $sth->bind_param( 4, $feat->strand, SQL_TINYINT );
217  $sth->bind_param( 5, $hstart, SQL_INTEGER );
218  $sth->bind_param( 6, $hend, SQL_INTEGER );
219  $sth->bind_param( 7, $hstrand, SQL_TINYINT );
220  $sth->bind_param( 8, $hseqname, SQL_VARCHAR );
221  $sth->bind_param( 9, $cigar_string, SQL_LONGVARCHAR );
222  $sth->bind_param( 10, $feat->analysis->dbID, SQL_INTEGER );
223  $sth->bind_param( 11, $feat->score, SQL_DOUBLE );
224  $sth->bind_param( 12, $feat->p_value, SQL_DOUBLE );
225  $sth->bind_param( 13, $feat->percent_id, SQL_FLOAT );
226  $sth->bind_param( 14, $feat->external_db_id, SQL_INTEGER );
227  $sth->bind_param( 15, $feat->hcoverage, SQL_DOUBLE );
228  $sth->bind_param( 16, $feat->align_type, SQL_VARCHAR);
229  $sth->execute();
230 
231  my $dbId = $self->last_insert_id("${tablename}_id", undef, $tablename);
232 
233  # store attributes if there are any
234  my $attr_adaptor = $db->get_AttributeAdaptor();
235  $attr_adaptor->store_on_DnaDnaAlignFeature($dbId, $feat->get_all_Attributes);
236 
237  $original->dbID( $dbId );
238  $original->adaptor($self);
239  } ## end foreach my $feat (@feats)
240 
241  $sth->finish();
242 } ## end sub store
243 
244 
245 =head2 remove
246 
247  Arg [1] : A feature $feature
248  Example : $feature_adaptor->remove($feature);
249  Description: This removes a feature from the database. The table the
250  feature is removed from is defined by the abstract method
251  _tablename, and the primary key of the table is assumed
252  to be _tablename() . '_id'. The feature argument must
253  be an object implementing the dbID method, and for the
254  feature to be removed from the database a dbID value must
255  be returned.
256  Returntype : none
257  Exceptions : thrown if $feature arg does not implement dbID(), or if
258  $feature->dbID is not a true value
259  Caller : general
260  Status : Stable
261 
262 =cut
263 
264 
265 sub remove {
266  my ($self, $feature) = @_;
267 
268  if(!$feature || !ref($feature) || !$feature->isa('Bio::EnsEMBL::Feature')) {
269  throw('Feature argument is required');
270  }
271 
272  if(!$feature->is_stored($self->db)) {
273  throw("This feature is not stored in this database");
274  }
275 
276  my @tabs = $self->_tables;
277  my ($table) = @{$tabs[0]};
278 
279  my $sth = $self->prepare("DELETE FROM $table WHERE ${table}_id = ?");
280  $sth->bind_param(1,$feature->dbID,SQL_INTEGER);
281  $sth->execute();
282 
283  # remove the attributes associated with this feature
284  my $attrib_adaptor = $self->db->get_AttributeAdaptor;
285  $attrib_adaptor->remove_from_DnaDnaAlignFeature($feature);
286 
287  #unset the feature dbID ad adaptor
288  $feature->dbID(undef);
289  $feature->adaptor(undef);
290 
291  return;
292 }
293 
294 
295 =head2 _objs_from_sth
296 
297  Arg [1] : DBI statement handle $sth
298  an exectuted DBI statement handle generated by selecting
299  the columns specified by _columns() from the table specified
300  by _table()
301  Example : @dna_dna_align_feats = $self->_obj_from_hashref
302  Description: PROTECTED implementation of superclass abstract method.
303  Creates DnaDnaAlignFeature objects from a DBI hashref
304  Returntype : listref of Bio::EnsEMBL::DnaDnaAlignFeatures
305  Exceptions : none
306  Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
307  Status : Stable
308 
309 =cut
310 
311 sub _objs_from_sth {
312  my ( $self, $sth, $mapper, $dest_slice ) = @_;
313 
314  #
315  # This code is ugly because an attempt has been made to remove as many
316  # function calls as possible for speed purposes. Thus many caches and
317  # a fair bit of gymnastics is used.
318  #
319 
320  # In case of userdata we need the features on the dest_slice. In case
321  # of get_all_supporting_features dest_slice is not provided.
322  my $sa = ( $dest_slice
323  ? $dest_slice->adaptor()
324  : $self->db()->get_SliceAdaptor() );
325  my $aa = $self->db->get_AnalysisAdaptor();
326 
327  my @features;
328  my %analysis_hash;
329  my %slice_hash;
330  my %sr_name_hash;
331  my %sr_cs_hash;
332 
333  my ( $dna_align_feature_id, $seq_region_id,
334  $analysis_id, $seq_region_start,
335  $seq_region_end, $seq_region_strand,
336  $hit_start, $hit_end,
337  $hit_name, $hit_strand,
338  $cigar_line, $evalue,
339  $perc_ident, $score,
340  $external_db_id, $hcoverage,
341  $align_type,
342  $external_db_name, $external_display_db_name );
343 
344  $sth->bind_columns( \( $dna_align_feature_id, $seq_region_id,
345  $analysis_id, $seq_region_start,
346  $seq_region_end, $seq_region_strand,
347  $hit_start, $hit_end,
348  $hit_name, $hit_strand,
349  $cigar_line, $evalue,
350  $perc_ident, $score,
351  $external_db_id, $hcoverage,
352  $align_type,
353  $external_db_name, $external_display_db_name )
354  );
355 
356  my $dest_slice_start;
357  my $dest_slice_end;
358  my $dest_slice_strand;
359  my $dest_slice_length;
360  my $dest_slice_cs;
361  my $dest_slice_sr_name;
362  my $dest_slice_sr_id;
363  my $asma;
364 
365  if ($dest_slice) {
366  $dest_slice_start = $dest_slice->start();
367  $dest_slice_end = $dest_slice->end();
368  $dest_slice_strand = $dest_slice->strand();
369  $dest_slice_length = $dest_slice->length();
370  $dest_slice_cs = $dest_slice->coord_system();
371  $dest_slice_sr_name = $dest_slice->seq_region_name();
372  $dest_slice_sr_id = $dest_slice->get_seq_region_id();
373  $asma = $self->db->get_AssemblyMapperAdaptor();
374  }
375 
376  FEATURE: while($sth->fetch()) {
377 
378  #get the analysis object
379  my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
380  $analysis_hash{$analysis_id} = $analysis;
381 
382  #need to get the internal_seq_region, if present
383  $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
384  my $slice = $slice_hash{"ID:".$seq_region_id};
385 
386  if (!$slice) {
387  $slice = $sa->fetch_by_seq_region_id($seq_region_id);
388  $slice_hash{"ID:".$seq_region_id} = $slice;
389  $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
390  $sr_cs_hash{$seq_region_id} = $slice->coord_system();
391  }
392 
393  #obtain a mapper if none was defined, but a dest_seq_region was
394  if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
395  $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
396  }
397 
398  my $sr_name = $sr_name_hash{$seq_region_id};
399  my $sr_cs = $sr_cs_hash{$seq_region_id};
400 
401  #
402  # remap the feature coordinates to another coord system
403  # if a mapper was provided
404  #
405 
406  if ($mapper) {
407 
408  if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
409  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
410  $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
411 
412  } else {
413  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
414  $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
415  }
416 
417  #skip features that map to gaps or coord system boundaries
418  next FEATURE if (!defined($seq_region_id));
419 
420  #get a slice in the coord system we just mapped to
421  $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
422  }
423 
424  #
425  # If a destination slice was provided convert the coords.
426  #
427  if (defined($dest_slice)) {
428  my $seq_region_len = $dest_slice->seq_region_length();
429 
430  if ( $dest_slice_strand == 1 ) {
431  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
432  $seq_region_end = $seq_region_end - $dest_slice_start + 1;
433 
434  if ( $dest_slice->is_circular ) {
435  # Handle circular chromosomes.
436 
437  if ( $seq_region_start > $seq_region_end ) {
438  # Looking at a feature overlapping the chromosome origin.
439 
440  if ( $seq_region_end > $dest_slice_start ) {
441  # Looking at the region in the beginning of the chromosome
442  $seq_region_start -= $seq_region_len;
443  }
444  if ( $seq_region_end < 0 ) {
445  $seq_region_end += $seq_region_len;
446  }
447  } else {
448  if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
449  # Looking at the region overlapping the chromosome
450  # origin and a feature which is at the beginning of the
451  # chromosome.
452  $seq_region_start += $seq_region_len;
453  $seq_region_end += $seq_region_len;
454  }
455  }
456  }
457  } else {
458 
459  my $start = $dest_slice_end - $seq_region_end + 1;
460  my $end = $dest_slice_end - $seq_region_start + 1;
461 
462  if ($dest_slice->is_circular()) {
463 
464  if ($dest_slice_start > $dest_slice_end) {
465  # slice spans origin or replication
466 
467  if ($seq_region_start >= $dest_slice_start) {
468  $end += $seq_region_len;
469  $start += $seq_region_len if $seq_region_end > $dest_slice_start;
470 
471  } elsif ($seq_region_start <= $dest_slice_end) {
472  # do nothing
473  } elsif ($seq_region_end >= $dest_slice_start) {
474  $start += $seq_region_len;
475  $end += $seq_region_len;
476 
477  } elsif ($seq_region_end <= $dest_slice_end) {
478  $end += $seq_region_len if $end < 0;
479 
480  } elsif ($seq_region_start > $seq_region_end) {
481  $end += $seq_region_len;
482  }
483 
484  } else {
485 
486  if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
487  # do nothing
488  } elsif ($seq_region_start > $seq_region_end) {
489  if ($seq_region_start <= $dest_slice_end) {
490  $start -= $seq_region_len;
491  } elsif ($seq_region_end >= $dest_slice_start) {
492  $end += $seq_region_len;
493  }
494  }
495  }
496  }
497 
498  $seq_region_start = $start;
499  $seq_region_end = $end;
500  $seq_region_strand *= -1;
501 
502  } ## end else [ if ( $dest_slice_strand...)]
503 
504  # Throw away features off the end of the requested slice or on
505  # different seq_region.
506  if ($seq_region_end < 1
507  || $seq_region_start > $dest_slice_length
508  || ($dest_slice_sr_id != $seq_region_id)) {
509  next FEATURE;
510  }
511  $slice = $dest_slice;
512  }
513 
514  # Finally, create the new DnaAlignFeature.
515  push( @features,
516  $self->_create_feature_fast(
517  'Bio::EnsEMBL::DnaDnaAlignFeature', {
518  'slice' => $slice,
519  'start' => $seq_region_start,
520  'end' => $seq_region_end,
521  'strand' => $seq_region_strand,
522  'hseqname' => $hit_name,
523  'hstart' => $hit_start,
524  'hend' => $hit_end,
525  'hstrand' => $hit_strand,
526  'score' => $score,
527  'p_value' => $evalue,
528  'percent_id' => $perc_ident,
529  'cigar_string' => $cigar_line,
530  'analysis' => $analysis,
531  'adaptor' => $self,
532  'dbID' => $dna_align_feature_id,
533  'external_db_id' => $external_db_id,
534  'hcoverage' => $hcoverage,
535  'align_type' => $align_type,
536  'dbname' => $external_db_name,
537  'db_display_name' => $external_display_db_name
538  } ) );
539 
540  } ## end while ( $sth->fetch() )
541 
542  return \@features;
543 } ## end sub _objs_from_sth
544 
545 =head2 list_dbIDs
546 
547  Arg [1] : none
548  Example : @feature_ids = @{$dna_align_feature_adaptor->list_dbIDs()};
549  Description: Gets an array of internal ids for all dna align features in
550  the current db
551  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
552  Returntype : list of ints
553  Exceptions : none
554  Caller : ?
555  Status : Stable
556 
557 =cut
558 
559 sub list_dbIDs {
560  my ($self, $ordered) = @_;
561 
562  return $self->_list_dbIDs("dna_align_feature",undef, $ordered);
563 }
564 
565 1;
566 
567 
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Definition: BaseFeatureAdaptor.pm:24
Bio::EnsEMBL::DBSQL::BaseAdaptor::generic_fetch
public Listref generic_fetch()
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::fetch_all_by_Slice
public Listref fetch_all_by_Slice()
Bio::EnsEMBL::DnaDnaAlignFeature
Definition: DnaDnaAlignFeature.pm:15
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::DBSQL::DnaAlignFeatureAdaptor
Definition: DnaAlignFeatureAdaptor.pm:25
Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor
Definition: BaseAlignFeatureAdaptor.pm:18