ensembl-hive  2.7.0
ProteinAlignFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 Adaptor for ProteinAlignFeatures
35 
36 =head1 SYNOPSIS
37 
38  $pafa =
39  $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );
40 
41  my @features = @{ $pafa->fetch_all_by_Slice($slice) };
42 
43  $pafa->store(@features);
44 
45 =head1 METHODS
46 
47 =cut
48 
49 
50 package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
51 use vars qw(@ISA);
52 use strict;
53 
56 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
58 
59 
60 =head2 store
61 
62  Arg [1] : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
63  Example : $protein_align_feature_adaptor->store(@feats);
64  Description: stores a list of ProteinAlignFeatures in the database
65  Returntype : none
66  Exceptions : throw if any of the provided features cannot be stored
67  which may occur if:
68  * The feature does not have an associated Slice
69  * The feature does not have an associated analysis
70  * The Slice the feature is associated with is on a seq_region
71  unknown to this database
72  A warning is given if:
73  * The feature has already been stored in this db
74  Caller : Pipeline
75  Status : Stable
76 
77 =cut
78 
79 
80 sub store{
81  my ($self, @feats) = @_;
82 
83  throw("Must call store with features") if( scalar(@feats) == 0 );
84 
85  my @tabs = $self->_tables;
86  my ($tablename) = @{$tabs[0]};
87 
88  my $db = $self->db();
89  my $slice_adaptor = $db->get_SliceAdaptor();
90  my $analysis_adaptor = $db->get_AnalysisAdaptor();
91 
92  my $sth = $self->prepare(
93  "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
94  seq_region_strand, hit_start, hit_end,
95  hit_name, cigar_line,
96  analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, align_type)
97  VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?,?)");
98 
99  FEATURE: foreach my $feat ( @feats ) {
100  if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
101  throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
102  . " not a [".ref($feat)."].");
103  }
104 
105  if($feat->is_stored($db)) {
106  warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
107  " in this database.");
108  next FEATURE;
109  }
110 
111  #sanity check the hstart and hend
112  my $hstart = $feat->hstart();
113  my $hend = $feat->hend();
114  $self->_check_start_end_strand($hstart,$hend,1,$feat->hslice);
115 
116  my $cigar_string = $feat->cigar_string();
117  if(!$cigar_string) {
118  $cigar_string = $feat->length() . 'M';
119  warning("DnaPepAlignFeature does not define a cigar_string.\n" .
120  "Assuming ungapped block with cigar_string=$cigar_string\n");
121  }
122 
123  my $hseqname = $feat->hseqname();
124  if(!$hseqname) {
125  throw("DnaPepAlignFeature must define an hseqname.");
126  }
127 
128  if(!defined($feat->analysis)) {
129  throw("An analysis must be attached to the features to be stored.");
130  }
131 
132  #store the analysis if it has not been stored yet
133  if(!$feat->analysis->is_stored($db)) {
134  $analysis_adaptor->store($feat->analysis());
135  }
136 
137  my $slice = $feat->slice();
138  if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
139  throw("A slice must be attached to the features to be stored.");
140  }
141 
142  my $original = $feat;
143  my $seq_region_id;
144  ($feat, $seq_region_id) = $self->_pre_store($feat);
145 
146  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
147  $sth->bind_param(2,$feat->start,SQL_INTEGER);
148  $sth->bind_param(3,$feat->end,SQL_INTEGER);
149  $sth->bind_param(4,$feat->strand,SQL_TINYINT);
150  $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
151  $sth->bind_param(6,$feat->hend,SQL_INTEGER);
152  $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
153  $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
154  $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
155  $sth->bind_param(10,$feat->score,SQL_DOUBLE);
156  $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
157  $sth->bind_param(12,$feat->percent_id,SQL_REAL);
158  $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
159  $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
160  $sth->bind_param(15,$feat->align_type,SQL_VARCHAR);
161 
162  $sth->execute();
163  my $dbId = $self->last_insert_id("${tablename}_id", undef, $tablename);
164  $original->dbID($dbId);
165  $original->adaptor($self);
166  }
167 
168  $sth->finish();
169 }
170 
171 
172 =head2 _objs_from_sth
173 
174  Arg [1] : DBI statement handle $sth
175  an exectuted DBI statement handle generated by selecting
176  the columns specified by _columns() from the table specified
177  by _table()
178  Example : @dna_dna_align_feats = $self->_obj_from_hashref
179  Description: PROTECTED implementation of superclass abstract method.
180  Creates DnaDnaAlignFeature objects from a DBI hashref
181  Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
182  Exceptions : none
183  Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
184  Status : Stable
185 
186 =cut
187 
188 sub _objs_from_sth {
189  my ($self, $sth, $mapper, $dest_slice) = @_;
190 
191  #
192  # This code is ugly because an attempt has been made to remove as many
193  # function calls as possible for speed purposes. Thus many caches and
194  # a fair bit of gymnastics is used.
195  #
196 
197  my $sa = $self->db()->get_SliceAdaptor();
198  my $aa = $self->db()->get_AnalysisAdaptor();
199 
200  my @features;
201  my %analysis_hash;
202  my %slice_hash;
203  my %sr_name_hash;
204  my %sr_cs_hash;
205 
206  my (
207  $protein_align_feature_id, $seq_region_id, $seq_region_start,
208  $seq_region_end, $analysis_id, $seq_region_strand,
209  $hit_start, $hit_end, $hit_name,
210  $cigar_line, $evalue, $perc_ident,
211  $score, $external_db_id, $hcoverage, $align_type,
212  $external_db_name, $external_display_db_name );
213 
214  $sth->bind_columns(\(
215  $protein_align_feature_id, $seq_region_id, $seq_region_start,
216  $seq_region_end, $analysis_id, $seq_region_strand,
217  $hit_start, $hit_end, $hit_name,
218  $cigar_line, $evalue, $perc_ident,
219  $score, $external_db_id, $hcoverage, $align_type,
220  $external_db_name, $external_display_db_name ));
221 
222  my $dest_slice_start;
223  my $dest_slice_end;
224  my $dest_slice_strand;
225  my $dest_slice_length;
226  my $dest_slice_cs;
227  my $dest_slice_sr_name;
228  my $dest_slice_sr_id;
229  my $asma;
230 
231  if ($dest_slice) {
232  $dest_slice_start = $dest_slice->start();
233  $dest_slice_end = $dest_slice->end();
234  $dest_slice_strand = $dest_slice->strand();
235  $dest_slice_length = $dest_slice->length();
236  $dest_slice_cs = $dest_slice->coord_system();
237  $dest_slice_sr_name = $dest_slice->seq_region_name();
238  $dest_slice_sr_id = $dest_slice->get_seq_region_id();
239  $asma = $self->db->get_AssemblyMapperAdaptor();
240  }
241 
242  FEATURE: while($sth->fetch()) {
243 
244  #get the analysis object
245  my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
246  $analysis_hash{$analysis_id} = $analysis;
247 
248  #need to get the internal_seq_region, if present
249  $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
250  my $slice = $slice_hash{"ID:".$seq_region_id};
251 
252  if (!$slice) {
253  $slice = $sa->fetch_by_seq_region_id($seq_region_id);
254  $slice_hash{"ID:".$seq_region_id} = $slice;
255  $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
256  $sr_cs_hash{$seq_region_id} = $slice->coord_system();
257  }
258 
259  #obtain a mapper if none was defined, but a dest_seq_region was
260  if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
261  $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
262  }
263 
264  my $sr_name = $sr_name_hash{$seq_region_id};
265  my $sr_cs = $sr_cs_hash{$seq_region_id};
266 
267  #
268  # remap the feature coordinates to another coord system
269  # if a mapper was provided
270  #
271 
272  if ($mapper) {
273 
274  if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
275  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
276  $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
277 
278  } else {
279  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
280  $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
281  }
282 
283  #skip features that map to gaps or coord system boundaries
284  next FEATURE if (!defined($seq_region_id));
285 
286  #get a slice in the coord system we just mapped to
287  $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
288  }
289 
290  #
291  # If a destination slice was provided convert the coords.
292  #
293  if (defined($dest_slice)) {
294  my $seq_region_len = $dest_slice->seq_region_length();
295 
296  if ( $dest_slice_strand == 1 ) {
297  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
298  $seq_region_end = $seq_region_end - $dest_slice_start + 1;
299 
300  if ( $dest_slice->is_circular ) {
301  # Handle circular chromosomes.
302 
303  if ( $seq_region_start > $seq_region_end ) {
304  # Looking at a feature overlapping the chromosome origin.
305 
306  if ( $seq_region_end > $dest_slice_start ) {
307  # Looking at the region in the beginning of the chromosome
308  $seq_region_start -= $seq_region_len;
309  }
310  if ( $seq_region_end < 0 ) {
311  $seq_region_end += $seq_region_len;
312  }
313  } else {
314  if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
315  # Looking at the region overlapping the chromosome
316  # origin and a feature which is at the beginning of the
317  # chromosome.
318  $seq_region_start += $seq_region_len;
319  $seq_region_end += $seq_region_len;
320  }
321  }
322  }
323  } else {
324 
325  my $start = $dest_slice_end - $seq_region_end + 1;
326  my $end = $dest_slice_end - $seq_region_start + 1;
327 
328  if ($dest_slice->is_circular()) {
329 
330  if ($dest_slice_start > $dest_slice_end) {
331  # slice spans origin or replication
332 
333  if ($seq_region_start >= $dest_slice_start) {
334  $end += $seq_region_len;
335  $start += $seq_region_len if $seq_region_end > $dest_slice_start;
336 
337  } elsif ($seq_region_start <= $dest_slice_end) {
338  # do nothing
339  } elsif ($seq_region_end >= $dest_slice_start) {
340  $start += $seq_region_len;
341  $end += $seq_region_len;
342 
343  } elsif ($seq_region_end <= $dest_slice_end) {
344  $end += $seq_region_len if $end < 0;
345 
346  } elsif ($seq_region_start > $seq_region_end) {
347  $end += $seq_region_len;
348  }
349 
350  } else {
351 
352  if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
353  # do nothing
354  } elsif ($seq_region_start > $seq_region_end) {
355  if ($seq_region_start <= $dest_slice_end) {
356  $start -= $seq_region_len;
357  } elsif ($seq_region_end >= $dest_slice_start) {
358  $end += $seq_region_len;
359  }
360  }
361  }
362  }
363 
364  $seq_region_start = $start;
365  $seq_region_end = $end;
366  $seq_region_strand *= -1;
367 
368  } ## end else [ if ( $dest_slice_strand...)]
369 
370  # Throw away features off the end of the requested slice or on
371  # different seq_region.
372  if ($seq_region_end < 1
373  || $seq_region_start > $dest_slice_length
374  || ($dest_slice_sr_id != $seq_region_id)) {
375  next FEATURE;
376  }
377  $slice = $dest_slice;
378  }
379 
380  # Finally, create the new ProteinAlignFeature.
381  push(
382  @features,
383  $self->_create_feature_fast(
384  'Bio::EnsEMBL::DnaPepAlignFeature', {
385  'slice' => $slice,
386  'start' => $seq_region_start,
387  'end' => $seq_region_end,
388  'strand' => $seq_region_strand,
389  'hseqname' => $hit_name,
390  'hstart' => $hit_start,
391  'hend' => $hit_end,
392  'hstrand' => 1, # dna_pep_align features
393  # are always hstrand 1
394  'score' => $score,
395  'p_value' => $evalue,
396  'percent_id' => $perc_ident,
397  'cigar_string' => $cigar_line,
398  'analysis' => $analysis,
399  'adaptor' => $self,
400  'dbID' => $protein_align_feature_id,
401  'external_db_id' => $external_db_id,
402  'hcoverage' => $hcoverage,
403  'align_type' => $align_type,
404  'dbname' => $external_db_name,
405  'db_display_name' => $external_display_db_name
406  } ) );
407 
408  }
409 
410  return \@features;
411 }
412 
413 
414 
415 sub _tables {
416  my $self = shift;
417 
418  return (['protein_align_feature', 'paf'], ['external_db','exdb']);
419 }
420 
421 
422 sub _columns {
423  my $self = shift;
424 
425  #warning _objs_from_hashref method depends on ordering of this list
426  return qw( paf.protein_align_feature_id
427  paf.seq_region_id
428  paf.seq_region_start
429  paf.seq_region_end
430  paf.analysis_id
431  paf.seq_region_strand
432  paf.hit_start
433  paf.hit_end
434  paf.hit_name
435  paf.cigar_line
436  paf.evalue
437  paf.perc_ident
438  paf.score
439  paf.external_db_id
440  paf.hcoverage
441  paf.align_type
442  exdb.db_name
443  exdb.db_display_name);
444 }
445 
446 sub _left_join{
447  return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
448 }
449 
450 =head2 list_dbIDs
451 
452  Arg [1] : none
453  Example : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
454  Description: Gets an array of internal ids for all protein align
455  features in the current db
456  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
457  Returntype : listref of ints
458  Exceptions : none
459  Caller : ?
460  Status : Stable
461 
462 =cut
463 
464 sub list_dbIDs {
465  my ($self,$ordered) = @_;
466 
467  return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
468 }
469 
470 
471 
472 
473 
474 
475 1;
Bio::EnsEMBL::DnaPepAlignFeature
Definition: DnaPepAlignFeature.pm:12
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor
Definition: ProteinAlignFeatureAdaptor.pm:18
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::fetch_all_by_Slice
public Listref fetch_all_by_Slice()
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor
Definition: BaseAlignFeatureAdaptor.pm:18