ensembl-hive  2.8.1
ProteinAlignFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
34 Adaptor for ProteinAlignFeatures
35 
36 =head1 SYNOPSIS
37 
38  $pafa =
39  $registry->get_adaptor( 'Human', 'Core', 'ProteinAlignFeature' );
40 
41  my @features = @{ $pafa->fetch_all_by_Slice($slice) };
42 
43  $pafa->store(@features);
44 
45 =head1 METHODS
46 
47 =cut
48 
49 
50 package Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor;
51 
52 use vars qw(@ISA);
53 use strict;
54 
57 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
59 
60 
61 =head2 store
62 
63  Arg [1] : list of Bio::EnsEMBL::DnaPepAlignFeature @feats
64  Example : $protein_align_feature_adaptor->store(@feats);
65  Description: stores a list of ProteinAlignFeatures in the database
66  Returntype : none
67  Exceptions : throw if any of the provided features cannot be stored
68  which may occur if:
69  * The feature does not have an associated Slice
70  * The feature does not have an associated analysis
71  * The Slice the feature is associated with is on a seq_region
72  unknown to this database
73  A warning is given if:
74  * The feature has already been stored in this db
75  Caller : Pipeline
76  Status : Stable
77 
78 =cut
79 
80 
81 sub store{
82  my ($self, @feats) = @_;
83 
84  throw("Must call store with features") if( scalar(@feats) == 0 );
85 
86  my @tabs = $self->_tables;
87  my ($tablename) = @{$tabs[0]};
88 
89  my $db = $self->db();
90  my $slice_adaptor = $db->get_SliceAdaptor();
91  my $analysis_adaptor = $db->get_AnalysisAdaptor();
92 
93  my $sth = $self->prepare(
94  "INSERT INTO $tablename (seq_region_id, seq_region_start, seq_region_end,
95  seq_region_strand, hit_start, hit_end,
96  hit_name, cigar_line,
97  analysis_id, score, evalue, perc_ident, external_db_id, hcoverage, align_type)
98  VALUES (?,?,?,?,?,?,?,?,?,?, ?, ?, ?, ?,?)");
99 
100  FEATURE: foreach my $feat ( @feats ) {
101  if( !ref $feat || !$feat->isa("Bio::EnsEMBL::DnaPepAlignFeature") ) {
102  throw("feature must be a Bio::EnsEMBL::DnaPepAlignFeature,"
103  . " not a [".ref($feat)."].");
104  }
105 
106  if($feat->is_stored($db)) {
107  warning("PepDnaAlignFeature [".$feat->dbID."] is already stored" .
108  " in this database.");
109  next FEATURE;
110  }
111 
112  #sanity check the hstart and hend
113  my $hstart = $feat->hstart();
114  my $hend = $feat->hend();
115  $self->_check_start_end_strand($hstart,$hend,1,$feat->hslice);
116 
117  my $cigar_string = $feat->cigar_string();
118  if(!$cigar_string) {
119  $cigar_string = $feat->length() . 'M';
120  warning("DnaPepAlignFeature does not define a cigar_string.\n" .
121  "Assuming ungapped block with cigar_string=$cigar_string\n");
122  }
123 
124  my $hseqname = $feat->hseqname();
125  if(!$hseqname) {
126  throw("DnaPepAlignFeature must define an hseqname.");
127  }
128 
129  if(!defined($feat->analysis)) {
130  throw("An analysis must be attached to the features to be stored.");
131  }
132 
133  #store the analysis if it has not been stored yet
134  if(!$feat->analysis->is_stored($db)) {
135  $analysis_adaptor->store($feat->analysis());
136  }
137 
138  my $slice = $feat->slice();
139  if(!defined($slice) || !($slice->isa("Bio::EnsEMBL::Slice") or $slice->isa('Bio::EnsEMBL::LRGSlice')) ) {
140  throw("A slice must be attached to the features to be stored.");
141  }
142 
143  my $original = $feat;
144  my $seq_region_id;
145  ($feat, $seq_region_id) = $self->_pre_store($feat);
146 
147  $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
148  $sth->bind_param(2,$feat->start,SQL_INTEGER);
149  $sth->bind_param(3,$feat->end,SQL_INTEGER);
150  $sth->bind_param(4,$feat->strand,SQL_TINYINT);
151  $sth->bind_param(5,$feat->hstart,SQL_INTEGER);
152  $sth->bind_param(6,$feat->hend,SQL_INTEGER);
153  $sth->bind_param(7,$feat->hseqname,SQL_VARCHAR);
154  $sth->bind_param(8,$feat->cigar_string,SQL_LONGVARCHAR);
155  $sth->bind_param(9,$feat->analysis->dbID,SQL_INTEGER);
156  $sth->bind_param(10,$feat->score,SQL_DOUBLE);
157  $sth->bind_param(11,$feat->p_value,SQL_DOUBLE);
158  $sth->bind_param(12,$feat->percent_id,SQL_REAL);
159  $sth->bind_param(13,$feat->external_db_id,SQL_INTEGER);
160  $sth->bind_param(14,$feat->hcoverage,SQL_DOUBLE);
161  $sth->bind_param(15,$feat->align_type,SQL_VARCHAR);
162 
163  $sth->execute();
164  my $dbId = $self->last_insert_id("${tablename}_id", undef, $tablename);
165  $original->dbID($dbId);
166  $original->adaptor($self);
167  }
168 
169  $sth->finish();
170 }
171 
172 
173 =head2 _objs_from_sth
174 
175  Arg [1] : DBI statement handle $sth
176  an exectuted DBI statement handle generated by selecting
177  the columns specified by _columns() from the table specified
178  by _table()
179  Example : @dna_dna_align_feats = $self->_obj_from_hashref
180  Description: PROTECTED implementation of superclass abstract method.
181  Creates DnaDnaAlignFeature objects from a DBI hashref
182  Returntype : listref of Bio::EnsEMBL::ProteinAlignFeatures
183  Exceptions : none
184  Caller : Bio::EnsEMBL::BaseFeatureAdaptor::generic_fetch
185  Status : Stable
186 
187 =cut
188 
189 sub _objs_from_sth {
190  my ($self, $sth, $mapper, $dest_slice) = @_;
191 
192  #
193  # This code is ugly because an attempt has been made to remove as many
194  # function calls as possible for speed purposes. Thus many caches and
195  # a fair bit of gymnastics is used.
196  #
197 
198  my $sa = $self->db()->get_SliceAdaptor();
199  my $aa = $self->db()->get_AnalysisAdaptor();
200 
201  my @features;
202  my %analysis_hash;
203  my %slice_hash;
204  my %sr_name_hash;
205  my %sr_cs_hash;
206 
207  my (
208  $protein_align_feature_id, $seq_region_id, $seq_region_start,
209  $seq_region_end, $analysis_id, $seq_region_strand,
210  $hit_start, $hit_end, $hit_name,
211  $cigar_line, $evalue, $perc_ident,
212  $score, $external_db_id, $hcoverage, $align_type,
213  $external_db_name, $external_display_db_name );
214 
215  $sth->bind_columns(\(
216  $protein_align_feature_id, $seq_region_id, $seq_region_start,
217  $seq_region_end, $analysis_id, $seq_region_strand,
218  $hit_start, $hit_end, $hit_name,
219  $cigar_line, $evalue, $perc_ident,
220  $score, $external_db_id, $hcoverage, $align_type,
221  $external_db_name, $external_display_db_name ));
222 
223  my $dest_slice_start;
224  my $dest_slice_end;
225  my $dest_slice_strand;
226  my $dest_slice_length;
227  my $dest_slice_cs;
228  my $dest_slice_sr_name;
229  my $dest_slice_sr_id;
230  my $asma;
231 
232  if ($dest_slice) {
233  $dest_slice_start = $dest_slice->start();
234  $dest_slice_end = $dest_slice->end();
235  $dest_slice_strand = $dest_slice->strand();
236  $dest_slice_length = $dest_slice->length();
237  $dest_slice_cs = $dest_slice->coord_system();
238  $dest_slice_sr_name = $dest_slice->seq_region_name();
239  $dest_slice_sr_id = $dest_slice->get_seq_region_id();
240  $asma = $self->db->get_AssemblyMapperAdaptor();
241  }
242 
243  FEATURE: while($sth->fetch()) {
244 
245  #get the analysis object
246  my $analysis = $analysis_hash{$analysis_id} ||= $aa->fetch_by_dbID($analysis_id);
247  $analysis_hash{$analysis_id} = $analysis;
248 
249  #need to get the internal_seq_region, if present
250  $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
251  my $slice = $slice_hash{"ID:".$seq_region_id};
252 
253  if (!$slice) {
254  $slice = $sa->fetch_by_seq_region_id($seq_region_id);
255  $slice_hash{"ID:".$seq_region_id} = $slice;
256  $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
257  $sr_cs_hash{$seq_region_id} = $slice->coord_system();
258  }
259 
260  #obtain a mapper if none was defined, but a dest_seq_region was
261  if(!$mapper && $dest_slice && !$dest_slice_cs->equals($slice->coord_system)) {
262  $mapper = $asma->fetch_by_CoordSystems($dest_slice_cs, $slice->coord_system);
263  }
264 
265  my $sr_name = $sr_name_hash{$seq_region_id};
266  my $sr_cs = $sr_cs_hash{$seq_region_id};
267 
268  #
269  # remap the feature coordinates to another coord system
270  # if a mapper was provided
271  #
272 
273  if ($mapper) {
274 
275  if (defined $dest_slice && $mapper->isa('Bio::EnsEMBL::ChainedAssemblyMapper') ) {
276  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
277  $mapper->map($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs, 1, $dest_slice);
278 
279  } else {
280  ($seq_region_id, $seq_region_start, $seq_region_end, $seq_region_strand) =
281  $mapper->fastmap($sr_name, $seq_region_start, $seq_region_end, $seq_region_strand, $sr_cs);
282  }
283 
284  #skip features that map to gaps or coord system boundaries
285  next FEATURE if (!defined($seq_region_id));
286 
287  #get a slice in the coord system we just mapped to
288  $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id);
289  }
290 
291  #
292  # If a destination slice was provided convert the coords.
293  #
294  if (defined($dest_slice)) {
295  my $seq_region_len = $dest_slice->seq_region_length();
296 
297  if ( $dest_slice_strand == 1 ) {
298  $seq_region_start = $seq_region_start - $dest_slice_start + 1;
299  $seq_region_end = $seq_region_end - $dest_slice_start + 1;
300 
301  if ( $dest_slice->is_circular ) {
302  # Handle circular chromosomes.
303 
304  if ( $seq_region_start > $seq_region_end ) {
305  # Looking at a feature overlapping the chromosome origin.
306 
307  if ( $seq_region_end > $dest_slice_start ) {
308  # Looking at the region in the beginning of the chromosome
309  $seq_region_start -= $seq_region_len;
310  }
311  if ( $seq_region_end < 0 ) {
312  $seq_region_end += $seq_region_len;
313  }
314  } else {
315  if ($dest_slice_start > $dest_slice_end && $seq_region_end < 0) {
316  # Looking at the region overlapping the chromosome
317  # origin and a feature which is at the beginning of the
318  # chromosome.
319  $seq_region_start += $seq_region_len;
320  $seq_region_end += $seq_region_len;
321  }
322  }
323  }
324  } else {
325 
326  my $start = $dest_slice_end - $seq_region_end + 1;
327  my $end = $dest_slice_end - $seq_region_start + 1;
328 
329  if ($dest_slice->is_circular()) {
330 
331  if ($dest_slice_start > $dest_slice_end) {
332  # slice spans origin or replication
333 
334  if ($seq_region_start >= $dest_slice_start) {
335  $end += $seq_region_len;
336  $start += $seq_region_len if $seq_region_end > $dest_slice_start;
337 
338  } elsif ($seq_region_start <= $dest_slice_end) {
339  # do nothing
340  } elsif ($seq_region_end >= $dest_slice_start) {
341  $start += $seq_region_len;
342  $end += $seq_region_len;
343 
344  } elsif ($seq_region_end <= $dest_slice_end) {
345  $end += $seq_region_len if $end < 0;
346 
347  } elsif ($seq_region_start > $seq_region_end) {
348  $end += $seq_region_len;
349  }
350 
351  } else {
352 
353  if ($seq_region_start <= $dest_slice_end and $seq_region_end >= $dest_slice_start) {
354  # do nothing
355  } elsif ($seq_region_start > $seq_region_end) {
356  if ($seq_region_start <= $dest_slice_end) {
357  $start -= $seq_region_len;
358  } elsif ($seq_region_end >= $dest_slice_start) {
359  $end += $seq_region_len;
360  }
361  }
362  }
363  }
364 
365  $seq_region_start = $start;
366  $seq_region_end = $end;
367  $seq_region_strand *= -1;
368 
369  } ## end else [ if ( $dest_slice_strand...)]
370 
371  # Throw away features off the end of the requested slice or on
372  # different seq_region.
373  if ($seq_region_end < 1
374  || $seq_region_start > $dest_slice_length
375  || ($dest_slice_sr_id != $seq_region_id)) {
376  next FEATURE;
377  }
378  $slice = $dest_slice;
379  }
380 
381  # Finally, create the new ProteinAlignFeature.
382  push(
383  @features,
384  $self->_create_feature_fast(
385  'Bio::EnsEMBL::DnaPepAlignFeature', {
386  'slice' => $slice,
387  'start' => $seq_region_start,
388  'end' => $seq_region_end,
389  'strand' => $seq_region_strand,
390  'hseqname' => $hit_name,
391  'hstart' => $hit_start,
392  'hend' => $hit_end,
393  'hstrand' => 1, # dna_pep_align features
394  # are always hstrand 1
395  'score' => $score,
396  'p_value' => $evalue,
397  'percent_id' => $perc_ident,
398  'cigar_string' => $cigar_line,
399  'analysis' => $analysis,
400  'adaptor' => $self,
401  'dbID' => $protein_align_feature_id,
402  'external_db_id' => $external_db_id,
403  'hcoverage' => $hcoverage,
404  'align_type' => $align_type,
405  'dbname' => $external_db_name,
406  'db_display_name' => $external_display_db_name
407  } ) );
408 
409  }
410 
411  return \@features;
412 }
413 
414 
415 
416 sub _tables {
417  my $self = shift;
418 
419  return (['protein_align_feature', 'paf'], ['external_db','exdb']);
420 }
421 
422 
423 sub _columns {
424  my $self = shift;
425 
426  #warning _objs_from_hashref method depends on ordering of this list
427  return qw( paf.protein_align_feature_id
428  paf.seq_region_id
429  paf.seq_region_start
430  paf.seq_region_end
431  paf.analysis_id
432  paf.seq_region_strand
433  paf.hit_start
434  paf.hit_end
435  paf.hit_name
436  paf.cigar_line
437  paf.evalue
438  paf.perc_ident
439  paf.score
440  paf.external_db_id
441  paf.hcoverage
442  paf.align_type
443  exdb.db_name
444  exdb.db_display_name);
445 }
446 
447 sub _left_join{
448  return (['external_db',"exdb.external_db_id = paf.external_db_id"]);
449 }
450 
451 =head2 list_dbIDs
452 
453  Arg [1] : none
454  Example : @feature_ids = @{$protein_align_feature_adaptor->list_dbIDs()};
455  Description: Gets an array of internal ids for all protein align
456  features in the current db
457  Arg[1] : <optional> int. not 0 for the ids to be sorted by the seq_region.
458  Returntype : listref of ints
459  Exceptions : none
460  Caller : ?
461  Status : Stable
462 
463 =cut
464 
465 sub list_dbIDs {
466  my ($self,$ordered) = @_;
467 
468  return $self->_list_dbIDs("protein_align_feature", undef, $ordered);
469 }
470 
471 
472 
473 
474 
475 
476 1;
Bio::EnsEMBL::DnaPepAlignFeature
Definition: DnaPepAlignFeature.pm:12
EnsEMBL
Definition: Filter.pm:1
Bio::EnsEMBL::DBSQL::ProteinAlignFeatureAdaptor
Definition: ProteinAlignFeatureAdaptor.pm:18
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor::fetch_all_by_Slice
public Listref fetch_all_by_Slice()
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor
Definition: BaseAlignFeatureAdaptor.pm:18