ensembl-hive  2.7.0
ProteinFeatureAdaptor.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 
22 =head1 CONTACT
23 
24  Please email comments or questions to the public Ensembl
25  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 
27  Questions may also be sent to the Ensembl help desk at
28  <http://www.ensembl.org/Help/Contact>.
29 
30 =cut
31 
32 =head1 NAME
33 
35 
36 =head1 SYNOPSIS
37 
39 
40  Bio::EnsEMBL::Registry->load_registry_from_db(
41  -host => 'ensembldb.ensembl.org',
42  -user => 'anonymous'
43  );
44 
45  $pfa = Bio::EnsEMBL::Registry->get_adaptor( "human", "core",
46  "proteinfeature" );
47 
48  my @prot_feats = @{ $pfa->fetch_all_by_translation_id(1231) };
49 
50  my $prot_feat = $pfa->fetch_by_dbID(523);
51 
52 =head1 METHODS
53 
54 =cut
55 
56 package Bio::EnsEMBL::DBSQL::ProteinFeatureAdaptor;
57 
58 use strict;
59 
63 use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning);
64 
66 
67 =head2 fetch_all_by_translation_id
68 
69  Arg [1] : int $transl
70  the internal id of the translation corresponding to protein
71  whose features are desired
72  Example : @prot_feats =
73  @{ $prot_feat_adaptor->fetch_all_by_translation_id(1234) };
74  Description: Gets all protein features present on a peptide using the
75  translations internal identifier. This method will return
76  an unsorted list of all protein_feature types. The feature
77  types may be distinguished using the logic name attribute of
78  the attached analysis objects.
79  Returntype : listref of Bio::EnsEMBL::ProteinFeatures
80  Exceptions : none
81  Caller : ?
82  Status : Stable
83 
84 =cut
85 
# Fetch the protein features attached to one translation.
#
#  Arg [1]    : int $translation_id - internal id of the translation
#  Arg [2]    : (optional) string $logic_name - only return features whose
#               analysis matches this logic name
#  Returntype : whatever generic_fetch() returns for the constraint; a
#               reference to an empty list when $logic_name cannot be turned
#               into a constraint
sub fetch_all_by_translation_id {
  my ($self, $translation_id, $logic_name) = @_;

  my $constraint = "pf.translation_id = ?";

  if (defined $logic_name) {
    my $logic_constraint = $self->_logic_name_to_constraint('', $logic_name);

    # An undefined constraint is what fetch_all_by_logic_name() treats as an
    # invalid logic name.  Dropping the filter here would hand back every
    # feature of the translation, so report it and return nothing instead.
    if (!defined $logic_constraint) {
      warning("Invalid logic name: $logic_name");
      return [];
    }

    # Only append a non-empty fragment so the SQL never ends in a dangling
    # " AND ".
    if (length $logic_constraint) {
      $constraint .= " AND " . $logic_constraint;
    }
  }

  $self->bind_param_generic_fetch($translation_id, SQL_INTEGER);

  return $self->generic_fetch($constraint);
} ## end sub fetch_all_by_translation_id
100 
101 =head2 fetch_all_by_logic_name
102 
103  Arg [1] : string $logic_name
104  the logic name of the type of features to obtain
105  Example : $fs = $a->fetch_all_by_logic_name('foobar');
106  Description: Returns a listref of features created from the database.
107  only features with an analysis of type $logic_name will
108  be returned. If the logic name is invalid (not in the
109  analysis table), a reference to an empty list will be
110  returned.
111  Returntype : listref of Bio::EnsEMBL::ProteinFeatures
112  Exceptions : thrown if no $logic_name
113  Caller : General
114  Status : Stable
115 
116 =cut
117 
# Fetch every feature whose analysis has the given logic name.
# Throws when no logic name is supplied; warns and returns a reference to an
# empty list when the logic name cannot be turned into a constraint.
sub fetch_all_by_logic_name {
  my ( $self, $logic_name ) = @_;

  throw("Need a logic_name") unless defined $logic_name;

  my $analysis_constraint =
    $self->_logic_name_to_constraint( '', $logic_name );

  # Happy path first: a usable constraint goes straight to generic_fetch().
  return $self->generic_fetch($analysis_constraint)
    if defined $analysis_constraint;

  warning("Invalid logic name: $logic_name");
  return [];
}
134 
135 =head2 fetch_by_dbID
136 
137  Arg [1] : int $protfeat_id
138  the unique database identifier of the protein feature to obtain
139  Example : my $feature = $prot_feat_adaptor->fetch_by_dbID(523);
140  Description: Obtains a protein feature object via its unique id
141  Returntype : Bio::EnsEMBL::ProteinFeature
142  Exceptions : none
143  Caller : ?
144  Status : Stable
145 
146 =cut
147 
# Fetch a single protein feature by its protein_feature_id.
#
#  Arg [1]    : int $protfeat_id - value matched against
#               protein_feature.protein_feature_id
#  Returntype : Bio::EnsEMBL::ProteinFeature, or undef when no row matches
#
# The query selects the protein_feature columns (minus the primary key) plus
# the description, display label and InterPro accession from the left-joined
# xref/interpro tables.
sub fetch_by_dbID {
  my ($self, $protfeat_id) = @_;

  # Skip the pk (protein_feature_id): it is already known from the argument.
  my @select_cols = map { 'pf.' . $_ } $self->_tbl_columns(1);

  my $select_sql = "SELECT " . join(',', @select_cols)
    . ", x.description, x.display_label, i.interpro_ac "
    . "FROM protein_feature pf "
    . "LEFT JOIN interpro AS i ON pf.hit_name = i.id "
    . "LEFT JOIN xref AS x ON x.dbprimary_acc = i.interpro_ac "
    . "WHERE pf.protein_feature_id = ?";

  my $sth = $self->prepare($select_sql);
  $sth->bind_param(1, $protfeat_id, SQL_INTEGER);
  $sth->execute();

  my $pf_hash_ref = $sth->fetchrow_hashref();
  $sth->finish();

  # Decide "not found" from the fetch result itself.  $sth->rows is not a
  # dependable row count for SELECT statements across DBI drivers, whereas
  # fetchrow_hashref() returns undef when there is no row.
  # "return undef" (not a bare return) is kept so list-context callers still
  # receive a single undef element, as before.
  return undef if !defined $pf_hash_ref;

  my $analysis =
    $self->db->get_AnalysisAdaptor->fetch_by_dbID($pf_hash_ref->{analysis_id});

  # cigar_line and align_type are only selected for schema versions > 92
  # (see _tbl_columns); on older schemas the keys are absent and these stay
  # undef.
  my ($cigar_string, $align_type);
  $cigar_string = $pf_hash_ref->{cigar_line} if exists $pf_hash_ref->{cigar_line};
  $align_type   = $pf_hash_ref->{align_type} if exists $pf_hash_ref->{align_type};

  return Bio::EnsEMBL::ProteinFeature->new(
    -ADAPTOR        => $self,
    -DBID           => $protfeat_id,
    -START          => $pf_hash_ref->{seq_start},
    -END            => $pf_hash_ref->{seq_end},
    -HSTART         => $pf_hash_ref->{hit_start},
    -HEND           => $pf_hash_ref->{hit_end},
    -HSEQNAME       => $pf_hash_ref->{hit_name},
    -HDESCRIPTION   => $pf_hash_ref->{hit_description},
    -ANALYSIS       => $analysis,
    -SCORE          => $pf_hash_ref->{score},
    -P_VALUE        => $pf_hash_ref->{evalue},
    -PERCENT_ID     => $pf_hash_ref->{perc_ident},
    -IDESC          => $pf_hash_ref->{description},
    -ILABEL         => $pf_hash_ref->{display_label},
    -INTERPRO_AC    => $pf_hash_ref->{interpro_ac},
    -TRANSLATION_ID => $pf_hash_ref->{translation_id},
    -CIGAR_STRING   => $cigar_string,
    -ALIGN_TYPE     => $align_type
  );
} ## end sub fetch_by_dbID
203 
204 =head2 store
205 
206  Arg [1] : Bio::EnsEMBL::ProteinFeature $feature
207  The feature to be stored
208  Arg [2] : int $translation_id - the internal id of the translation the feature is stored against
209 
210  Example : $protein_feature_adaptor->store($protein_feature);
211  Description: Stores a protein feature in the database
212  Returntype : int - the new internal identifier of the stored protein feature
213  Exceptions : thrown if arg is not a Bio::EnsEMBL::ProteinFeature or if the feature has no analysis
214  Caller : none
215  Status : Stable
216 
217 =cut
218 
# Insert a protein feature into the protein_feature table.
#
#  Arg [1]    : Bio::EnsEMBL::ProteinFeature $feature - the feature to store
#  Arg [2]    : int $translation_id - bound to the translation_id column
#  Returntype : int - dbID of the feature (the existing dbID when the feature
#               is already stored in this database)
#  Exceptions : thrown if $feature is not a Bio::EnsEMBL::ProteinFeature or
#               has no analysis attached
#
# On a fresh insert the feature's adaptor and dbID are updated in place.
sub store {
  my ($self, $feature, $translation_id) = @_;

  if (!ref($feature) || !$feature->isa('Bio::EnsEMBL::ProteinFeature')) {
    throw("ProteinFeature argument is required");
  }

  my $db = $self->db();

  if ($feature->is_stored($db)) {
    warning("ProteinFeature " . $feature->dbID() . " is already stored in " . "this database - not storing again");

    # Honour the warning: previously execution fell through and attempted the
    # insert a second time.
    return $feature->dbID();
  }

  my $analysis = $feature->analysis();
  if (!defined($analysis)) {
    throw("Feature doesn't have analysis. Can't write to database");
  }
  if (!$analysis->is_stored($db)) {
    $db->get_AnalysisAdaptor->store($analysis);
  }

  my $insert_ignore = $self->insert_ignore_clause();
  my @insert_cols   = $self->_tbl_columns(1);    # skip pk - protein_feature_id

  my $placeholders = join ',', map { '?' } @insert_cols;
  my $insert_stmt  = "${insert_ignore} INTO protein_feature (" . (join ',', @insert_cols) . ') VALUES (' . $placeholders . ')';

  my $sth = $self->prepare($insert_stmt);

  # Values in the same order as the columns returned by _tbl_columns(1).
  my @bindings = (
    [ $translation_id,         SQL_INTEGER ],
    [ $feature->start,         SQL_INTEGER ],
    [ $feature->end,           SQL_INTEGER ],
    [ $feature->hstart,        SQL_INTEGER ],
    [ $feature->hend,          SQL_INTEGER ],
    [ $feature->hseqname,      SQL_VARCHAR ],
    [ $analysis->dbID,         SQL_INTEGER ],
    [ $feature->score,         SQL_DOUBLE ],
    [ $feature->p_value,       SQL_DOUBLE ],
    [ $feature->percent_id,    SQL_FLOAT ],
    [ $feature->external_data, SQL_VARCHAR ],
    [ $feature->hdescription,  SQL_LONGVARCHAR ],
  );

  # cigar_line / align_type columns are only part of the column list for
  # schema versions above 92.
  if ($self->schema_version > 92) {
    push @bindings,
      [ $feature->cigar_string, SQL_VARCHAR ],
      [ $feature->align_type,   SQL_VARCHAR ];
  }

  my $i = 0;
  for my $binding (@bindings) {
    $sth->bind_param(++$i, $binding->[0], $binding->[1]);
  }

  $sth->execute();

  # DBI convention: err is undef for "no error", "" for information and "0"
  # for a warning, hence the string comparison.
  if (defined($sth->err) && $sth->err eq '0') {
    warning('SQL warning : ' . $sth->errstr . "\n");
  }

  my $dbID = $self->last_insert_id('protein_feature_id', undef, 'protein_feature');

  $feature->adaptor($self);
  $feature->dbID($dbID);

  $sth->finish();

  return $dbID;
} ## end sub store
282 
# Tables (with their SQL aliases) used when building fetch queries:
# protein_feature as pf, interpro as ip, xref as x.
sub _tables {
  my $self = shift;

  return (
    [ 'protein_feature', 'pf' ],
    [ 'interpro',        'ip' ],
    [ 'xref',            'x' ]
  );
}
288 
# Left-join conditions for the interpro and xref tables, each given as a
# [table, condition] pair.
sub _left_join {
  return (
    [ 'interpro', "pf.hit_name = ip.id" ],
    [ 'xref',     "x.dbprimary_acc = ip.interpro_ac" ]
  );
}
292 
293 # return columns from protein_feature table
# Column names of the protein_feature table, in table order.
#
#  Arg [1] : (optional) boolean $skip_pk - when true, leave out the primary
#            key column protein_feature_id
#
# cigar_line and align_type are appended only when schema_version is
# greater than 92.
sub _tbl_columns {
  my ($self, $skip_pk) = @_;

  # Start from the non-key columns and add the key in front when wanted.
  my @columns = (
    'translation_id', 'seq_start',  'seq_end',
    'hit_start',      'hit_end',    'hit_name',
    'analysis_id',    'score',      'evalue',
    'perc_ident',     'external_data',
    'hit_description',
  );

  if ($self->schema_version > 92) {
    push @columns, 'cigar_line', 'align_type';
  }

  unshift @columns, 'protein_feature_id' unless $skip_pk;

  return @columns;
}
318 
319 # return columns from joined tables (xref and interpro) prefixed with alias
# Full select list for fetch queries: every protein_feature column prefixed
# with the "pf." alias, followed by the three columns taken from the joined
# xref (x) and interpro (ip) tables.
sub _columns {
  my ($self) = @_;

  my @columns;
  for my $column ( $self->_tbl_columns() ) {
    push @columns, "pf." . $column;
  }

  push @columns, 'x.description', 'x.display_label', 'ip.interpro_ac';

  return @columns;
}
330 
331 
332 # Arg [1] : StatementHandle $sth
333 # Example : none
334 # Description: PROTECTED implementation of abstract superclass method.
335 # responsible for the creation of ProteinFeatures
336 # Returntype : listref of Bio::EnsEMBL::ProteinFeatures
337 # Exceptions : none
338 # Caller : internal
339 # Status : At Risk
340 
# Turn the rows of an executed statement handle into ProteinFeature objects.
# Columns are bound positionally in the order produced by _columns(): the
# protein_feature columns, then (schema_version > 92 only) cigar_line and
# align_type, then the xref description, xref display label and InterPro
# accession.  Returns a reference to the list of created features.
sub _objs_from_sth {
  my ($self, $sth) = @_;

  my ($dbID,  $translation_id, $start,   $end,
      $hstart, $hend,          $hid,     $analysis_id,
      $score,  $evalue,        $perc_id, $external_data, $hdesc,
      $cigar_line, $align_type,
      $desc,   $ilabel,        $interpro_ac);

  # Bind each given scalar ref to the next result column.
  my $column_index = 0;
  my $bind_next    = sub {
    for my $target (@_) {
      $sth->bind_col(++$column_index, $target);
    }
  };

  $bind_next->(
    \$dbID,   \$translation_id, \$start,   \$end,
    \$hstart, \$hend,           \$hid,     \$analysis_id,
    \$score,  \$evalue,         \$perc_id, \$external_data, \$hdesc
  );

  # These two columns are only part of the select list on newer schemas.
  if ($self->schema_version > 92) {
    $bind_next->(\$cigar_line, \$align_type);
  }

  $bind_next->(\$desc, \$ilabel, \$interpro_ac);

  my $analysis_adaptor = $self->db->get_AnalysisAdaptor();

  my @features;
  while ($sth->fetch()) {
    my $analysis = $analysis_adaptor->fetch_by_dbID($analysis_id);

    my $feature = Bio::EnsEMBL::ProteinFeature->new(
      -DBID           => $dbID,
      -ADAPTOR        => $self,
      -SEQNAME        => $translation_id,
      -START          => $start,
      -END            => $end,
      -ANALYSIS       => $analysis,
      -PERCENT_ID     => $perc_id,
      -P_VALUE        => $evalue,
      -SCORE          => $score,
      -HSTART         => $hstart,
      -HEND           => $hend,
      -HSEQNAME       => $hid,
      -HDESCRIPTION   => $hdesc,
      -IDESC          => $desc,
      -ILABEL         => $ilabel,
      -INTERPRO_AC    => $interpro_ac,
      -TRANSLATION_ID => $translation_id,
      -CIGAR_STRING   => $cigar_line,
      -ALIGN_TYPE     => $align_type,
    );

    push @features, $feature;
  }

  return \@features;
}
409 
410 #wrapper method
411 =head2 fetch_all_by_uniprot_acc
412 
413  Arg [1] : string uniprot accession
414  The uniprot accession of the features to obtain
415  Arg [2] : (optional) string $logic_name
416  The analysis logic name of the type of features to
417  obtain. Default is 'gifts_import'
418  Example : @feats =
419  @{ $adaptor->fetch_all_by_uniprot_acc( 'P20366',
420  'gifts_import' ); }
421  Description: Returns a listref of features created from the
422  database which correspond to the given uniprot accession. If
423  logic name is defined, only features with an analysis
424  of type $logic_name will be returned. Defaults to 'gifts_import'
425  Returntype : listref of Bio::EnsEMBL::BaseAlignFeatures
426  Exceptions : thrown if uniprot_acc is not defined
427  Caller : general
428  Status : Stable
429 
430 =cut
431 
# Convenience wrapper: fetch features by UniProt accession by delegating to
# fetch_all_by_hit_name().  When no logic name is given, 'gifts_import' is
# used.
sub fetch_all_by_uniprot_acc {
  my ( $self, $uniprot_acc, $logic_name ) = @_;

  unless ( defined $logic_name ) {
    $logic_name = "gifts_import";
  }

  return $self->fetch_all_by_hit_name( $uniprot_acc, $logic_name );
}
437 
438 #inherited methods from BaseAlignFeatureAdaptor
# Slice- and seq_region-based operations are not supported for protein
# features.  Every stub below overrides a method of that kind and throws an
# explanatory error instead of running it.  The message texts live in the
# private helpers so each wording is written once.

# Message used by all "fetch by slice" style stubs.
sub _slice_fetch_unsupported_message {
  return "ProteinFeatures can't be fetched by slice as"
    . " they are not on EnsEMBL coord system. Try fetch_all_by_translation_id instead";
}

# Message used by the seq_region id lookups.
sub _seq_region_unsupported_message {
  return "No seq_region_id as ProteinFeatures are not on EnsEMBL coord system.";
}

sub fetch_all_by_Slice_and_hcoverage {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_all_by_Slice_and_external_db {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_all_by_Slice_and_pid {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_all_by_Slice {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_Iterator_by_Slice_method {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_Iterator_by_Slice {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_all_by_Slice_and_score {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub fetch_all_by_Slice_constraint {
  my ($self) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

# Shares the slice-fetch wording; the extra arguments are accepted but unused.
sub fetch_all_by_stable_id_list {
  my ( $self, $id_list_ref, $slice ) = @_;
  $self->throw( _slice_fetch_unsupported_message() );
}

sub count_by_Slice_constraint {
  my ($self) = @_;
  $self->throw( "ProteinFeatures cant be count by slice as"
      . " they are not on EnsEMBL coord system." );
}

sub remove_by_Slice {
  my ($self) = @_;
  $self->throw( "ProteinFeatures cant be removed by slice as"
      . " they are not on EnsEMBL coord system." );
}

sub get_seq_region_id_internal {
  my ($self) = @_;
  $self->throw( _seq_region_unsupported_message() );
}

sub get_seq_region_id_external {
  my ($self) = @_;
  $self->throw( _seq_region_unsupported_message() );
}
514 
515 1;
516 
Bio::EnsEMBL::Registry::get_adaptor
public Adaptor get_adaptor()
map
public map()
Bio::EnsEMBL::DBSQL::ProteinFeatureAdaptor
Definition: ProteinFeatureAdaptor.pm:26
accession
public accession()
Bio::EnsEMBL
Definition: AltAlleleGroup.pm:5
Bio::EnsEMBL::ProteinFeature
Definition: ProteinFeature.pm:24
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
Bio::EnsEMBL::ProteinFeature::new
public Bio::EnsEMBL::FeaturePair new()
Bio::EnsEMBL::DBSQL::BaseAdaptor
Definition: BaseAdaptor.pm:71
Bio::EnsEMBL::Registry::load_registry_from_db
public Int load_registry_from_db()
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68
Bio::EnsEMBL::DBSQL::BaseAlignFeatureAdaptor
Definition: BaseAlignFeatureAdaptor.pm:18