ensembl-hive  2.7.0
ProteinFeature.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 
22 =head1 CONTACT
23 
24  Please email comments or questions to the public Ensembl
25  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 
27  Questions may also be sent to the Ensembl help desk at
28  <http://www.ensembl.org/Help/Contact>.
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::ProteinFeature
34 
35 =head1 SYNOPSIS
36 
37  my $feature = Bio::EnsEMBL::ProteinFeature->new(
38  -start => $start,
39  -end => $end,
40  -hstart => $hit_start,
41  -hend => $hit_end,
42  -hseqname => $hit_name
43  );
44 
45 =head1 DESCRIPTION
46 
47 ProteinFeature objects represent domains or other features of interest
48 on a peptide sequence.
49 
50 =head1 METHODS
51 
52 =cut
53 
54 package Bio::EnsEMBL::ProteinFeature;
55 
56 use strict;
57 
60 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
61 
62 use parent qw(Bio::EnsEMBL::BaseAlignFeature);
63 
64 =head2 new
65 
66  Arg [IDESC] : (optional) string An interpro description
67  Arg [INTERPRO_AC] : (optional) string An interpro accession
68  Arg [TRANSLATION_ID] : (optional) integer A translation dbID
69  Arg [...] : named arguments to FeaturePair superclass
70  Example :
71 
72  $pf =
73  Bio::EnsEMBL::ProteinFeature->new( -IDESC => $idesc,
74  -INTERPRO_AC => $iac,
75  @fp_args );
76 
77  Description: Instantiates a Bio::EnsEMBL::ProteinFeature
78  Returntype : Bio::EnsEMBL::ProteinFeature
79  Exceptions : none
80  Caller : general
81  Status : Stable
82 
83 =cut
84 
85 
# Constructor.
#
# Named arguments read here (all optional):
#   -IDESC, -ILABEL, -INTERPRO_AC, -TRANSLATION_ID, -EXTERNAL_DATA,
#   -HDESCRIPTION, -CIGAR_STRING, -ALIGN_TYPE
# The complete, unmodified argument list is also forwarded to the
# superclass constructor.
#
# Returns the newly built object, with its strand forced to 0.
sub new {
    my ($proto, @args) = @_;

    # Allow both Class->new(...) and $object->new(...).
    my $class = ref($proto) || $proto;

    my ($idesc, $ilabel, $interpro_ac, $translation_id, $external_data,
        $hit_description, $cigar_string, $align_type)
      = rearrange(['IDESC', 'ILABEL', 'INTERPRO_AC', 'TRANSLATION_ID',
                   'EXTERNAL_DATA', 'HDESCRIPTION', 'CIGAR_STRING',
                   'ALIGN_TYPE'], @args);

    # BaseAlignFeature expects cigar_line or features; without both a
    # cigar string and an alignment type, call the grandparent
    # constructor directly instead.
    my $self = ($cigar_string && $align_type)
        ? $class->SUPER::new(@args)
        : $class->Bio::EnsEMBL::FeaturePair::new(@args);

    # The strand of protein features is always 0.
    $self->{'strand'}          = 0;

    # Absent (or false) values are stored as empty strings.
    $self->{'idesc'}           = $idesc           || '';
    $self->{'ilabel'}          = $ilabel          || '';
    $self->{'interpro_ac'}     = $interpro_ac     || '';
    $self->{'translation_id'}  = $translation_id  || '';
    $self->{'external_data'}   = $external_data   || '';
    $self->{'hit_description'} = $hit_description || '';
    $self->{'cigar_string'}    = $cigar_string    || '';

    # Stored as given; may be undef when no -ALIGN_TYPE was supplied.
    $self->{'align_type'}      = $align_type;

    return $self;
}
115 
116 =head2 strand
117 
118  Arg [1] : Ignored
119  Description: Overwrites Bio::EnsEMBL::Feature->strand to not allow
120  : the strand to be set.
121  Returntype : int
122  Status : Stable
123 
124 =cut
125 
# Read-only accessor: any argument passed in is ignored, so the stored
# strand cannot be changed through this method.
sub strand {
    my ($self) = @_;

    return $self->{'strand'};
}
131 
132 =head2 idesc
133 
134  Arg [1] : (optional) string The interpro description
135  Example : print $protein_feature->idesc();
136  Description: Getter/Setter for the interpro description of this protein
137  feature.
138  Returntype : string
139  Exceptions : none
140  Caller : general
141  Status : Stable
142 
143 =cut
144 
# Combined getter/setter: stores the first extra argument (even undef)
# when one is supplied, then returns the current value.
sub idesc {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'idesc'} = $new_value[0];
    }

    return $self->{'idesc'};
}
150 
151 =head2 ilabel
152 
153  Arg [1] : (optional) string The interpro label
154  Example : print $protein_feature->ilabel();
155  Description: Getter/Setter for the interpro label of this protein
156  feature.
157  Returntype : string
158  Exceptions : none
159  Caller : general
160  Status : Stable
161 
162 =cut
163 
# Combined getter/setter: stores the first extra argument (even undef)
# when one is supplied, then returns the current value.
sub ilabel {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'ilabel'} = $new_value[0];
    }

    return $self->{'ilabel'};
}
169 
170 =head2 interpro_ac
171 
172  Arg [1] : (optional) string The interpro accession
173  Example : print $protein_feature->interpro_ac();
174  Description: Getter/Setter for the interpro accession of this protein
175  feature.
176  Returntype : string
177  Exceptions : none
178  Caller : general
179  Status : Stable
180 
181 =cut
182 
# Combined getter/setter: stores the first extra argument (even undef)
# when one is supplied, then returns the current value.
sub interpro_ac {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'interpro_ac'} = $new_value[0];
    }

    return $self->{'interpro_ac'};
}
188 
189 =head2 translation_id
190 
191  Arg [1] : (optional) integer The dbID of the translation
192  Example : print $protein_feature->translation_id();
193  Description: Getter/Setter for the translation dbID of this protein
194  feature.
195  Returntype : string
196  Exceptions : none
197  Caller : general
198  Status : Stable
199 
200 =cut
201 
# Combined getter/setter: stores the first extra argument (even undef)
# when one is supplied, then returns the current value.
sub translation_id {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'translation_id'} = $new_value[0];
    }

    return $self->{'translation_id'};
}
207 
=head2 external_data

  Arg [1]    : (optional) new value to store as the external data
  Example    : print $protein_feature->external_data();
  Description: Getter/Setter for the external data attached to this
               protein feature.
  Returntype : the value currently stored
  Exceptions : none
  Caller     : general

=cut

sub external_data {
    my ($self, @new_value) = @_;

    # Any supplied argument, including an explicit undef, replaces the value.
    if (@new_value) {
        $self->{'external_data'} = $new_value[0];
    }

    return $self->{'external_data'};
}
213 
214 
215 =head2 summary_as_hash
216 
217  Example : $protein_feature_summary = $protein_feature->summary_as_hash();
218  Description : Retrieves a textual summary of this Protein feature.
219  Not inherited from Feature.
220  Returns : hashref mapping each summary key to a single descriptive value
221  Status : Intended for internal use
222 =cut
223 
# Builds a flat hash describing this feature and returns a reference to it.
# Each value is obtained by calling the named accessor in scalar context.
sub summary_as_hash {
    my ($self) = @_;

    # 'type' comes from the analysis object rather than from the feature.
    my %summary = ( 'type' => scalar $self->analysis->db );

    # Summary key => accessor method, in the order the values are read.
    my @key_and_accessor = (
        [ 'id'             => 'display_id'     ],
        [ 'start'          => 'start'          ],
        [ 'end'            => 'end'            ],
        [ 'interpro'       => 'interpro_ac'    ],
        [ 'description'    => 'idesc'          ],
        [ 'hit_start'      => 'hstart'         ],
        [ 'hit_end'        => 'hend'           ],
        [ 'cigar_string'   => 'cigar_string'   ],
        [ 'align_type'     => 'align_type'     ],
        [ 'hseqname'       => 'hseqname'       ],
        [ 'translation_id' => 'translation_id' ],
    );

    for my $entry (@key_and_accessor) {
        my ($key, $accessor) = @{$entry};

        # Scalar assignment keeps the accessor call in scalar context.
        $summary{$key} = $self->$accessor();
    }

    return \%summary;
}
242 
243 
244 =head2 alignment_strings
245 
246  Arg [1] : none (any arguments passed are ignored)
247  Example : $pf->alignment_strings
248  Description: Allows to rebuild the alignment string of both the query and target sequence
249  using the sequence from translation object and
250  MD Z String for mismatching positions. Regex : [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)* (Refer: SAM/BAM specification)
251  eg: MD:Z:96^RHKTDSFVGLMGKRALNS0V14
252  Returntype : array reference containing 2 strings
253  the first corresponds to seq
254  the second corresponds to hseq
255  Exceptions : thrown if the align_type of this feature is not 'mdtag'
256  Caller : general
257  Status : Stable
258 
259 =cut
260 
261 
# Rebuilds the alignment strings for this feature.
#
# The peptide sequence is looked up through the feature's adaptor using the
# stored translation dbID. Only the 'mdtag' alignment type is handled; any
# other type raises an exception. When the sequence or the cigar string is
# unavailable, a warning is emitted and nothing is returned.
sub alignment_strings {
    my ($self) = @_;

    # Fetch the translation this feature is attached to.
    my $translation = $self->adaptor->db->get_TranslationAdaptor()
                           ->fetch_by_dbID($self->translation_id);

    my $peptide;
    if (defined $translation && $translation->isa('Bio::EnsEMBL::Translation')) {
        $peptide = $translation->transcript()->translate()->seq();
    }

    # Guard: every alignment type other than 'mdtag' is unsupported.
    if ($self->align_type ne 'mdtag') {
        throw("alignment_strings method not implemented for " . $self->align_type);
        return;
    }

    if (defined $peptide && defined $self->cigar_string) {
        return $self->_mdz_alignment_string($peptide, $self->cigar_string);
    }

    warn "sequence or cigar_line not found for " . $self->translation_id;
    return;
}
284 
285 
# Transformation is not supported for protein features: this method always
# raises an exception through the object's throw method.
sub transform {
    my ($self) = @_;

    my $message = "ProteinFeature cant be transformed directly as"
                . " they are not on EnsEMBL coord system";

    $self->throw($message);
    return;
}
293 
294 
295 =head2 _hit_unit
296 
297  Arg [1] : none
298  Description: PRIVATE implementation of abstract superclass method. Returns
299  1 as the 'unit' used for the hit sequence.
300  Returntype : int
301  Exceptions : none
303  Status : Stable
304 
305 
306 =cut
307 
# PRIVATE implementation of the abstract superclass method: the 'unit' used
# for the hit sequence. The method's documentation specifies 1; the previous
# value of 3 duplicated the query unit.
sub _hit_unit {
    return 1;
}
311 
312 
313 =head2 _query_unit
314 
315  Arg [1] : none
316  Description: PRIVATE implementation of abstract superclass method. Returns
317  3 as the 'unit' used for the query sequence.
318  Returntype : int
319  Exceptions : none
321  Status : Stable
322 
323 
324 =cut
325 
# PRIVATE implementation of the abstract superclass method: the 'unit' used
# for the query sequence.
sub _query_unit {
    my $unit = 3;

    return $unit;
}
329 
330 
331 
332 
333 
334 1;
Bio::EnsEMBL::BaseAlignFeature
Definition: BaseAlignFeature.pm:90
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
accession
public accession()
Bio::EnsEMBL::Feature::strand
public Int strand()
Bio::EnsEMBL::ProteinFeature
Definition: ProteinFeature.pm:24
Bio::EnsEMBL::ProteinFeature::new
public Bio::EnsEMBL::FeaturePair new()
Bio::EnsEMBL::FeaturePair
Definition: FeaturePair.pm:56
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34