ensembl-hive  2.8.1
SeqEdit.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::SeqEdit - A class representing a post transcriptional edit to a
34 sequence.
35 
36 =head1 SYNOPSIS
37 
40 
41  # construct a SeqEdit object using a Transcript attribute
42 
43  ($attribute) = @{ $translation->get_all_Attributes('_rna_edit') };
44 
45  $seq_edit = Bio::EnsEMBL::SeqEdit->new( -ATTRIB => $attribute );
46 
47  print $seq_edit->start(), "\n";
48  print $seq_edit->end(), "\n";
49  print $seq_edit->alt_seq(), "\n";
50 
51  # apply the edit to some sequence
52  $seq = $transcript->spliced_seq();
53  print "Before modification: $seq\n";
54 
55  $seq_edit->apply_edit( \$seq );
56  print "After modification: $seq\n";
57 
58  # construct an attribute object from a SeqEdit and add it to a
59  # translation
60 
61  $seq_edit = Bio::EnsEMBL::SeqEdit->new(
62  -CODE => '_selenocysteine',
63  -NAME => 'Selenocysteine',
64  -DESCRIPTION => 'Selenocysteine',
65  -START => 10,
66  -END => 10,
67  -ALT_SEQ => 'U'
68  );
69 
70  $attribute = $seq_edit->get_Attribute();
71  $translation->add_Attributes($attribute);
72 
73 =head1 DESCRIPTION
74 
75 This is a class used to represent post transcriptional
76 modifications to sequences. SeqEdit objects are stored as ordinary
77 Bio::EnsEMBL::Attributes with a parseable value and can be used to
78 represent RNA editing, selenocysteines etc.
79 
80 Also see B<Bio::EnsEMBL::Attribute>
81 
82 =head1 METHODS
83 
84 =cut
85 
86 package Bio::EnsEMBL::SeqEdit;
87 
88 use strict;
89 use warnings;
90 
92 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
93 use Bio::EnsEMBL::Utils::Exception qw(throw);
94 
95 
96 =head2 new
97 
98  Arg [-ATTRIB] : Bio::EnsEMBL::Attribute
99  Constructs a new SeqEdit from an Attribute.
100  Can only be provided if no other constructor arguments
101  are provided.
102  Arg [-START] : The start position of the edit.
103  Arg [-END] : The end position of the edit.
104  Arg [-ALT_SEQ] : The alternate sequence
105  Arg [-CODE] : A code for this SeqEdit
106  Arg [-NAME] : A name for this SeqEdit
107  Arg [-DESCRIPTION] : A description for this SeqEdit
108  Example : my $sea = Bio::EnsEMBL::SeqEdit->new(-ATTRIB => $attrib);
109  my $sea = Bio::EnsEMBL::SeqEdit->new
110  (-START => 10,
111  -END => 12,
112  -ALT_SEQ => 'ACG',
113  -CODE => '_rna_edit',
114  -NAME => 'RNA Edit',
115  -DESCRIPTION => 'RNA edit');
116  Description: Constructs a SeqEdit representing a single edit to a
117  sequence, such as an rna modification or a selenocysteine.
118  Returntype : Bio::EnsEMBL::SeqEdit
119  Exceptions : throws if -ATTRIB is given together with any other argument
120  throws if start and end are not set correctly or cannot be parsed from the attribute value
121  Caller : general
122  Status : Stable
123 
124 =cut
125 
# Constructor. Builds a SeqEdit either from a Bio::EnsEMBL::Attribute
# (-ATTRIB, which excludes every other argument) or from the explicit
# -START, -END, -ALT_SEQ, -NAME, -DESCRIPTION and -CODE arguments.
# Throws on an invalid -ATTRIB, an unparseable attribute value, or
# coordinates that violate start >= 1, end >= 0, start <= end + 1.
sub new {
  my $class = shift;

  my ($attrib, $start, $end, $alt_seq, $name, $desc, $code) =
    rearrange([qw(ATTRIB START END ALT_SEQ NAME DESCRIPTION CODE)], @_);

  if($attrib) {
    if(defined($start) || defined($end) || defined($alt_seq) ||
       defined($name) || defined($desc) || defined($code)) {
      throw("Cannot specify -ATTRIB argument with additional arguments.");
    }

    # Calling ->isa on an unblessed reference is a fatal Perl error, so
    # guard the call; any such value is reported as a wrong argument type.
    my $is_attribute = ref($attrib) && do {
      local $@;
      eval { $attrib->isa('Bio::EnsEMBL::Attribute') };
    };
    if(!$is_attribute) {
      throw('Bio::EnsEMBL::Attribute argument expected.');
    }

    # The attribute value is expected to look like "<start> <end> <alt_seq>".
    my $value = $attrib->value();
    $value = '' if(!defined($value));

    ($start, $end, $alt_seq) = split(/\s+/, $value);

    # Both coordinates must be present and consist solely of digits;
    # anchoring the pattern rejects values such as "1x" or "a10".
    if(!defined($start) || !defined($end) ||
       $start !~ /\A\d+\z/ || $end !~ /\A\d+\z/) {
      throw('Could not parse value of attribute: '.$value);
    }

    $name = $attrib->name();
    $code = $attrib->code();
    $desc = $attrib->description();
  }

  # start == end + 1 is legal: it denotes an insertion before 'start'.
  if(defined($end) && defined($start) && $start > $end+1) {
    throw("start must be less than or equal to end + 1");
  }

  if(defined($start) && $start < 1) {
    throw("start must be greater than or equal to 1");
  }

  if(defined($end) && $end < 0) {
    throw("end must be greater than or equal to 0");
  }

  # A missing replacement sequence means a pure deletion.
  $alt_seq ||= '';

  return bless {'start'       => $start,
                'end'         => $end,
                'alt_seq'     => $alt_seq,
                'description' => $desc,
                'name'        => $name,
                'code'        => $code}, $class;
}
178 
179 
180 
181 =head2 start
182 
183  Arg [1] : (optional) int $start - the new start position
184  Example : $start = $se_attrib->start();
185  Description: Getter/Setter for the start position of the region replaced
186  by the alt_seq.
187 
188  Coordinates are inclusive and one-based, which means that
189  inserts are unusually represented by a start 1bp higher than
190  the end.
191 
192  E.g. start = 1, end = 1 is a replacement of the first base but
193  start = 1, end = 0 is an insert BEFORE the first base.
194  Returntype : int
195  Exceptions : throws if start < 1
196  Caller : Transcript, Translation
197  Status : Stable
198 
199 =cut
200 
# Getter/setter for the one-based, inclusive start coordinate.
# A defined value below 1 is rejected; undef is stored as-is.
sub start {
  my ($self, @args) = @_;

  return $self->{'start'} unless @args;

  my $new_start = $args[0];
  throw("start must be greater than or equal to 1")
    if(defined($new_start) && $new_start < 1);

  $self->{'start'} = $new_start;
  return $self->{'start'};
}
214 
215 
216 =head2 end
217 
218  Arg [1] : (optional) int $end - the new end position
219  Example : $end = $se_attrib->end();
220  Description: Getter/Setter for the end position of the region replaced
221  by the alt_seq.
222 
223  Coordinates are inclusive and one-based, which means that
224  inserts are unusually represented by a start 1bp higher than
225  the end.
226 
227  E.g. start = 1, end = 1 is a replacement of the first base but
228  start = 1, end = 0 is an insert BEFORE the first base.
229  Returntype : int
230  Exceptions : throws if end < 0
231  Caller : Transcript, Translation
232  Status : Stable
233 
234 =cut
235 
# Getter/setter for the one-based, inclusive end coordinate.
# A defined negative value is rejected; 0 is allowed so that an insert
# before the first base (start = 1, end = 0) can be expressed.
sub end {
  my ($self, @args) = @_;

  return $self->{'end'} unless @args;

  my $new_end = $args[0];
  throw("end must be greater than or equal to 0")
    if(defined($new_end) && $new_end < 0);

  $self->{'end'} = $new_end;
  return $self->{'end'};
}
249 
250 
251 =head2 alt_seq
252 
253  Arg [1] : (optional) string $alt_seq
254  Example : my $alt_seq = $se_attrib->alt_seq();
255  Description: Getter/Setter for the replacement sequence used by this edit.
256  The sequence may either be a string of amino acids or
257  nucleotides depending on the context in which this edit is
258  used.
259 
260  In the case of a deletion the replacement sequence is an empty
261  string.
262  Returntype : string
263  Exceptions : none
264  Caller : Transcript, Translation
265  Status : Stable
266 
267 =cut
268 
# Getter/setter for the replacement sequence. Any false value passed to
# the setter (undef, '', 0) is stored as the empty string.
sub alt_seq {
  my ($self, @args) = @_;

  if(@args) {
    my $replacement = $args[0];
    $self->{'alt_seq'} = $replacement ? $replacement : '';
  }

  return $self->{'alt_seq'};
}
274 
275 
276 =head2 length_diff
277 
278  Arg [1] : none
279  Example : my $diff = $sea->length_diff();
280  Description: Returns the difference in length caused by applying this
281  edit to a sequence. This may be negative (deletion),
282  positive (insertion) or 0 (replacement).
283 
284  If either start or end are not defined 0 is returned.
285  Returntype : int
286  Exceptions : none
287  Caller : general
288  Status : Stable
289 
290 =cut
291 
# Returns the change in sequence length this edit causes: the length of
# alt_seq minus the length of the replaced region. Returns 0 when either
# coordinate is undefined.
sub length_diff {
  my $self = shift;

  my ($start, $end) = @{$self}{'start', 'end'};
  return 0 unless(defined($end) && defined($start));

  my $replaced_length = $end - $start + 1;
  return length($self->{'alt_seq'}) - $replaced_length;
}
299 
300 
301 
302 =head2 name
303 
304  Arg [1] : (optional) string $name
305  Example : my $name = $seqedit->name();
306  Description: Getter/Setter for the name of this SeqEdit
307  Returntype : string
308  Exceptions : none
309  Caller : general
310  Status : Stable
311 
312 =cut
313 
# Getter/setter for the name of this SeqEdit.
sub name {
  my ($self, @args) = @_;

  if(@args) {
    $self->{'name'} = $args[0];
  }

  return $self->{'name'};
}
319 
320 
321 
322 
323 =head2 code
324 
325  Arg [1] : (optional) string $code
326  Example : my $code = $seqedit->code();
327  Description: Getter/Setter for the code of this SeqEdit
328  Returntype : string
329  Exceptions : none
330  Caller : general
331  Status : Stable
332 
333 =cut
334 
# Getter/setter for the code of this SeqEdit.
sub code {
  my ($self, @args) = @_;

  if(@args) {
    $self->{'code'} = $args[0];
  }

  return $self->{'code'};
}
340 
341 
342 
343 =head2 description
344 
345  Arg [1] : (optional) string $desc
346  Example : my $desc = $seqedit->description();
347  Description: Getter/Setter for the description of this SeqEdit
348  Returntype : string
349  Exceptions : none
350  Caller : general
351  Status : Stable
352 
353 =cut
354 
# Getter/setter for the free-text description of this SeqEdit.
sub description {
  my ($self, @args) = @_;

  if(@args) {
    $self->{'description'} = $args[0];
  }

  return $self->{'description'};
}
360 
361 
362 
363 =head2 get_Attribute
364 
365  Arg [1] : none
366  Example : my $attrib = $seqedit->get_Attribute();
367  $transcript->add_Attributes($attrib);
368  Description: Converts a SeqEdit object into an Attribute object. This
369  allows the SeqEdit to be stored as any other attribute in the
370  ensembl database. The start/end and alt_seq properties
371  should be set before calling this method.
372  Returntype : Bio::EnsEMBL::Attribute
373  Exceptions : warning if start/end or alt_seq properties are not defined
374  Caller : general
375  Status : Stable
376 
377 =cut
378 
# Converts this SeqEdit into a Bio::EnsEMBL::Attribute whose value is
# "<start> <end> <alt_seq>". If any of the three properties is undefined
# a warning is issued and the attribute is created with an empty value.
sub get_Attribute {
  my $self = shift;

  my $start   = $self->start();
  my $end     = $self->end();
  my $alt_seq = $self->alt_seq();

  my $value;

  if(defined($start) && defined($end) && defined($alt_seq)) {
    $value = join(' ', $start, $end, $alt_seq);
  } else {
    # Only 'throw' is imported into this package, so warning() is called
    # by its fully qualified name to avoid an "Undefined subroutine" error.
    Bio::EnsEMBL::Utils::Exception::warning(
      'Attribute value cannot be created unless start, end and alt_seq ' .
      'properties are defined');
    $value = '';
  }

  return Bio::EnsEMBL::Attribute->new(-CODE        => $self->code(),
                                      -VALUE       => $value,
                                      -NAME        => $self->name(),
                                      -DESCRIPTION => $self->description());
}
401 
402 
403 =head2 apply_edit
404 
405  Arg [1] : reference to string $seqref
406  Example : $sequence = 'ACTGAATATTTAAGGCA';
407  $seqedit->apply_edit(\$sequence);
408  print $sequence, "\n";
409  Description: Applies this edit directly to a sequence which is
410  passed by reference. The coordinates of this SeqEdit
411  are assumed to be relative to the start of the sequence
412  argument.
413  If either the start or end of this SeqEdit are not defined
414  this function will not do anything to the passed sequence.
415  Returntype : reference to the same sequence that was passed in
416  Exceptions : none
417  Caller : Transcript, Translation
418  Status : Stable
419 
420 =cut
421 
# Applies this edit in place to the string referenced by the argument and
# returns that same reference. Coordinates are one-based and inclusive;
# the sequence is left untouched when start or end is undefined.
sub apply_edit {
  my ($self, $seqref) = @_;

  throw("Reference to scalar argument expected")
    if(ref($seqref) ne 'SCALAR');

  my ($start, $end) = @{$self}{'start', 'end'};
  return $seqref unless(defined($start) && defined($end));

  # Convert to a zero-based offset; a length of 0 (start == end + 1)
  # turns the replacement into an insertion.
  my $offset          = $start - 1;
  my $replaced_length = $end - $start + 1;
  substr(${$seqref}, $offset, $replaced_length) = $self->{'alt_seq'};

  return $seqref;
}
439 
440 
441 1;
Bio::EnsEMBL::Translation
Definition: Translation.pm:32
Bio::EnsEMBL::SeqEdit::get_Attribute
public Bio::EnsEMBL::Attribute get_Attribute()
Bio::EnsEMBL::SeqEdit::new
public Bio::EnsEMBL::SeqEdit new()
Bio::EnsEMBL
Definition: AltAlleleGroup.pm:5
Bio::EnsEMBL::Attribute::new
public Bio::EnsEMBL::Attribute new()
Bio::EnsEMBL::SeqEdit
Definition: SeqEdit.pm:55
Bio::EnsEMBL::Transcript
Definition: Transcript.pm:44
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::Translation::end
public Int end()
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34
Bio::EnsEMBL::Utils::Exception
Definition: Exception.pm:68