ensembl-hive  2.8.1
SeqFeature.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::SeqFeature - Ensembl specific sequence feature.
34 
35 =head1 DESCRIPTION
36 
37 Do not use this module if you can avoid it. It has been replaced by
38 Bio::EnsEMBL::Feature. This module has a long history of usage but has
39 become very bloated, and quite unweildy. It was decided to replace
40 it completely with a smaller, light-weight feature class rather than
41 attempting to refactor this class, and maintain strict backwards
42 compatibility.
43 
44 Part of the complexity of this class was in its extensive
45 inheritance. As an example the following is a simplified inheritance
46 heirarchy that was present for Bio::EnsEMBL::DnaAlignFeature:
47 
48  Bio::EnsEMBL::DnaAlignFeature
52  Bio::SeqFeatureI
53  Bio::RangeI
54  Bio::Root::RootI
55 
56 The new Bio::EnsEMBL::Feature class is much shorter, and hopefully much
57 easier to understand and maintain.
58 
59 =head1 METHODS
60 
61 =cut
62 
63 
64 # Let the code begin...
65 
66 
68 
69 use vars qw(@ISA);
70 use strict;
71 
72 
74 
75 use Bio::EnsEMBL::Utils::Argument qw(rearrange);
76 
77 sub new {
78  my($caller,@args) = @_;
79 
80  my $self = {};
81 
82  if(ref $caller) {
83  bless $self, ref $caller;
84  } else {
85  bless $self, $caller;
86  }
87 
88  $self->{'_gsf_tag_hash'} = {};
89  $self->{'_gsf_sub_array'} = [];
90  $self->{'_parse_h'} = {};
91  $self->{'_is_splittable'} = 0;
92 
93  my ($start,$end,$strand,$frame,$score,$analysis,$seqname, $source_tag,
94  $primary_tag, $percent_id, $p_value, $phase, $end_phase) =
95 
96  &rearrange([qw(START
97  END
98  STRAND
99  FRAME
100  SCORE
101  ANALYSIS
102  SEQNAME
103  SOURCE_TAG
104  PRIMARY_TAG
105  PERCENT_ID
106  P_VALUE
107  PHASE
108  END_PHASE
109  )],@args);
110 
111  # $gff_string && $self->_from_gff_string($gff_string);
112 
113  if ( defined $analysis && $analysis ne "") { $self->analysis($analysis)};
114  if ( defined ($start) && $start ne "" ) { $self->start($start)};
115  if ( defined ($end ) && $end ne "" ) { $self->end($end)}
116  if ( defined $strand && $strand ne "") { $self->strand($strand)}
117  if ( defined $frame && $frame ne "") { $self->frame($frame)}
118  if ( defined $score && $score ne "") { $self->score($score)}
119  if ( defined $seqname && $seqname ne "") { $self->seqname($seqname)};
120  if ( defined $percent_id && $percent_id ne ""){ $self->percent_id($percent_id)};
121  if ( defined $p_value && $p_value ne "") { $self->p_value($p_value)};
122  if ( defined $phase && $phase ne "") { $self->phase($phase)};
123  if ( defined $end_phase && $end_phase ne "") { $self->end_phase($end_phase)};
124 
125  return $self; # success - we hope!
126 
127 }
128 
129 
130 
131 
132 
133 
134 =head2 start
135 
136  Title : start
137  Usage : $start = $feat->start
138  $feat->start(20)
139  Function: Get/set on the start coordinate of the feature
140  Returns : integer
141  Args : none
142 
143 
144 =cut
145 
146 sub start{
147  my ($self,$value) = @_;
148 
149  if (defined($value)) {
150  if ($value !~ /^\-?\d+/ ) {
151  $self->throw("$value is not a valid start");
152  }
153 
154  $self->{'_gsf_start'} = $value
155  }
156 
157  return $self->{'_gsf_start'};
158 
159 }
160 
161 =head2 end
162 
163  Title : end
164  Usage : $end = $feat->end
165  $feat->end($end)
166  Function: get/set on the end coordinate of the feature
167  Returns : integer
168  Args : none
169 
170 
171 =cut
172 
173 sub end{
174  my ($self,$value) = @_;
175 
176  if (defined($value)) {
177  if( $value !~ /^\-?\d+/ ) {
178  $self->throw("[$value] is not a valid end");
179  }
180 
181  $self->{'_gsf_end'} = $value;
182  }
183 
184  return $self->{'_gsf_end'};
185 }
186 
187 =head2 length
188 
189  Title : length
190  Usage :
191  Function:
192  Example :
193  Returns :
194  Args :
195 
196 
197 =cut
198 
199 sub length{
200  my ($self,@args) = @_;
201 
202  return $self->end - $self->start +1;
203 }
204 
205 
206 =head2 strand
207 
208  Title : strand
209  Usage : $strand = $feat->strand()
210  $feat->strand($strand)
211  Function: get/set on strand information, being 1,-1 or 0
212  Returns : -1,1 or 0
213  Args : none
214 
215 
216 =cut
217 
218 sub strand {
219  my ($self,$value) = @_;
220 
221  if (defined($value)) {
222  if( $value eq '+' ) { $value = 1; }
223  if( $value eq '-' ) { $value = -1; }
224  if( $value eq '.' ) { $value = 0; }
225 
226  if( $value != -1 && $value != 1 && $value != 0 ) {
227  $self->throw("$value is not a valid strand info");
228  }
229  $self->{'_gsf_strand'} = $value;
230  }
231 
232  return $self->{'_gsf_strand'};
233 }
234 
235 
236 =head2 move
237 
238  Arg [1] : int $start
239  Arg [2] : int $end
240  Arg [3] : (optional) int $strand
241  Example : $feature->move(100, 200, -1);
242  Description: Moves a feature to a different location. This is faster
243  then calling 3 seperate accesors in a large loop.
244  Returntype : none
245  Exceptions : none
246  Caller : BaseFeatureAdaptor
247 
248 =cut
249 
250 sub move {
251  my ($self, $start, $end, $strand) = @_;
252 
253  $self->{'_gsf_start'} = $start;
254  $self->{'_gsf_end'} = $end;
255  if(defined $strand) {
256  $self->{'_gsf_strand'} = $strand;
257  }
258 }
259 
260 
261 =head2 score
262 
263  Title : score
264  Usage : $score = $feat->score()
265  $feat->score($score)
266  Function: get/set on score information
267  Returns : float
268  Args : none if get, the new value if set
269 
270 
271 =cut
272 
273 sub score {
274  my ($self,$value) = @_;
275 
276  if(defined ($value) ) {
277  if( $value !~ /^[+-]?\d+\.?\d*(e-\d+)?/ ) {
278  $self->throw("'$value' is not a valid score");
279  }
280  $self->{'_gsf_score'} = $value;
281  }
282 
283  return $self->{'_gsf_score'};
284 }
285 
286 =head2 frame
287 
288  Title : frame
289  Usage : $frame = $feat->frame()
290  $feat->frame($frame)
291  Function: get/set on frame information
292  Returns : 0,1,2
293  Args : none if get, the new value if set
294 
295 
296 =cut
297 
298 sub frame {
299  my ($self,$value) = @_;
300 
301  if (defined($value)) {
302  if( $value != 1 && $value != 2 && $value != 3 ) {
303  $self->throw("'$value' is not a valid frame");
304  }
305  $self->{'_gsf_frame'} = $value;
306  }
307 
308  return $self->{'_gsf_frame'};
309 }
310 
311 =head2 primary_tag
312 
313  Title : primary_tag
314  Usage : $tag = $feat->primary_tag()
315  $feat->primary_tag('exon')
316  Function: get/set on the primary tag for a feature,
317  eg 'exon'
318  Returns : a string
319  Args : none
320 
321 
322 =cut
323 
324 sub primary_tag{
325  my ($self,$arg) = @_;
326 
327  unless($self->analysis) {
328  return '';
329  }
330 
331  return $self->analysis->gff_feature();
332 }
333 
334 =head2 source_tag
335 
336  Title : source_tag
337  Usage : $tag = $feat->source_tag()
338  $feat->source_tag('genscan');
339  Function: Returns the source tag for a feature,
340  eg, 'genscan'
341  Returns : a string
342  Args : none
343 
344 
345 =cut
346 
347 sub source_tag{
348  my ($self,$arg) = @_;
349 
350  unless($self->analysis) {
351  return "";
352  }
353 
354  return $self->analysis->gff_source();
355 }
356 
357 
358 =head2 analysis
359 
360  Title : analysis
361  Usage : $sf->analysis();
362  Function: Store details of the program/database
363  and versions used to create this feature.
364 
365  Example :
366  Returns :
367  Args :
368 
369 
370 =cut
371 
372 sub analysis {
373  my ($self,$value) = @_;
374 
375  if (defined($value)) {
376  unless(ref($value) && $value->isa('Bio::EnsEMBL::Analysis')) {
377  $self->throw("Analysis is not a Bio::EnsEMBL::Analysis object "
378  . "but a $value object");
379  }
380 
381  $self->{_analysis} = $value;
382  } else {
383  #if _analysis is not defined, create a new analysis object
384  unless(defined $self->{_analysis}) {
385  $self->{_analysis} = new Bio::EnsEMBL::Analysis();
386  }
387  }
388 
389  return $self->{_analysis};
390 }
391 
392 =head2 validate
393 
394  Title : validate
395  Usage : $sf->validate;
396  Function: Checks whether all the data is present in the
397  object.
398  Example :
399  Returns :
400  Args :
401 
402 
403 =cut
404 
405 sub validate {
406  my ($self) = @_;
407 
408  $self->vthrow("Seqname not defined in feature") unless defined($self->seqname);
409  $self->vthrow("start not defined in feature") unless defined($self->start);
410  $self->vthrow("end not defined in feature") unless defined($self->end);
411  $self->vthrow("strand not defined in feature") unless defined($self->strand);
412  $self->vthrow("score not defined in feature") unless defined($self->score);
413  $self->vthrow("analysis not defined in feature") unless defined($self->analysis);
414 
415  if ($self->end < $self->start) {
416  $self->vthrow("End coordinate < start coordinate");
417  }
418 
419 }
420 
421 
422 
423 sub vthrow {
424  my ($self,$message) = @_;
425 
426  print(STDERR "Error validating feature [$message]\n");
427  print(STDERR " Seqname : [" . $self->{_seqname} . "]\n");
428  print(STDERR " Start : [" . $self->{_gsf_start} . "]\n");
429  print(STDERR " End : [" . $self->{_gsf_end} . "]\n");
430  print(STDERR " Strand : [" .
431  ((defined ($self->{_gsf_strand})) ? $self->{_gsf_strand} : "undefined") . "]\n");
432 
433  print(STDERR " Score : [" . $self->{_gsf_score} . "]\n");
434 
435  print(STDERR " Analysis : [" . $self->{_analysis}->dbID . "]\n");
436 
437  $self->throw("Invalid feature - see dump on STDERR");
438 }
439 
440 
441 =head2 validate_prot_feature
442 
443  Title : validate_prot_feature
444  Usage :
445  Function:
446  Example :
447  Returns :
448  Args :
449 
450 
451 =cut
452 
453 # Shouldn't this go as "validate" into Pro_SeqFeature?
454 sub validate_prot_feature{
455  my ($self,$num) = @_;
456  $self->throw("Seqname not defined in feature") unless defined($self->seqname);
457  $self->throw("start not defined in feature") unless defined($self->start);
458  $self->throw("end not defined in feature") unless defined($self->end);
459  if ($num == 1) {
460  $self->throw("score not defined in feature") unless defined($self->score);
461  $self->throw("percent_id not defined in feature") unless defined($self->percent_id);
462  $self->throw("evalue not defined in feature") unless defined($self->p_value);
463  }
464  $self->throw("analysis not defined in feature") unless defined($self->analysis);
465 }
466 
467 
468 
469 # These methods are specified in the SeqFeatureI interface but we don't want
470 # people to store data in them. These are just here in order to keep
471 # existing code working
472 
473 
474 =head2 has_tag
475 
476  Title : has_tag
477  Usage : $value = $self->has_tag('some_tag')
478  Function: Returns the value of the tag (undef if
479  none)
480  Returns :
481  Args :
482 
483 
484 =cut
485 
486 sub has_tag{
487  my ($self,$tag) = (shift, shift);
488 
489  return exists $self->{'_gsf_tag_hash'}->{$tag};
490 }
491 
492 =head2 add_tag_value
493 
494  Title : add_tag_value
495  Usage : $self->add_tag_value('note',"this is a note");
496  Returns : nothing
497  Args : tag (string) and value (any scalar)
498 
499 
500 =cut
501 
502 sub add_tag_value{
503  my ($self,$tag,$value) = @_;
504 
505  if( !defined $self->{'_gsf_tag_hash'}->{$tag} ) {
506  $self->{'_gsf_tag_hash'}->{$tag} = [];
507  }
508 
509  push(@{$self->{'_gsf_tag_hash'}->{$tag}},$value);
510 }
511 
512 =head2 each_tag_value
513 
514  Title : each_tag_value
515  Usage :
516  Function:
517  Example :
518  Returns :
519  Args :
520 
521 
522 =cut
523 
524 sub each_tag_value {
525  my ($self,$tag) = @_;
526  if( ! exists $self->{'_gsf_tag_hash'}->{$tag} ) {
527  $self->throw("asking for tag value that does not exist $tag");
528  }
529 
530  return @{$self->{'_gsf_tag_hash'}->{$tag}};
531 }
532 
533 
534 =head2 all_tags
535 
536  Title : all_tags
537  Usage : @tags = $feat->all_tags()
538  Function: gives all tags for this feature
539  Returns : an array of strings
540  Args : none
541 
542 
543 =cut
544 
545 sub all_tags{
546  my ($self,@args) = @_;
547 
548  return keys %{$self->{'_gsf_tag_hash'}};
549 }
550 
551 
552 
553 =head2 seqname
554 
555  Arg [1] : string $seqname
556  Example : $seqname = $self->seqname();
557  Description: Obtains the seqname of this features sequence. This is set
558  automatically when a sequence with a name is attached, or may
559  be set manually.
560  Returntype : string
561  Exceptions : none
562  Caller : general, attach_seq
563 
564 =cut
565 
566 sub seqname{
567  my ($self,$seqname) = @_;
568 
569  my $seq = $self->contig();
570 
571  if(defined $seqname) {
572  $self->{_seqname} = $seqname;
573  } else {
574  if($seq && ref $seq && $seq->can('name')) {
575  $self->{_seqname} = $seq->name();
576  }
577  }
578 
579  return $self->{_seqname};
580 }
581 
582 
583 =head2 attach_seq
584 
585  Title : attach_seq
586  Usage : $sf->attach_seq($seq)
587  Function: Attaches a Bio::PrimarySeqI object to this feature. This
588  Bio::PrimarySeqI object is for the *entire* sequence: ie
589  from 1 to 10000
590  Example :
591  Returns :
592  Args :
593 
594 
595 =cut
596 
597 sub attach_seq{
598  my ($self, $seq) = @_;
599 
600  $self->contig($seq);
601 }
602 
603 =head2 seq
604 
605  Example : $tseq = $sf->seq()
606  Function: returns the sequence (if any ) for this feature truncated to the range spanning the feature
607  Returns : a Bio::PrimarySeq object (I reckon)
608 
609 =cut
610 
611 sub seq{
612  my ($self,$arg) = @_;
613 
614  if( defined $arg ) {
615  $self->throw("Calling SeqFeature::Generic->seq with an argument. " .
616  "You probably want attach_seq");
617  }
618 
619  if( ! exists $self->{'_gsf_seq'} ) {
620  return undef;
621  }
622 
623  # assumming our seq object is sensible, it should not have to yank
624  # the entire sequence out here.
625 
626  my $seq = $self->{'_gsf_seq'}->trunc($self->start(),$self->end());
627 
628 
629  if( $self->strand == -1 ) {
630 
631  # ok. this does not work well (?)
632  #print STDERR "Before revcom", $seq->str, "\n";
633  $seq = $seq->revcom;
634  #print STDERR "After revcom", $seq->str, "\n";
635  }
636 
637  return $seq;
638 }
639 
640 =head2 entire_seq
641 
642  Title : entire_seq
643  Usage : $whole_seq = $sf->entire_seq()
644  Function: gives the entire sequence that this seqfeature is attached to
645  Example :
646  Returns :
647  Args :
648 
649 
650 =cut
651 
652 sub entire_seq{
653  my ($self) = @_;
654 
655  return $self->contig;
656 }
657 
658 
659 =head2 sub_SeqFeature
660 
661  Title : sub_SeqFeature
662  Usage : @feats = $feat->sub_SeqFeature();
663  Function: Returns an array of sub Sequence Features
664  Returns : An array
665  Args : none
666 
667 
668 =cut
669 
670 sub sub_SeqFeature {
671  my ($self) = @_;
672 
673  if ( $self->{'_gsf_sub_array'} ) {
674  return @{ $self->{'_gsf_sub_array'} };
675  } else {
676  return ();
677  }
678 }
679 
680 =head2 add_sub_SeqFeature
681 
682  Title : add_sub_SeqFeature
683  Usage : $feat->add_sub_SeqFeature($subfeat);
684  $feat->add_sub_SeqFeature($subfeat,'EXPAND')
685  Function: adds a SeqFeature into the subSeqFeature array.
686  with no 'EXPAND' qualifer, subfeat will be tested
687  as to whether it lies inside the parent, and throw
688  an exception if not.
689 
690  If EXPAND is used, the parents start/end/strand will
691  be adjusted so that it grows to accommodate the new
692  subFeature
693  Returns : nothing
694  Args : An object which has the SeqFeatureI interface
695 
696 
697 =cut
698 
699 sub add_sub_SeqFeature{
700  my ($self,$feat,$expand) = @_;
701 
702  if( !$feat->isa('Bio::SeqFeatureI') ) {
703  $self->warn("$feat does not implement Bio::SeqFeatureI. Will add it anyway, but beware...");
704  }
705 
706  if( $expand eq 'EXPAND' ) {
707  # if this doesn't have start/end set - forget it!
708  if( !defined $self->start && !defined $self->end ) {
709  $self->start($feat->start());
710  $self->end($feat->end());
711  $self->strand($feat->strand);
712  } else {
713  my ($start,$end);
714  if( $feat->start < $self->start ) {
715  $start = $feat->start;
716  }
717 
718  if( $feat->end > $self->end ) {
719  $end = $feat->end;
720  }
721 
722  $self->start($start);
723  $self->end($end);
724 
725  }
726  } else {
727  if( !defined($feat->start()) || !defined($feat->end()) ||
728  !defined($self->start()) || !defined($self->end())) {
729  $self->throw( "This SeqFeature and the sub_SeqFeature must define".
730  " start and end.");
731  }
732  if($feat->start() > $feat->end() || $self->start() > $self->end()) {
733  $self->throw("This SeqFeature and the sub_SeqFeature must have " .
734  "start that is less than or equal to end.");
735  }
736  if($feat->start() < $self->start() || $feat->end() > $self->end() ) {
737  $self->throw("$feat is not contained within parent feature, " .
738  "and expansion is not valid");
739  }
740  }
741 
742  push(@{$self->{'_gsf_sub_array'}},$feat);
743 
744 }
745 
746 =head2 flush_sub_SeqFeature
747 
748  Title : flush_sub_SeqFeature
749  Usage : $sf->flush_sub_SeqFeature
750  Function: Removes all sub SeqFeature
751  (if you want to remove only a subset, take
752  an array of them all, flush them, and add
753  back only the guys you want)
754  Example :
755  Returns : none
756  Args : none
757 
758 
759 =cut
760 
761 sub flush_sub_SeqFeature {
762  my ($self) = @_;
763 
764  $self->{'_gsf_sub_array'} = []; # zap the array implicitly.
765 }
766 
767 
768 sub id {
769  my ($self,$value) = @_;
770 
771  if (defined($value)) {
772  $self->{_id} = $value;
773  }
774 
775  return $self->{_id};
776 
777 }
778 
779 =head2 percent_id
780 
781  Title : percent_id
782  Usage : $pid = $feat->percent_id()
783  $feat->percent_id($pid)
784  Function: get/set on percentage identity information
785  Returns : float
786  Args : none if get, the new value if set
787 
788 =cut
789 
790 sub percent_id {
791  my ($self,$value) = @_;
792 
793  if (defined($value))
794  {
795  $self->{_percent_id} = $value;
796  }
797 
798  return $self->{_percent_id};
799 }
800 
801 =head2 p_value
802 
803  Title : p_value
804  Usage : $p_val = $feat->p_value()
805  $feat->p_value($p_val)
806  Function: get/set on p value information
807  Returns : float
808  Args : none if get, the new value if set
809 
810 =cut
811 
812 sub p_value {
813  my ($self,$value) = @_;
814 
815  if (defined($value))
816  {
817  $self->{_p_value} = $value;
818  }
819 
820  return $self->{_p_value};
821 }
822 
823 =head2 phase
824 
825  Title : phase
826  Usage : $phase = $feat->phase()
827  $feat->phase($phase)
828  Function: get/set on start phase of predicted exon feature
829  Returns : [0,1,2]
830  Args : none if get, 0,1 or 2 if set.
831 
832 =cut
833 
834 sub phase {
835  my ($self, $value) = @_;
836 
837  if (defined($value) )
838  {
839  $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
840  $self->{_phase} = $value;
841  }
842 
843  return $self->{_phase};
844 }
845 
846 =head2 end_phase
847 
848  Title : end_phase
849  Usage : $end_phase = $feat->end_phase()
850  $feat->end_phase($end_phase)
851  Function: returns end_phase based on phase and length of feature
852  Returns : [0,1,2]
853  Args : none if get, 0,1 or 2 if set.
854 
855 =cut
856 
857 sub end_phase {
858  my ($self, $value) = @_;
859 
860  if (defined($value))
861  {
862  $self->throw("Valid values for Phase are [0,1,2]") if ($value < 0 || $value > 2);
863  $self->{_end_phase} = $value;
864  }
865 
866  return $self->{_end_phase};
867 }
868 
869 
870 sub gffstring {
871  my ($self) = @_;
872 
873  my $str;
874 
875  my $strand = "+";
876 
877  if ((defined $self->strand)&&($self->strand == -1)) {
878  $strand = "-";
879  }
880 
881  $str .= (defined $self->seqname) ? $self->seqname."\t" : "\t";
882  $str .= (defined $self->source_tag) ? $self->source_tag."\t" : "\t";
883  $str .= (defined $self->primary_tag) ? $self->primary_tag."\t" : "\t";
884  $str .= (defined $self->start) ? $self->start."\t" : "\t";
885  $str .= (defined $self->end) ? $self->end."\t" : "\t";
886  $str .= (defined $self->score) ? $self->score."\t" : "\t";
887  $str .= (defined $self->strand) ? $strand."\t" : ".\t";
888  $str .= (defined $self->phase) ? $self->phase."\t" : ".\t";
889  eval{
890  $str .= (defined $self->end_phase) ? $self->end_phase."\t" : ".\t";
891  };
892 
893  return $str;
894 }
895 
896 
897 =head2 external_db
898 
899  Title : external_db
900  Usage : $pid = $feat->external_db()
901  $feat->external_db($dbid)
902  Function: get/set for an external db accession number (e.g.: Interpro)
903  Returns :
904  Args : none if get, the new value if set
905 
906 =cut
907 
908 sub external_db {
909  my ($self,$value) = @_;
910 
911  if (defined($value))
912  {
913  $self->{'_external_db'} = $value;
914  }
915 
916  return $self->{'_external_db'};
917 }
918 
919 
920 
921 =head2 contig
922 
923  Arg [1] : Bio::PrimarySeqI $seq
924  Example : $seq = $self->contig;
925  Description: Accessor to attach/retrieve a sequence to/from a feature
926  Returntype : Bio::PrimarySeqI
927  Exceptions : none
928  Caller : general
929 
930 =cut
931 
932 sub contig {
933  my ($self, $arg) = @_;
934 
935  if ($arg) {
936  unless (defined $arg && ref $arg && $arg->isa("Bio::PrimarySeqI")) {
937  $self->throw("Must attach Bio::PrimarySeqI objects to SeqFeatures");
938  }
939 
940  $self->{'_gsf_seq'} = $arg;
941 
942  # attach to sub features if they want it
943 
944  foreach my $sf ($self->sub_SeqFeature) {
945  if ($sf->can("attach_seq")) {
946  $sf->attach_seq($arg);
947  }
948  }
949  }
950  #print STDERR "contig is ".$self->{'_gsf_seq'}." with name ".$self->{'_gsf_seq'}->name."\n" unless(!$self->{'_gsf_seq'});
951 # my ($p, $f, $l) = caller;
952 # print STDERR "Caller = ".$f." ".$l."\n";
953  return $self->{'_gsf_seq'};
954 }
955 
956 
957 
958 sub is_splittable {
959  my ($self, $arg) = @_;
960 
961  if (defined $arg) {
962  $self->{'_is_splittable'} = $arg;
963  }
964  return $self->{'_is_splittable'};
965 }
966 
967 
968 sub transform {
969  my ($self, $slice) = @_;
970 
971  unless (defined $slice) {
972 
973  if ((defined $self->contig) &&
974  ($self->contig->isa("Bio::EnsEMBL::RawContig"))) {
975 
976  # we are already in rawcontig coords, nothing needs to be done
977  return $self;
978 
979  }
980  else {
981  # transform to raw_contig coords from Slice coords
982  return $self->_transform_to_RawContig();
983  }
984  }
985 
986  if (defined $self->contig) {
987 
988  if ($self->contig->isa("Bio::EnsEMBL::RawContig")) {
989 
990  # transform to slice coords from raw contig coords
991  return $self->_transform_to_Slice($slice);
992  }
993  elsif ($self->contig->isa( "Bio::EnsEMBL::Slice" ) or $self->contig->isa( "Bio::EnsEMBL::LRGSlice" )) {
994 
995  # transform to slice coords from other slice coords
996  return $self->_transform_between_Slices($slice);
997  }
998  else {
999 
1000  # Unknown contig type
1001  $self->throw("Cannot transform unknown contig type @{[$self->contig]}");
1002  }
1003  }
1004  else {
1005 
1006  #Can't convert to slice coords without a contig to work with
1007  return $self->throw("Object's contig is not defined - cannot transform");
1008  }
1009 
1010 }
1011 
1012 
1013 sub _transform_to_Slice {
1014  my ($self, $slice) = @_;
1015 
1016  $self->throw("can't transform coordinates of $self without a contig defined")
1017  unless $self->contig;
1018 
1019  unless($self->contig->adaptor) {
1020  $self->throw("cannot transform coordinates of $self without adaptor " .
1021  "attached to contig");
1022  }
1023 
1024  my $dbh = $self->contig->adaptor->db;
1025 
1026  my $mapper =
1027  $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
1028  my $rca = $dbh->get_RawContigAdaptor;
1029 
1030  my @mapped = $mapper->map_coordinates_to_assembly(
1031  $self->contig->dbID,
1032  $self->start,
1033  $self->end,
1034  $self->strand
1035  );
1036 
1037  unless (@mapped) {
1038  $self->throw("couldn't map $self to Slice");
1039  }
1040 
1041  unless (@mapped == 1) {
1042  $self->throw("$self should only map to one chromosome - " .
1043  "something bad has happened ...");
1044  }
1045 
1046  if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
1047  $self->warn("feature lies on gap\n");
1048  return;
1049  }
1050 
1051  if( ! defined $slice->chr_name() ) {
1052  my $slice_adaptor = $slice->adaptor();
1053  %$slice = %{$slice_adaptor->fetch_by_chr_name( $mapped[0]->id() )};
1054  }
1055 
1056  # mapped coords are on chromosome - need to convert to slice
1057  if($slice->strand == 1) {
1058  $self->start ($mapped[0]->start - $slice->chr_start + 1);
1059  $self->end ($mapped[0]->end - $slice->chr_start + 1);
1060  $self->strand ($mapped[0]->strand);
1061  } else {
1062  $self->start ($slice->chr_end - $mapped[0]->end + 1);
1063  $self->end ($slice->chr_end - $mapped[0]->start + 1);
1064  $self->strand ($mapped[0]->strand * -1);
1065  }
1066 
1067  $self->seqname($mapped[0]->id);
1068 
1069  #set the contig to the slice
1070  $self->contig($slice);
1071 
1072  return $self;
1073 }
1074 
1075 
1076 sub _transform_between_Slices {
1077  my ($self, $to_slice) = @_;
1078 
1079  my $from_slice = $self->contig;
1080 
1081  $self->throw("New contig [$to_slice] is not a Bio::EnsEMBL::Slice")
1082  unless ($to_slice->isa("Bio::EnsEMBL::Slice") or $to_slice->isa("Bio::EnsEMBL::LRGSlice") );
1083 
1084  if ((my $c1 = $from_slice->chr_name) ne (my $c2 = $to_slice->chr_name)) {
1085  $self->warn("Can't transform between chromosomes: $c1 and $c2");
1086  return;
1087  }
1088 
1089  my($start, $end, $strand);
1090 
1091  #first convert to assembly coords
1092  if($from_slice->strand == 1) {
1093  $start = $from_slice->chr_start + $self->start - 1;
1094  $end = $from_slice->chr_start + $self->end - 1;
1095  $strand = $self->strand;
1096  } else {
1097  $start = $from_slice->chr_end - $self->end + 1;
1098  $end = $from_slice->chr_end - $self->start + 1;
1099  $strand = $self->strand;
1100  }
1101 
1102  #now convert to the other slice's coords
1103  if($to_slice->strand == 1) {
1104  $self->start ($start - $to_slice->chr_start + 1);
1105  $self->end ($end - $to_slice->chr_start + 1);
1106  $self->strand($strand);
1107  } else {
1108  $self->start ($to_slice->chr_end - $end + 1);
1109  $self->end ($to_slice->chr_end - $start + 1);
1110  $self->strand($strand * -1);
1111  }
1112 
1113  $self->contig($to_slice);
1114 
1115  return $self;
1116 }
1117 
1118 
1119 sub _transform_to_RawContig {
1120  my($self) = @_;
1121 
1122  #print STDERR "transforming ".$self." to raw contig coords\n";
1123  $self->throw("can't transform coordinates of $self without a contig defined")
1124  unless $self->contig;
1125 
1126  my $slice = $self->contig;
1127 
1128  unless($slice->adaptor) {
1129  $self->throw("can't transform coordinates of $self without an adaptor " .
1130  "attached to the feature's slice");
1131  }
1132 
1133  my $dbh = $slice->adaptor->db;
1134 
1135  my $mapper =
1136  $dbh->get_AssemblyMapperAdaptor->fetch_by_type($slice->assembly_type);
1137  my $rca = $dbh->get_RawContigAdaptor;
1138 
1139  #first convert the features coordinates to assembly coordinates
1140  my($start, $end, $strand);
1141  if($slice->strand == 1) {
1142  $start = $slice->chr_start + $self->start - 1;
1143  $end = $slice->chr_start + $self->end - 1;
1144  $strand = $self->strand;
1145  } else {
1146  $start = $slice->chr_end - $self->end + 1;
1147  $end = $slice->chr_end - $self->start + 1;
1148  $strand = $self->strand * -1;
1149  }
1150 
1151  #convert the assembly coordinates to RawContig coordinates
1152  my @mapped = $mapper->map_coordinates_to_rawcontig(
1153  $slice->chr_name,
1154  $start,
1155  $end,
1156  $strand
1157  );
1158 
1159  unless (@mapped) {
1160  $self->throw("couldn't map $self");
1161  return $self;
1162  }
1163 
1164  if (@mapped == 1) {
1165 
1166  if ($mapped[0]->isa("Bio::EnsEMBL::Mapper::Gap")) {
1167  $self->warn("feature lies on gap\n");
1168  return;
1169  }
1170 
1171  my $rc = $rca->fetch_by_dbID($mapped[0]->id);
1172 
1173  $self->start ($mapped[0]->start);
1174  $self->end ($mapped[0]->end);
1175  $self->strand ($mapped[0]->strand);
1176  $self->seqname ($mapped[0]->id);
1177  #print STDERR "setting contig to be ".$mapped[0]->id."\n";
1178  $self->contig($rca->fetch_by_dbID($mapped[0]->id));
1179 
1180  return $self;
1181  }
1182  else {
1183 
1184  # more than one object returned from mapper
1185  # possibly more than one RawContig in region
1186 
1187  my (@gaps, @coords);
1188 
1189  foreach my $m (@mapped) {
1190 
1191  if ($m->isa("Bio::EnsEMBL::Mapper::Gap")) {
1192  push @gaps, $m;
1193  }
1194  elsif ($m->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
1195  push @coords, $m;
1196  }
1197  }
1198 
1199  # case where only one RawContig maps
1200  if (@coords == 1) {
1201 
1202  $self->start ($coords[0]->start);
1203  $self->end ($coords[0]->end);
1204  $self->strand ($coords[0]->strand);
1205  $self->seqname($coords[0]->id);
1206  #print STDERR "2 setting contig to be ".$coords[0]->id."\n";
1207  $self->contig ($rca->fetch_by_dbID($coords[0]->id));
1208 
1209  $self->warn("Feature [$self] truncated as lies partially on a gap");
1210  return $self;
1211  }
1212 
1213  unless ($self->is_splittable) {
1214  $self->warn("Feature spans >1 raw contig - can't split\n");
1215  return;
1216  }
1217 
1218  my @out;
1219  my $obj = ref $self;
1220 
1221  SPLIT: foreach my $map (@mapped) {
1222 
1223  if ($map->isa("Bio::EnsEMBL::Mapper::Gap")) {
1224  $self->warn("piece of evidence lies on gap\n");
1225  next SPLIT;
1226  }
1227 
1228  my $feat = $obj->new;
1229 
1230  $feat->start ($map->start);
1231  $feat->end ($map->end);
1232  $feat->strand ($map->strand);
1233  #print STDERR "3 setting contig to be ".$mapped[0]->id."\n";
1234  $feat->contig ($rca->fetch_by_dbID($map->id));
1235  $feat->adaptor($self->adaptor) if $self->adaptor();
1236  $feat->display_label($self->display_label) if($self->can('display_label'));
1237  $feat->analysis($self->analysis);
1238  push @out, $feat;
1239  }
1240 
1241  return @out;
1242  }
1243 }
1244 
1245 
1246 1;
usage
public usage()
Bio::EnsEMBL::BaseAlignFeature
Definition: BaseAlignFeature.pm:90
Bio::EnsEMBL::SeqFeature
Definition: SeqFeature.pm:30
Bio::EnsEMBL::Feature
Definition: Feature.pm:47
accession
public accession()
exon
public exon()
Bio::EnsEMBL::FeaturePair
Definition: FeaturePair.pm:56
Bio::EnsEMBL::Analysis
Definition: PairAlign.pm:3
Bio
Definition: AltAlleleGroup.pm:4
Bio::EnsEMBL::Utils::Argument
Definition: Argument.pm:34