ensembl-hive  2.7.0
bio_ens_hit.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 DESCRIPTION
32 
33 Sequence alignment hits were previously stored within the core database
34 as ungapped alignments. This imposed 2 major constraints on alignments:
35 
36  a) alignments for a single hit record would require multiple rows in
37  the database, and
38  b) it was not possible to accurately retrieve the exact original alignment.
39 
40 Therefore, in the new branch sequence alignments are now stored as
41 gapped alignments in the cigar line format (where CIGAR stands for
42 Concise Idiosyncratic Gapped Alignment Report).
43 
44 In the cigar line format alignments are stored as follows:
45 
46  M: Match
47  D: Deletion
48  I: Insertion
49 
50 An example of an alignment for a hypothetical protein match is shown
51 below:
52 
53 
54  Query: 42 PGPAGLP----GSVGLQGPRGLRGPLP-GPLGPPL...
55  PG P G GP R PLGP
56  Sbjct: 1672 PGTP*TPLVPLGPWVPLGPSSPR--LPSGPLGPTD...
57 
58 This alignment would be stored in the protein_align_feature table as the following cigar line:
59 
60  7M4D12M2I2MD7M
61 
62 =cut
63 
64 package Bio::EnsEMBL::Utils::Converter::bio_ens_hit;
65 
66 use strict;
67 use vars qw(@ISA);
73 
75 
# Set up this converter together with its helper for plain sequence features.
#
# All arguments are handed unchanged to the parent class's _initialize.
# Once that has run, a second converter (Bio::SeqFeature::Generic ->
# Bio::EnsEMBL::SeqFeature) is built from this converter's analysis and
# contig and stored through _bio_ens_seqFeature_converter, where
# _convert_single_hsp later picks it up.
sub _initialize {
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    # After super initialized, analysis and contig are ready.
    # The constructor is called as Class->new(...) rather than with indirect
    # object syntax ("new Class(...)"), which Perl can parse as a plain
    # function call when a sub named "new" is in scope.
    my $bio_ens_seqFeature_converter = Bio::EnsEMBL::Utils::Converter->new(
        -in       => 'Bio::SeqFeature::Generic',
        -out      => 'Bio::EnsEMBL::SeqFeature',
        -analysis => $self->analysis,
        -contig   => $self->contig
    );
    $self->_bio_ens_seqFeature_converter($bio_ens_seqFeature_converter);
}
90 
# Route one input object to the matching conversion routine.
#
# The routine is chosen from the converter's configured input class
# ($self->in), not by inspecting $input itself:
#   Bio::Search::Hit::GenericHit  -> _convert_single_hit
#   Bio::Search::HSP::GenericHSP  -> _convert_single_hsp
# Any other input class is reported through $self->throw.
sub _convert_single {
    my ($self, $input) = @_;

    my $in  = $self->in;
    my $out = $self->out;

    return $self->_convert_single_hit($input)
        if $in =~ /Bio::Search::Hit::GenericHit/;

    return $self->_convert_single_hsp($input)
        if $in =~ /Bio::Search::HSP::GenericHSP/;

    $self->throw("[$in]->[$out], not implemented");
}
105 
# Placeholder for converting a Bio::Search::Hit::GenericHit.
#
# Not implemented: the arguments are ignored and nothing is returned
# (undef in scalar context, the empty list in list context).
sub _convert_single_hit {
    return;
}
110 
# Convert one Bio::Search::HSP::GenericHSP into an Ensembl align feature.
#
# Both features of the HSP are converted with the helper converter from
# _bio_ens_seqFeature_converter, and each receives the HSP's evalue (as
# p_value), score and percent identity (as percent_id). The align feature
# class is chosen from $hsp->algorithm:
#   blastn -> Bio::EnsEMBL::DnaDnaAlignFeature
#   blastx -> Bio::EnsEMBL::DnaPepAlignFeature
# The new feature gets the converter's contig attached and is returned.
#
# Reports through $self->throw when $hsp is not a GenericHSP object, when the
# HSP has no algorithm value, or when the algorithm is not one of the above.
sub _convert_single_hsp {
    my ($self, $hsp) = @_;

    # blessed() keeps an unblessed reference (e.g. a plain hashref) from
    # dying inside ->isa with a raw Perl error instead of the throw below.
    require Scalar::Util;
    unless (Scalar::Util::blessed($hsp)
        && $hsp->isa('Bio::Search::HSP::GenericHSP')) {
        $self->throw("a GenericHSP object needed");
    }

    my $bio_ens_seqFeature_converter = $self->_bio_ens_seqFeature_converter;
    my $ens_feature1 = $bio_ens_seqFeature_converter->_convert_single(
        $hsp->feature1);
    my $ens_feature2 = $bio_ens_seqFeature_converter->_convert_single(
        $hsp->feature2);

    # Both features carry the same HSP-level statistics.
    for my $ens_feature ($ens_feature1, $ens_feature2) {
        $ens_feature->p_value($hsp->evalue);
        $ens_feature->score($hsp->score);
        $ens_feature->percent_id($hsp->percent_identity);
    }

    my $cigar_string = $hsp->cigar_string;
    my @args = (
        -feature1     => $ens_feature1,
        -feature2     => $ens_feature2,
        -cigar_string => $cigar_string
    );

    my $contig = $self->contig;
    # choose the AlignFeature based on the blast program
    my $program = $hsp->algorithm;

    $self->throw("HSP does not have algorithm value") unless(defined($program));
    my $align_feature;

    # The lookbehind rejects a preceding letter, so "TBLASTN" and "TBLASTX"
    # are not mistaken for blastn/blastx (a bare substring match accepts
    # them); prefixed forms such as "WU-BLASTN" still match.
    if ($program =~ /(?<![a-z])blastn/i) {
        $align_feature = Bio::EnsEMBL::DnaDnaAlignFeature->new(@args);
    }
    elsif ($program =~ /(?<![a-z])blastx/i) {
        $align_feature = Bio::EnsEMBL::DnaPepAlignFeature->new(@args);
    }
    else {
        $self->throw("$program is not supported yet");
    }

    # Only reached without a feature if throw() returned instead of dying.
    $align_feature->attach_seq($contig) if defined $align_feature;

    return $align_feature;
}
157 
# Internal getter/setter for the converter used for seq feature conversion.
#
# Called with a defined argument, stores it on the object first. Always
# returns the currently stored converter (undef when none has been set).
# An undef argument leaves the stored value untouched.
sub _bio_ens_seqFeature_converter {
    my ($self, $converter) = @_;

    $self->{_bio_ens_seqFeature_converter} = $converter
        if defined $converter;

    return $self->{_bio_ens_seqFeature_converter};
}
167 
168 1;
Bio::EnsEMBL::DnaPepAlignFeature
Definition: DnaPepAlignFeature.pm:12
Bio::EnsEMBL::Utils::Sequence
Definition: Sequence.pm:22
Bio::EnsEMBL::Utils::Converter::bio_ens
Definition: bio_ens.pm:12
Bio::EnsEMBL::ProteinFeature
Definition: ProteinFeature.pm:24
Bio::EnsEMBL::Utils::Converter
Definition: bio_ens.pm:8
Bio::EnsEMBL::PepDnaAlignFeature
Definition: PepDnaAlignFeature.pm:10
Bio::EnsEMBL::DnaDnaAlignFeature
Definition: DnaDnaAlignFeature.pm:15