ensembl-hive  2.8.1
LRGSlice.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome
34 
35 =head1 SYNOPSIS
36 
37  $sa = $db->get_SliceAdaptor;
38 
39  $slice =
40  $sa->fetch_by_region( 'LRG', 'LRG3');
41 
42  # get some attributes of the slice
43  my $seqname = $slice->seq_region_name();
44  my $start = $slice->start();
45  my $end = $slice->end();
46 
47  # get the sequence from the slice
48  my $seq = $slice->seq();
49 
50  # get some features from the slice
51  foreach $gene ( @{ $slice->get_all_Genes } ) {
52  # do something with a gene
53  }
54 
55  foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56  # do something with dna-dna alignments
57  }
58 
59 =head1 DESCRIPTION
60 
61 A LRG Slice object represents a region of a genome. It can be used to retrieve
62 sequence or features from an area of interest.
63 
64 =head1 METHODS
65 
66 =cut
67 
68 package Bio::EnsEMBL::LRGSlice;
69 
70 use vars qw(@ISA);
71 use strict;
72 
73 use Bio::PrimarySeqI;
74 
76 
77 use vars qw(@ISA);
78 
79 @ISA = qw(Bio::EnsEMBL::Slice);
80 
81 sub new{
82  my $class = shift;
83 
84  my $self = bless {}, $class ;
85 
86  my $slice = $self = $class->SUPER::new( @_);
87 
88  return $self;
89 }
90 
91 sub stable_id {
92  my $self = shift;
93  return $self->seq_region_name;
94 }
95 
96 
97 sub display_xref {
98  my $self = shift;
99  return $self->seq_region_name;
100 }
101 
102 sub feature_Slice {
103  my $self = shift;
104  return $self->{_chrom_slice} if defined($self->{_chrom_slice});
105 
106  my $max;
107  my $min;
108  my $chrom;
109  my $strand;
110  my %segments;
111 
112  foreach my $segment (@{$self->project('chromosome')}) {
113  my $from_start = $segment->from_start();
114  my $from_end = $segment->from_end();
115  my $slice = $segment->to_Slice;
116 
117  my $seq_region_id = $slice->adaptor->get_seq_region_id($slice);
118  my $to_name = $slice->seq_region_name();
119 
120  next if (!$seq_region_id);
121 
122  if (!$segments{$seq_region_id}) {
123  $segments{$seq_region_id}{'chr_name'} = $to_name;
124  $segments{$seq_region_id}{'strand'} = $slice->strand;
125  $max=-99999999999;
126  $min=9999999999;
127  } else {
128  $max = $segments{$seq_region_id}{'max'};
129  $min = $segments{$seq_region_id}{'min'};
130  }
131 
132  my $to_start = $slice->start();
133  my $to_end = $slice->end();
134  if($to_start > $max){
135  $max = $to_start;
136  }
137  if($to_start < $min){
138  $min = $to_start;
139  }
140  if($to_end > $max){
141  $max = $to_end;
142  }
143  if($to_end < $min){
144  $min = $to_end;
145  }
146 
147  $segments{$seq_region_id}{'max'} = $max;
148  $segments{$seq_region_id}{'min'} = $min;
149  }
150 
151  my @seq_region_ids = sort {$a <=> $b} keys (%segments);
152 
153  # Keep the chromosome mapping instead of the patch, if possible
154  if (scalar @seq_region_ids) {
155  my $seq_id = $seq_region_ids[0];
156  $chrom = $segments{$seq_id}{'chr_name'};
157  $max = $segments{$seq_id}{'max'};
158  $min = $segments{$seq_id}{'min'};
159  $strand = $segments{$seq_id}{'strand'};
160  }
161 
162  if(!defined($chrom)){
163  warn "Could not project to chromosome for ".$self->name." ??\n";
164  return undef;
165  }
166  my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand);
167  $self->{_chrom_slice} = $chrom_slice;
168  return $self->{_chrom_slice};
169 }
170 
171 sub DESTROY{
172 }
173 
174 sub get_all_differences{
175  my $self = shift;
176 
177  my @results;
178 
179  # get seq_region_attrib diffs (always same-length substitutions)
180  ################################################################
181 
182  my $sth = $self->adaptor->prepare(qq{
183  SELECT sra.value
184  FROM seq_region_attrib sra, attrib_type at
185  WHERE at.code = '_rna_edit'
186  AND at.attrib_type_id = sra.attrib_type_id
187  AND sra.seq_region_id = ?
188  });
189 
190  $sth->execute($self->get_seq_region_id);
191 
192  my $edit_string;
193 
194  $sth->bind_columns(\$edit_string);
195 
196  while($sth->fetch()) {
197  my ($start, $end, $edit) = split " ", $edit_string;
198 
199  my $slice = $self->sub_Slice($start, $end);
200  my $chr_proj = $slice->project("chromosome");
201  my $ref_seq = '-';
202  if(scalar @$chr_proj == 1) {
203  $ref_seq = $chr_proj->[0]->[2]->seq;
204  }
205 
206 
207  my $diff = {
208  'start' => $start,
209  'end' => $end,
210  'type' => 'substitution',
211  'seq' => $edit,
212  'ref' => $ref_seq,
213  };
214 
215  push @results, $diff;
216  }
217 
218  # get more complex differences via projections
219  ##############################################
220 
221  # project the LRG slice to contig coordinates
222  my @segs = @{$self->project("contig")};
223 
224  # if the LRG projects into more than one segment
225  if(scalar @segs > 1) {
226 
227  my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr);
228 
229  foreach my $seg(@segs) {
230 
231  # is this a novel LRG contig, or does it project to a chromosome?
232  my @chr_proj = @{$seg->[2]->project("chromosome")};
233 
234  # if it is a normal contig
235  if(scalar @chr_proj) {
236 
237  # check if there has been a deletion in LRG
238  if($prev_was_chr && $prev_end == $seg->[0] - 1) {
239 
240  # check it's not just a break in contigs
241  unless(
242  ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) ||
243  ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1)
244  ) {
245 
246  # now get deleted slice coords, depends on the strand rel to LRG
247  my ($s, $e);
248 
249  # opposite strand
250  if($chr_proj[0]->[2]->strand != $self->strand) {
251  ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1);
252  }
253 
254  # same strand
255  else {
256  ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1);
257  }
258 
259  if($s > $e) {
260  warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")";
261  }
262 
263  else {
264  # get a slice representing the sequence that was deleted
265  my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e, $chr_proj[0]->[2]->strand);
266 
267  my $diff = {
268  'start' => $seg->[0],
269  'end' => $prev_end,
270  'type' => 'deletion',
271  'seq' => '-',
272  'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end,
273  };
274 
275  push @results, $diff;
276  }
277  }
278  }
279 
280  $prev_was_chr = 1;
281 
282  $prev_chr_start = $chr_proj[0]->[2]->start;
283  $prev_chr_end = $chr_proj[0]->[2]->end;
284  }
285 
286  # if it is an LRG made-up contig for an insertion
287  else {
288  $prev_was_chr = 0;
289 
290  my $diff = {
291  'start' => $seg->[0],
292  'end' => $seg->[1],
293  'type' => 'insertion',
294  'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1),
295  'ref' => '-',
296  };
297 
298  push @results, $diff;
299  }
300 
301  $prev_end = $seg->[1];
302  }
303  }
304 
305  # return results sorted by start, then end position
306  return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results];
307 }
308 
309 1;
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::Slice::seq_region_name
public String seq_region_name()
Bio::EnsEMBL::LRGSlice
Definition: LRGSlice.pm:37