ensembl-hive  2.7.0
LRGSlice.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 
21 =head1 CONTACT
22 
23  Please email comments or questions to the public Ensembl
24  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25 
26  Questions may also be sent to the Ensembl help desk at
27  <http://www.ensembl.org/Help/Contact>.
28 
29 =cut
30 
31 =head1 NAME
32 
33 Bio::EnsEMBL::LRGSlice - Arbitary Slice of a genome
34 
35 =head1 SYNOPSIS
36 
37  $sa = $db->get_SliceAdaptor;
38 
39  $slice =
40  $sa->fetch_by_region( 'LRG', 'LRG3');
41 
42  # get some attributes of the slice
43  my $seqname = $slice->seq_region_name();
44  my $start = $slice->start();
45  my $end = $slice->end();
46 
47  # get the sequence from the slice
48  my $seq = $slice->seq();
49 
50  # get some features from the slice
51  foreach $gene ( @{ $slice->get_all_Genes } ) {
52  # do something with a gene
53  }
54 
55  foreach my $feature ( @{ $slice->get_all_DnaAlignFeatures } ) {
56  # do something with dna-dna alignments
57  }
58 
59 =head1 DESCRIPTION
60 
61 A LRG Slice object represents a region of a genome. It can be used to retrieve
62 sequence or features from an area of interest.
63 
64 =head1 METHODS
65 
66 =cut
67 
68 package Bio::EnsEMBL::LRGSlice;
69 use vars qw(@ISA);
70 use strict;
71 
72 use Bio::PrimarySeqI;
73 
75 
76 use vars qw(@ISA);
77 
78 @ISA = qw(Bio::EnsEMBL::Slice);
79 
80 sub new{
81  my $class = shift;
82 
83  my $self = bless {}, $class ;
84 
85  my $slice = $self = $class->SUPER::new( @_);
86 
87  return $self;
88 }
89 
90 sub stable_id {
91  my $self = shift;
92  return $self->seq_region_name;
93 }
94 
95 
96 sub display_xref {
97  my $self = shift;
98  return $self->seq_region_name;
99 }
100 
101 sub feature_Slice {
102  my $self = shift;
103  return $self->{_chrom_slice} if defined($self->{_chrom_slice});
104 
105  my $max;
106  my $min;
107  my $chrom;
108  my $strand;
109  my %segments;
110 
111  foreach my $segment (@{$self->project('chromosome')}) {
112  my $from_start = $segment->from_start();
113  my $from_end = $segment->from_end();
114  my $slice = $segment->to_Slice;
115 
116  my $seq_region_id = $slice->adaptor->get_seq_region_id($slice);
117  my $to_name = $slice->seq_region_name();
118 
119  next if (!$seq_region_id);
120 
121  if (!$segments{$seq_region_id}) {
122  $segments{$seq_region_id}{'chr_name'} = $to_name;
123  $segments{$seq_region_id}{'strand'} = $slice->strand;
124  $max=-99999999999;
125  $min=9999999999;
126  } else {
127  $max = $segments{$seq_region_id}{'max'};
128  $min = $segments{$seq_region_id}{'min'};
129  }
130 
131  my $to_start = $slice->start();
132  my $to_end = $slice->end();
133  if($to_start > $max){
134  $max = $to_start;
135  }
136  if($to_start < $min){
137  $min = $to_start;
138  }
139  if($to_end > $max){
140  $max = $to_end;
141  }
142  if($to_end < $min){
143  $min = $to_end;
144  }
145 
146  $segments{$seq_region_id}{'max'} = $max;
147  $segments{$seq_region_id}{'min'} = $min;
148  }
149 
150  my @seq_region_ids = sort {$a <=> $b} keys (%segments);
151 
152  # Keep the chromosome mapping instead of the patch, if possible
153  if (scalar @seq_region_ids) {
154  my $seq_id = $seq_region_ids[0];
155  $chrom = $segments{$seq_id}{'chr_name'};
156  $max = $segments{$seq_id}{'max'};
157  $min = $segments{$seq_id}{'min'};
158  $strand = $segments{$seq_id}{'strand'};
159  }
160 
161  if(!defined($chrom)){
162  warn "Could not project to chromosome for ".$self->name." ??\n";
163  return undef;
164  }
165  my $chrom_slice = $self->adaptor->fetch_by_region("chromosome",$chrom, $min, $max, $strand);
166  $self->{_chrom_slice} = $chrom_slice;
167  return $self->{_chrom_slice};
168 }
169 
170 sub DESTROY{
171 }
172 
173 sub get_all_differences{
174  my $self = shift;
175 
176  my @results;
177 
178  # get seq_region_attrib diffs (always same-length substitutions)
179  ################################################################
180 
181  my $sth = $self->adaptor->prepare(qq{
182  SELECT sra.value
183  FROM seq_region_attrib sra, attrib_type at
184  WHERE at.code = '_rna_edit'
185  AND at.attrib_type_id = sra.attrib_type_id
186  AND sra.seq_region_id = ?
187  });
188 
189  $sth->execute($self->get_seq_region_id);
190 
191  my $edit_string;
192 
193  $sth->bind_columns(\$edit_string);
194 
195  while($sth->fetch()) {
196  my ($start, $end, $edit) = split " ", $edit_string;
197 
198  my $slice = $self->sub_Slice($start, $end);
199  my $chr_proj = $slice->project("chromosome");
200  my $ref_seq = '-';
201  if(scalar @$chr_proj == 1) {
202  $ref_seq = $chr_proj->[0]->[2]->seq;
203  }
204 
205 
206  my $diff = {
207  'start' => $start,
208  'end' => $end,
209  'type' => 'substitution',
210  'seq' => $edit,
211  'ref' => $ref_seq,
212  };
213 
214  push @results, $diff;
215  }
216 
217  # get more complex differences via projections
218  ##############################################
219 
220  # project the LRG slice to contig coordinates
221  my @segs = @{$self->project("contig")};
222 
223  # if the LRG projects into more than one segment
224  if(scalar @segs > 1) {
225 
226  my ($prev_end, $prev_chr_start, $prev_chr_end, $prev_was_chr);
227 
228  foreach my $seg(@segs) {
229 
230  # is this a novel LRG contig, or does it project to a chromosome?
231  my @chr_proj = @{$seg->[2]->project("chromosome")};
232 
233  # if it is a normal contig
234  if(scalar @chr_proj) {
235 
236  # check if there has been a deletion in LRG
237  if($prev_was_chr && $prev_end == $seg->[0] - 1) {
238 
239  # check it's not just a break in contigs
240  unless(
241  ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_start == $chr_proj[0]->[2]->end + 1) ||
242  ($chr_proj[0]->[2]->strand != $self->strand && $prev_chr_end == $chr_proj[0]->[2]->start - 1)
243  ) {
244 
245  # now get deleted slice coords, depends on the strand rel to LRG
246  my ($s, $e);
247 
248  # opposite strand
249  if($chr_proj[0]->[2]->strand != $self->strand) {
250  ($s, $e) = ($prev_chr_start - 1, $chr_proj[0]->[2]->end + 1);
251  }
252 
253  # same strand
254  else {
255  ($s, $e) = ($prev_chr_end + 1, $chr_proj[0]->[2]->start - 1);
256  }
257 
258  if($s > $e) {
259  warn "Oops, trying to create a slice from $s to $e (could have been ", $prev_chr_start - 1, "-", $chr_proj[0]->[2]->end + 1, " or ", $prev_chr_end + 1, "-", $chr_proj[0]->[2]->start - 1, ")";
260  }
261 
262  else {
263  # get a slice representing the sequence that was deleted
264  my $deleted_slice = $self->adaptor->fetch_by_region("chromosome", $chr_proj[0]->[2]->seq_region_name, $s, $e, $chr_proj[0]->[2]->strand);
265 
266  my $diff = {
267  'start' => $seg->[0],
268  'end' => $prev_end,
269  'type' => 'deletion',
270  'seq' => '-',
271  'ref' => $deleted_slice->seq." ".$deleted_slice->start.'-'.$deleted_slice->end,
272  };
273 
274  push @results, $diff;
275  }
276  }
277  }
278 
279  $prev_was_chr = 1;
280 
281  $prev_chr_start = $chr_proj[0]->[2]->start;
282  $prev_chr_end = $chr_proj[0]->[2]->end;
283  }
284 
285  # if it is an LRG made-up contig for an insertion
286  else {
287  $prev_was_chr = 0;
288 
289  my $diff = {
290  'start' => $seg->[0],
291  'end' => $seg->[1],
292  'type' => 'insertion',
293  'seq' => substr($self->seq, $seg->[0] - 1, $seg->[1] - $seg->[0] + 1),
294  'ref' => '-',
295  };
296 
297  push @results, $diff;
298  }
299 
300  $prev_end = $seg->[1];
301  }
302  }
303 
304  # return results sorted by start, then end position
305  return [sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @results];
306 }
307 
308 1;
Bio::EnsEMBL::Slice
Definition: Slice.pm:50
Bio::EnsEMBL::Slice::seq_region_name
public String seq_region_name()
Bio::EnsEMBL::LRGSlice
Definition: LRGSlice.pm:37