3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
44 # Retrieve coordinates of upstream region
45 my $upstream_region_start = $upstream->
upstart;
46 my $upstream_region_end = $upstream->upend;
48 # Retrieve coordinates in 'downstream' first intron
49 my $intron_region_start = $upstream->downstart;
50 my $intron_region_end = $upstream->downend;
52 # Coordinates are returned in the same scheme as the input transcript.
53 # However, the coordinates of an upstream region can be transformed to
54 # any other scheme using a slice
56 $upstream->transform($slice);
58 # Coordinates can be retrieved in scheme in the same manner as the
63 An object that determines the upstream region of a transcript. Such a
64 region is non-coding and ensures that other genes or transcripts are
65 not present. Ultimately, these objects can be used to look for
66 promoter elements. To this end, it is also possible to derive a region
67 downstream of the first exon, within the first intron, where promoter
68 elements are sometimes found.
74 package Bio::EnsEMBL::Upstream;
90 Arg [length] : (optional)
int $length
93 Description: Creates a
new upstream
object
102 my ($class, @args) = @_;
108 $length) = rearrange([qw(TRANSCRIPT
112 $self->transcript($transcript)
if defined $transcript;
113 $self->length($length)
if $length;
121 Example : $self->transcript($transcript);
122 Description: Getter/setter
for transcript object
124 Exceptions : Throws if a defined argument is not a Bio::EnsEMBL::Transcript
126 Caller : $self->
new, $self->_derive_coords,
127 $self->_first_coding_Exon
137 $self->{_transcript} = shift;
139 if (defined $self->{_transcript}) {
140 throw(
"Transcript is not a Bio::EnsEMBL::Transcript")
141 if (! $self->{_transcript}->isa(
"Bio::EnsEMBL::Transcript"));
146 return $self->{_transcript}
151 Arg : (optional)
int $length
152 Example : $self->length(2000); # bp
153 Description: Getter/setter
for upstream region length.
155 Exceptions : Throws
if length is requested before it has been set.
156 Caller : $self->new, $self->_derive_coords
165 $self->{_length} = shift;
169 throw(
"Region length has not been set.")
170 unless $self->{_length};
172 return $self->{_length}
178 Example : $self->_flush_cache;
179 Description: Empties cached coordinates (called when
180 coordinate scheme or region length has changed).
183 Caller : $self->length, $self->transform
191 $self->upstart(undef);
193 $self->downstart(undef);
194 $self->downend(undef);
200 Example : $self->upstart;
201 Description: Returns the start coordinate of the region
203 is always the furthest from the translation
204 initiation codon, whereas upend always abuts
205 the translation initiation codon.
217 $self->{_upstart} = shift @_;
221 if (! defined $self->{_upstart}) {
222 $self->_derive_coords(
'up');
225 return $self->{_upstart}
231 Example : $self->upend;
232 Description: Returns the end coordinate of the region
234 always abuts the translation
235 initiation codon, whereas upstart always
236 returns the coordinate furthest from the
237 translation initiation codon.
249 $self->{_upend} = shift @_;
253 if (! defined $self->{_upend}) {
254 $self->_derive_coords(
'up');
257 return $self->{_upend}
263 Example : $self->downstart;
264 Description: Returns the start coordinate of the region
266 coordinate is always closest to the first
267 exon (regardless of strand).
279 $self->{_downstart} = shift @_;
283 if (! defined $self->{_downstart}) {
284 $self->_derive_coords(
'down');
287 return $self->{_downstart}
293 Example : $self->downend;
294 Description: Returns the end coordinate of the region
296 coordinate is always furthest from the first
297 exon (regardless of strand).
309 $self->{_downend} = shift @_;
313 if (! defined $self->{_downend}) {
314 $self->_derive_coords(
'down');
317 return $self->{_downend}
324 Description: Not yet implemented
333 # Over-riding inherited class. As yet unimplemented.
338 throw(
"No transform method implemented for " . $self);
341 =head2 derive_upstream_coords
344 Example : my ($upstart, $upend)
345 = @{ $self->derive_upstream_coords };
346 Description: Derives upstream coordinates (for
347 compatibility with older scripts).
348 Returntype : arrayref
# Legacy accessor kept for older scripts. Returns the upstream region as a
# two-element array reference [start, end], built from the upstart and upend
# accessors. Because the result is a reference, callers must dereference it:
#   my ($upstart, $upend) = @{ $self->derive_upstream_coords };
sub derive_upstream_coords {
  my ($self) = @_;

  return [ $self->upstart, $self->upend ];
}
361 =head2 derive_downstream_coords
364 Example : my ($downstart, $downend)
365 = @{ $self->derive_downstream_coords };
366 Description: Derives downstream coordinates (for
367 compatibility with older scripts).
368 Returntype : arrayref
# Legacy accessor kept for older scripts. Returns the downstream (first
# intron) region as a two-element array reference [start, end], built from
# the downstart and downend accessors. Because the result is a reference,
# callers must dereference it:
#   my ($downstart, $downend) = @{ $self->derive_downstream_coords };
sub derive_downstream_coords {
  my ($self) = @_;

  return [ $self->downstart, $self->downend ];
}
381 =head2 _derive_coords
383 Arg :
string $direction (either
'up' or
'down').
384 Example : $self->_derive_coords(
'up');
385 Description: Determines the coordinates of either upstream
386 or downstream region.
388 Exceptions : Throws
if argument is not either
'up' or
'down'
389 Caller : $self->upstart, $self->upend, $self->downstart,
396 my ($self, $direction) = @_;
399 throw(
"Must specify either \'up\' of \'down\'-stream direction to derive coords.")
400 unless (($direction eq
'up')||($direction eq
'down'));
402 # Put things in easily accessible places.
403 my $core_db_slice_adaptor = $self->transcript->slice->adaptor;
404 my $region_length = $self->length;
406 # Whatever coord system the transcript is currently in, transform it to the toplevel.
407 my $transcript = $self->transcript->transform(
'toplevel');
409 # Use our transformed transcript to determine the upstream region coords.
410 # End should always be just before the coding start (like ATG), including the 5' UTR.
411 # Start is the outer limit of the region upstream (furthest from ATG).
416 if ($transcript->strand == 1){
417 if ($direction eq
'up'){
418 $region_end = $transcript->coding_region_start - 1;
419 $region_start = $region_end - $region_length;
420 } elsif ($direction eq
'down'){
421 $region_end = $self->_first_coding_Exon->end + 1;
422 $region_start = $region_end + $region_length;
424 } elsif ($transcript->strand == -1) {
425 if ($direction eq
'up'){
426 $region_end = $transcript->coding_region_end + 1;
427 $region_start = $region_end + $region_length;
429 } elsif ($direction eq
'down'){
430 $region_end = $self->_first_coding_Exon->start - 1;
431 $region_start = $region_end - $region_length;
435 # Trim the upstream/downstream region to remove extraneous coding sequences
436 # from other genes and/or transcripts.
438 my ($slice_low_coord, $slice_high_coord) = sort {$a <=> $b} ($region_start, $region_end);
441 = $core_db_slice_adaptor->fetch_by_region($transcript->slice->coord_system->name,
442 $transcript->slice->seq_region_name,
446 if ($transcript->strand == 1) {
447 if ($direction eq
'up') {
448 $region_start += $self->_bases_to_trim(
'left_end', $region_slice);
449 } elsif ($direction eq
'down') {
450 $region_start -= $self->_bases_to_trim(
'right_end', $region_slice);
452 } elsif ($transcript->strand == -1) {
453 if ($direction eq
'up') {
454 $region_start -= $self->_bases_to_trim(
'right_end', $region_slice);
455 } elsif ($direction eq
'down') {
456 $region_start += $self->_bases_to_trim(
'left_end', $region_slice);
460 # Always return start < end
462 ($region_start, $region_end) = sort {$a <=> $b} ($region_start, $region_end);
464 if ($direction eq
'up') {
465 $self->upstart($region_start);
466 $self->upend($region_end);
467 } elsif ($direction eq
'down') {
468 $self->downstart($region_start);
469 $self->downend($region_end);
473 =head2 _bases_to_trim
475 Arg :
string $end_to_trim (either
'right_end' or
478 Example : $self->_bases_to_trim('right_end', $slice);
479 Description: Finds exons from other genes/transcripts that
480 invade our upstream/downstream slice and
481 returns the number of bases that should be
482 truncated from the appropriate end of the
483 upstream/downstream region.
485 Exceptions : Throws
if argument is not either
'right_end'
487 Caller : $self->_derive_coords
492 # Method to look for coding regions that invade the upstream region. For
493 # now, this method returns the number of bases to trim. It doesn't yet
494 # do anything special if an exon is completely swallowed (truncates at
495 # the end of the overlapping exon and discards any non-coding sequence
496 # further upstream) or overlaps the 'wrong' end of the region (cases where
497 # two alternate exons share one end of sequence - does this happen?).
499 # The input argument 'end' defines the end of the slice that should be
503 my ($self, $end_to_trim, $slice) = @_;
505 throw "Slice end argument must be either left_end or right_end"
506 unless ($end_to_trim eq
'right_end' || $end_to_trim eq
'left_end');
509 my $slice_length = $slice->length;
513 foreach my $exon (@{$slice->get_all_Exons}){
514 next
if $exon->stable_id eq $self->_first_coding_Exon->stable_id;
516 my $start = $exon->start;
517 my $end = $exon->end;
519 # Choose from four possible exon arrangements
521 # -----|********************|----- Slice
522 # --|=========================|--- Exon arrangement 1
523 # ----------|======|-------------- Exon arrangement 2
524 # --|=======|--------------------- Exon arrangement 3
525 # -------------------|=========|-- Exon arrangement 4
528 if ($start <= 0 && $end >= $slice_length) { #
exon arrangement 1
529 $right_trim = $slice_length - 1;
530 $left_trim = $slice_length - 1;
533 } elsif ($start >= 0 && $end <= $slice_length) { #
exon arrangement 2
534 my $this_right_trim = ($slice_length - $start) + 1;
536 $right_trim = $this_right_trim
537 if $this_right_trim > $right_trim;
540 if $end > $left_trim;
542 } elsif ($start <= 0 && $end < $slice_length) { #
exon arrangement 3
543 $right_trim = $slice_length; # a bit draconian
545 if $end > $left_trim;
547 } elsif ($start > 0 && $end >= $slice_length) { #
exon arrangement 4
548 my $this_right_trim = ($slice_length - $start) + 1;
550 $right_trim = $this_right_trim
551 if $this_right_trim > $right_trim;
553 $left_trim = $slice_length; # also a bit draconian
558 return $right_trim
if $end_to_trim eq
'right_end';
559 return $left_trim
if $end_to_trim eq
'left_end';
562 =head2 _first_coding_Exon
565 Example : $self->_first_coding_Exon;
567 contains coding bases.
570 Caller : $self->_derive_coords, $self->_bases_to_trim
# Returns the transcript's "first coding exon", computing it on first use and
# caching it in $self->{_first_coding_exon}.
#
# The exon is picked from the list returned by get_all_translateable_Exons:
# element 0 when the transcript strand is 1, the last element when the strand
# is -1. For any other strand value (or an empty exon list) nothing useful is
# cached and the method returns undef.
#
# NOTE(review): if get_all_translateable_Exons returns exons in 5'->3'
# transcript order, index -1 on the reverse strand would select the *last*
# coding exon rather than the first - confirm against the Transcript API.
sub _first_coding_Exon {
  my ($self) = @_;

  unless ($self->{_first_coding_exon}) {
    # Fetch the transcript once; it supplies both the exon list and the strand.
    my $transcript = $self->transcript;
    my $exons      = $transcript->get_all_translateable_Exons;
    my $strand     = $transcript->strand;

    if ($strand == 1) {
      $self->{_first_coding_exon} = $exons->[0];
    }
    elsif ($strand == -1) {
      $self->{_first_coding_exon} = $exons->[-1];
    }
  }

  return $self->{_first_coding_exon};
}