3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
33 TranscriptMapper - A utility
class used to perform coordinate conversions
34 between a number of coordinate systems relating to transcripts
42 @coords = $trmapper->genomic2cdna( 141, 500, -1 );
44 @coords = $trmapper->genomic2cds( 141, 500, -1 );
46 @coords = $trmapper->pep2genomic( 10, 60 );
48 @coords = $trmapper->genomic2pep( 123, 400, 1 );
52 This is a utility
class which can be used to perform coordinate conversions
53 between a number of coordinate systems relating to transcripts.
56 object attached. The
Slice provides vital information for the mapping process.
64 package Bio::EnsEMBL::TranscriptMapper;
83 various coordinate transformations relating to transcripts.
85 time of creation to perform the conversions, and that a
new
87 'Genomic' coordinates are coordinates which are relative to the
90 Exceptions :
throws if a
transcript is not an argument
91 Caller : Transcript::get_TranscriptMapper
98 my $transcript = shift;
100 my $class = ref($caller) || $caller;
102 if(!ref($transcript) || !$transcript->isa(
'Bio::EnsEMBL::Transcript')) {
103 throw(
"Transcript argument is required.");
107 my $exons = $transcript->get_all_Exons();
110 $start_phase = $exons->[0]->phase;
115 # Create a cdna <-> genomic mapper and load it with exon coords
116 my $mapper = _load_mapper($transcript);
118 my $self = bless({
'exon_coord_mapper' => $mapper,
119 'start_phase' => $start_phase,
120 'cdna_coding_start' => $transcript->cdna_coding_start(),
121 'cdna_coding_end' => $transcript->cdna_coding_end()},
129 The
transcript for which a mapper should be created.
130 Example : my $mapper = _load_mapper($transcript);
131 Description: loads the mapper
140 my $transcript = shift;
144 my $edits_on = $transcript->edits_enabled();
148 @edits = @{$transcript->get_all_SeqEdits()};
149 @edits = sort {$a->start() <=> $b->start()} @edits;
154 my $cdna_start = undef;
159 foreach my $ex (@{$transcript->get_all_Exons}) {
160 my $gen_start = $ex->seq_region_start();
161 my $gen_end = $ex->seq_region_end();
163 $cdna_start = $cdna_end + 1;
164 $cdna_end = $cdna_start + $ex->length() - 1;
166 my $strand = $ex->strand();
168 # add deletions and insertions into pairs when SeqEdits turned on
169 # ignore mismatches (i.e. treat as matches)
171 while(@edits && $edits[0]->start() + $edit_shift <= $cdna_end) {
173 my $edit = shift(@edits);
174 my $len_diff = $edit->length_diff();
177 # break pair into two parts, finish first pair just before edit
179 my $prev_cdna_end = $edit->start() + $edit_shift - 1;
180 my $prev_cdna_start = $cdna_start;
181 my $prev_len = $prev_cdna_end - $prev_cdna_start + 1;
186 $prev_gen_start = $gen_start;
187 $prev_gen_end = $gen_start + $prev_len - 1;
189 $prev_gen_start = $gen_end - $prev_len + 1;
190 $prev_gen_end = $gen_end;
193 if($prev_len > 0) { # only create
map pair
if not boundary
case
194 $mapper->add_map_coordinates
195 (
'cdna', $prev_cdna_start, $prev_cdna_end, $strand,
196 'genome', $prev_gen_start,$prev_gen_end);
199 $cdna_start = $prev_cdna_end + 1;
202 $gen_start = $prev_gen_end + 1;
204 $gen_end = $prev_gen_start - 1;
207 $cdna_end += $len_diff;
210 $cdna_start += $len_diff;
214 $gen_start -= $len_diff;
216 $gen_end += $len_diff;
220 $edit_shift += $len_diff;
225 my $pair_len = $cdna_end - $cdna_start + 1;
228 $mapper->add_map_coordinates(
'cdna', $cdna_start, $cdna_end, $strand,
229 'genome', $gen_start, $gen_end);
240 The start position in cdna coordinates
242 The end position in cdna coordinates
243 Example : @cdna_coords = $transcript_mapper->cdna2genomic($start, $end);
244 Description: Converts cdna coordinates to genomic coordinates. The
245 return value is a list of coordinates and gaps, where the coordinates
246 are relative to the sequence region that the
transcript used to construct
250 Exceptions :
throws if no start or end
258 my ($self, $start, $end, $include_original_region, $cdna_coding_start) = @_;
260 if( !defined $end ) {
261 throw(
"Must call with start/end");
264 $cdna_coding_start = defined $cdna_coding_start ? $cdna_coding_start : 1;
265 my $mapper = $self->{
'exon_coord_mapper'};
267 return $mapper->map_coordinates(
'cdna', $start, $end, 1,
"cdna", $include_original_region, $cdna_coding_start);
275 The start position in genomic coordinates
277 The end position in genomic coordinates
279 The strand of the genomic coordinates (
default value 1)
280 Example : @coords = $trans_mapper->genomic2cdna($start, $end, $strnd);
281 Description: Converts genomic coordinates to cdna coordinates. The
282 return value is a list of coordinates and gaps. Gaps
283 represent intronic or upstream/downstream regions which
do
284 not comprise
this transcripts cdna. Coordinate objects
285 represent genomic regions which
map to exons (utrs included).
286 Note: A poorly formed
Transcript will cause
this method to
290 Exceptions :
throws if start, end or strand not defined
297 my ($self, $start, $end, $strand) = @_;
299 unless(defined $start && defined $end && defined $strand) {
300 throw(
"start, end and strand arguments are required\n");
303 my $mapper = $self->{
'exon_coord_mapper'};
305 return $mapper->map_coordinates(
"genome", $start, $end, $strand,
"genomic");
313 start position in cds coords
315 end position in cds coords
316 Arg [3] boolean (0 or 1) $include_original_region
317 option to include original input coordinate region mappings in the result
318 Example : @genomic_coords = $transcript_mapper->cds2genomic(69, 306);
319 Description: Converts cds coordinates into genomic coordinates. Coordinates returned
320 are relative to sequence region that the
transcript used to construct
324 Exceptions :
throws if no end
331 my ( $self, $start, $end, $include_original_region ) = @_;
333 if ( !( defined($start) && defined($end) ) ) {
334 throw(
"Must call with start and end");
337 # Move start end into translate cDNA coordinates now.
338 $start = $start +( $self->{
'cdna_coding_start'} - 1 ) ;
339 $end = $end + ( $self->{
'cdna_coding_start'} - 1 );
341 #Check if the start exceeds the cdna_coding_end, if yes, return
342 if($start > $self->{
'cdna_coding_end'}){
346 #Check if the end exceeds the cdna_coding_end, if yes, truncate it otherwise we will be including gaps
347 if($end > $self->{
'cdna_coding_end'}){
348 $end = $self->{
'cdna_coding_end'};
351 return $self->cdna2genomic( $start, $end, $include_original_region, $self->{
'cdna_coding_start'} );
357 start position in peptide coords
359 end position in peptide coords
360 Example : @genomic_coords = $transcript_mapper->pep2genomic(23, 102);
361 Description: Converts peptide coordinates into genomic coordinates. The
362 coordinates returned are relative to the sequence region that the
363 transcript used to construct
this TranscriptMapper was on.
366 Exceptions :
throws if no end
373 my ( $self, $start, $end ) = @_;
375 if ( !( defined($start) && defined($end) ) ) {
376 throw(
"Must call with start and end");
379 # Take possible N-padding at beginning of CDS into account.
380 my $start_phase = $self->{
'start_phase'};
381 my $shift = ( $start_phase > 0 ) ? $start_phase : 0;
383 # Move start end into translate cDNA coordinates now.
384 $start = 3*$start - 2 + ( $self->{
'cdna_coding_start'} - 1 ) - $shift;
385 $end = 3*$end + ( $self->{
'cdna_coding_start'} - 1 ) - $shift;
387 return $self->cdna2genomic( $start, $end );
394 The genomic start position
396 The genomic end position
397 Arg [3] :
int $strand
399 Example : @cds_coords = $trans_mapper->genomic2cds($start, $end, $strand);
400 Description: Converts genomic coordinates into CDS coordinates of the
404 Exceptions :
throw if start, end or strand not defined
411 my ($self, $start, $end, $strand) = @_;
413 if(!defined($start) || !defined($end) || !defined($strand)) {
414 throw(
"start, end and strand arguments are required");
417 if($start > $end + 1) {
418 throw(
"start arg must be less than or equal to end arg + 1");
421 my $cdna_cstart = $self->{
'cdna_coding_start'};
422 my $cdna_cend = $self->{
'cdna_coding_end'};
424 #this is a pseudogene if there is no coding region
425 if(!defined($cdna_cstart)) {
426 #return a gap of the entire requested region, there is no CDS
430 my @coords = $self->genomic2cdna($start, $end, $strand);
434 foreach my $coord (@coords) {
435 if($coord->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
438 my $start = $coord->
start;
439 my $end = $coord->end;
441 if($coord->strand == -1 || $end < $cdna_cstart || $start > $cdna_cend) {
442 #is all gap - does not map to peptide
445 #we know area is at least partially overlapping CDS
447 my $cds_start = $start - $cdna_cstart + 1;
448 my $cds_end = $end - $cdna_cstart + 1;
450 if($start < $cdna_cstart) {
451 #start of coordinates are in the 5prime UTR
454 #start is now relative to start of CDS
459 if($end > $cdna_cend) {
460 #end of coordinates are in the 3prime UTR
462 #adjust end to relative to CDS start
463 $cds_end = $cdna_cend - $cdna_cstart + 1;
466 #start and end are now entirely in CDS and relative to CDS start
467 $coord->
start($cds_start);
468 $coord->end($cds_end);
473 #push out the region which was in the 3prime utr
488 The start position in genomic coordinates
490 The end position in genomic coordinates
492 The strand of the genomic coordinates
493 Example : @pep_coords = $transcript->genomic2pep($start, $end, $strand);
494 Description: Converts genomic coordinates to peptide coordinates. The
495 return value is a list of coordinates and gaps.
498 Exceptions :
throw if start, end or strand not defined
505 my ($self, $start, $end, $strand) = @_;
507 unless(defined $start && defined $end && defined $strand) {
508 throw(
"start, end and strand arguments are required");
511 my @coords = $self->genomic2cds($start, $end, $strand);
515 my $start_phase = $self->{
'start_phase'};
517 #take into account possible N padding at beginning of CDS
518 my $shift = ($start_phase > 0) ? $start_phase : 0;
520 foreach my $coord (@coords) {
521 if($coord->isa(
'Bio::EnsEMBL::Mapper::Gap')) {
525 #start and end are now entirely in CDS and relative to CDS start
527 #convert to peptide coordinates
528 my $pep_start = int(($coord->start + $shift + 2) / 3);
529 my $pep_end = int(($coord->end + $shift + 2) / 3);
530 $coord->start($pep_start);
531 $coord->end($pep_end);