3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http:
26 Questions may also be sent to the Ensembl help desk at
37 # create a new SyntenyRegion from a source and a target gene
39 $source_gene->start, $source_gene->end,
40 $source_gene->strand, $source_gene->seq_region_name,
41 $target_gene->start, $target_gene->end,
42 $target_gene->strand, $target_gene->seq_region_name,
46 # merge with another SyntenyRegion
47 my $merged_sr = $sr->merge($sr1);
49 # score a gene pair against this SyntenyRegion
51 $sr->score_location_relationship( $source_gene1, $target_gene1 );
55 This object represents a synteny between a source and a target location.
56 SyntenyRegions are built from mapped genes, and their score is
57 defined as the score of the gene mapping. For merged SyntenyRegions,
66 source_seq_region_name
70 target_seq_region_name
74 score_location_relationship
79 package Bio::EnsEMBL::IdMapping::SyntenyRegion;
83 no warnings
'uninitialized';
90 Arg[1] : Arrayref $array_ref - the arrayref to bless into the
94 Description : Constructor. On instantiation, source and target regions are
95 reverse complemented so that source is always on forward strand.
106 my $array_ref = shift;
108 # reverse complement source and target so that source is always on forward
109 # strand; this will make merging and other comparison operations easier
111 if ($array_ref->[2] == -1) {
113 $array_ref->[6] = -1 * $array_ref->[6];
116 return bless $array_ref, $class;
122 Arg[1] : (optional) Int - source location start coordinate
123 Description : Getter/setter for source location start coordinate.
134 $self->[0] = shift if (@_);
141 Arg[1] : (optional) Int - source location end coordinate
142 Description : Getter/setter for source location end coordinate.
153 $self->[1] = shift if (@_);
160 Arg[1] : (optional) Int - source location strand
161 Description : Getter/setter for source location strand.
172 $self->[2] = shift if (@_);
177 =head2 source_seq_region_name
179 Arg[1] : (optional) String - source location seq_region name
180 Description : Getter/setter for source location seq_region name.
189 sub source_seq_region_name {
191 $self->[3] = shift if (@_);
198 Arg[1] : (optional) Int - target location start coordinate
199 Description : Getter/setter for target location start coordinate.
210 $self->[4] = shift if (@_);
217 Arg[1] : (optional) Int - target location end coordinate
218 Description : Getter/setter for target location end coordinate.
229 $self->[5] = shift if (@_);
236 Arg[1] : (optional) Int - target location strand
237 Description : Getter/setter for target location strand.
248 $self->[6] = shift if (@_);
253 =head2 target_seq_region_name
255 Arg[1] : (optional) String - target location seq_region name
256 Description : Getter/setter for target location seq_region name.
265 sub target_seq_region_name {
267 $self->[7] = shift if (@_);
274 Arg[1] : (optional) Float - score
275 Description : Getter/setter for the score between source and target location.
286 $self->[8] = shift if (@_);
295 Example : $merged_sr = $sr->merge($other_sr);
296 Description : Merges two overlapping SyntenyRegions if they meet certain
297 criteria (see documentation in the code for details). Score is
298 calculated as a combined distance score. If the two
299 SyntenyRegions aren't mergeable, this method returns a false value (0).
300 Return type : Bio::EnsEMBL::IdMapping::SyntenyRegion or 0
301 Exceptions : warns on bad scores
302 Caller : Bio::EnsEMBL::IdMapping::SyntenyFramework
309 my ($self, $sr) = @_;
311 # must be on same seq_region
312 if ($self->source_seq_region_name ne $sr->source_seq_region_name or
313 $self->target_seq_region_name ne $sr->target_seq_region_name) {
317 # target must be on same strand
318 return 0 unless ($self->target_strand == $sr->target_strand);
320 # find the distance of source and target pair and compare
321 my $source_dist = $sr->source_start - $self->source_start;
323 if ($self->target_strand == 1) {
324 $target_dist = $sr->target_start - $self->target_start;
326 $target_dist = $self->target_end - $sr->target_end;
329 # prevent division by zero error
330 if ($source_dist == 0 or $target_dist == 0) {
331 warn("WARNING: source_dist ($source_dist) and/or target_dist ($target_dist) is zero.\n");
335 # calculate a distance score
336 my $dist = $source_dist - $target_dist;
337 $dist = -$dist if ($dist < 0);
338 my $d1 = $dist/$source_dist;
339 $d1 = -$d1 if ($d1 < 0);
340 my $d2 = $dist/$target_dist;
341 $d2 = -$d2 if ($d2 < 0);
342 my $dist_score = 1 - $d1 - $d2;
344 # distance score must be more than 50%
345 return 0 if ($dist_score < 0.5);
347 my $new_score = $dist_score * ($sr->score + $self->score)/2;
349 if ($new_score > 1) {
350 warn("WARNING: Bad merge score: $new_score\n");
353 # extend SyntenyRegion to cover both sources and targets, set merged score
355 if ($sr->source_start < $self->source_start) {
356 $self->source_start($sr->source_start);
358 if ($sr->source_end > $self->source_end) {
359 $self->source_end($sr->source_end);
362 if ($sr->target_start < $self->target_start) {
363 $self->target_start($sr->target_start);
365 if ($sr->target_end > $self->target_end) {
366 $self->target_end($sr->target_end);
369 $self->score($new_score);
377 Arg[1] : Float $factor - stretching factor
378 Example : $stretched_sr = $sr->stretch(2);
379 Description : Extends the source and target of this SyntenyRegion at both ends, each by int(length * $factor * score).
380 Return type : Bio::EnsEMBL::IdMapping::SyntenyRegion
382 Caller : Bio::EnsEMBL::IdMapping::SyntenyFramework
389 my ($self, $factor) = @_;
391 my $source_adjust = int(($self->source_end - $self->source_start + 1) *
392 $factor * $self->score);
393 $self->source_start($self->source_start - $source_adjust);
394 $self->source_end($self->source_end + $source_adjust);
395 #warn sprintf(" sss %d %d %d\n", $source_adjust, $self->source_start,
396 # $self->source_end);
398 my $target_adjust = int(($self->target_end - $self->target_start + 1) *
399 $factor * $self->score);
400 $self->target_start($self->target_start - $target_adjust);
401 $self->target_end($self->target_end + $target_adjust);
407 =head2 score_location_relationship
409 Arg[1] : Bio::EnsEMBL::IdMapping::TinyGene $source_gene - source gene
410 Arg[2] : Bio::EnsEMBL::IdMapping::TinyGene $target_gene - target gene
411 Example : my $score = $sr->score_location_relationship($source_gene,
413 Description : This function calculates how well the given source location
414 interpolates on given target location inside this SyntenyRegion.
416 Scoring is done the following way: Source and target location
417 are normalized with respect to this Region's source and target.
418 Source range will then be somewhere close to 0.0-1.0 and target
419 range anything around that.
421 The extent of the covered area between source and target range
422 is a measurement of how well they agree (smaller extent is
423 better). The extent (actually 2*extent) is reduced by the size
424 of the regions. This will result in 0.0 if they overlap
425 perfectly and bigger values if they don't.
427 This is subtracted from 1.0 to give the score. The score is
428 likely to be below zero, but is cut off at 0.0.
430 Finally, the score is multiplied with the score of the synteny
433 Exceptions : warns if score out of range
434 Caller : Bio::EnsEMBL::IdMapping::SyntenyFramework
442 sub score_location_relationship {
443 my ($self, $source_gene, $target_gene) = @_;
445 # must be on same seq_region
446 if (($self->source_seq_region_name ne $source_gene->seq_region_name) or
447 ($self->target_seq_region_name ne $target_gene->seq_region_name)) {
451 # strand relationship must be the same (use logical XOR to find out)
452 if (($self->source_strand == $source_gene->strand) xor
453 ($self->target_strand == $target_gene->strand)) {
457 # normalise source location
458 my $source_rel_start = ($source_gene->start - $self->source_start) /
459 ($self->source_end - $self->source_start + 1);
461 my $source_rel_end = ($source_gene->end - $self->source_start + 1) /
462 ($self->source_end - $self->source_start + 1);
464 #warn " aaa ".$self->to_string."\n";
465 #warn sprintf(" bbb %.6f %.6f\n", $source_rel_start, $source_rel_end);
467 # cut off if the source location is completely outside
468 return 0 if ($source_rel_start > 1.1 or $source_rel_end < -0.1);
470 # normalise target location
471 my ($target_rel_start, $target_rel_end);
472 my $t_length = $self->target_end - $self->target_start + 1;
474 if ($self->target_strand == 1) {
476 $target_rel_start = ($target_gene->start - $self->target_start) / $t_length;
478 $target_rel_end = ($target_gene->end - $self->target_start + 1) / $t_length;
481 $target_rel_start = ($self->target_end - $target_gene->end) / $t_length;
482 $target_rel_end = ($self->target_end - $target_gene->start + 1) / $t_length;
485 my $added_range = (($target_rel_end > $source_rel_end) ? $target_rel_end :
487 (($target_rel_start < $source_rel_start) ? $target_rel_start :
490 my $score = $self->score * (1 - (2 * $added_range - $target_rel_end -
491 $source_rel_end + $target_rel_start + $source_rel_start));
493 #warn " ccc ".sprintf("%.6f:%.6f:%.6f:%.6f:%.6f\n", $added_range,
494 # $source_rel_start, $source_rel_end, $target_rel_start, $target_rel_end);
496 $score = 0 if ($score < 0);
500 warn "Out of range score ($score) for ".$source_gene->id.":".
501 $target_gene->id."\n";
510 Example : print LOG $sr->to_string, "\n";
511 Description : Returns a string representation of the SyntenyRegion object.
512 Useful for debugging and logging.
515 Caller : Bio::EnsEMBL::IdMapping::SyntenyFramework
523 return sprintf("%s:%s-%s:%s %s:%s-%s:%s %.6f",
524 $self->source_seq_region_name,
527 $self->source_strand,
528 $self->target_seq_region_name,
531 $self->target_strand,