my ($assembly_mappings) = @_;
my @chain_mappings;
my ($t_name, $t_size, $t_strand, $t_start, $t_end);
my ($q_name, $q_size, $q_strand, $q_start, $q_end);
my $chain_id = 1;
my @chain_gaps;
my $length = scalar(@{$assembly_mappings});
for (my $i = 0; $i < $length; $i++) {
my $current = $assembly_mappings->[$i];
my $next = ($i+1 != $length) ? $assembly_mappings->[$i+1] : undef;
my $ori = $current->{ori};
my ($asm_diff, $cmp_diff);
if($next) {
$asm_diff = ($next->{asm_start} - $current->{asm_end})-1;
# Rev strands means the next cmp region has a lower start than the
# current end (because it's running in reverse). Rember length in 1-based
# coords always is (high-low)-1
$cmp_diff = ($ori == 1) ? ($next->{cmp_start} - $current->{cmp_end})-1 : ($current->{cmp_start} - $next->{cmp_end})-1;
}
if(! $t_name) {
# Reset variables to current
@chain_gaps = ();
$chain_id++;
($t_name, $t_size, $t_strand, $t_start) = ($current->{asm_name}, $current->{asm_length}, 1, $current->{asm_start});
($q_name, $q_size, $q_strand) = ($current->{cmp_name}, $current->{cmp_length}, $current->{ori});
$q_start = ($ori == 1) ? $current->{cmp_start} : $current->{cmp_end};
}
# Block that decides we need to start a new chain definition
#
# Can mean we have run out of mappings, we are into a new chromsome (both source and target)
# or strand has swapped.
# Final reason is we've had an out-of-order meaning a negative gap was produced
# (we're going backwards). In any situation this means the chain is finished
if( ! defined $next ||
$t_name ne $next->{asm_name} ||
$q_name ne $next->{cmp_name} || # we can switch target chromosomes. e.g. cross chromsome mappings in mouse NCBI37->GRCm38
$ori != $next->{ori} ||
$asm_diff < 0 ||
$cmp_diff < 0) {
# Add the last gap on which is just the length of this alignment
push(@chain_gaps, [$current->{length}]);
# Set the ends of the chain since this is the last block
$t_end = $current->{asm_end};
$q_end = ($ori == 1) ? $current->{cmp_end} : $current->{cmp_start};
#If strand was negative we need to represent all data as reverse complemented regions
if($q_strand == -1) {
# $t_start = ($t_size - $t_start)+1;
# $t_end = ($t_size - $t_end)+1;
$q_start = ($q_size - $q_start)+1;
$q_end = ($q_size - $q_end)+1;
}
# Convert to UCSC formats (0-based half-open intervals and +/- strands)
$t_start--;
$q_start--;
$t_strand = ($t_strand == 1) ? '+' : '-';
$q_strand = ($q_strand == 1) ? '+' : '-';
#Store the chain
my $chain_score = 1;
push(@chain_mappings, {
header => ['chain', $chain_score, $t_name, $t_size, $t_strand, $t_start, $t_end, $q_name, $q_size, $q_strand, $q_start, $q_end, $chain_id],
gaps => [@chain_gaps]
});
if(! defined $next) {
last;
}
# Clear variables
($t_name, $t_size, $t_strand, $t_start) = ();
($q_name, $q_size, $q_strand, $q_start) = ();
}
push(@chain_gaps, [$current->{length}, $asm_diff, $cmp_diff]);
}
return \@chain_mappings;
}