2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 align_by_clone_identity.pl - create a whole genome alignment between two closely
21 related assemblies, step 1
25 align_by_clone_identity.pl [arguments]
29 --dbname, db_name=NAME database name NAME
30 --host, --dbhost, --db_host=HOST database host HOST
31 --port, --dbport, --db_port=PORT database port PORT
32 --user, --dbuser, --db_user=USER database username USER
33 --pass, --dbpass, --db_pass=PASS database passwort PASS
34 --assembly=ASSEMBLY assembly version ASSEMBLY
36 --altdbname=NAME alternative database NAME
37 --altassembly=ASSEMBLY alternative assembly version ASSEMBLY
41 --chromosomes, --chr=LIST only process LIST toplevel seq_regions
42 --skipclones=FILE read list of clones to skip from FILE
44 --conffile, --conf=FILE read parameters from FILE
45 (
default: conf/Conversion.ini)
47 --logfile, --log=FILE log to FILE (
default: *STDOUT)
48 --logpath=PATH write logfile to PATH (
default: .)
49 --logappend, --log_append append to logfile (
default: truncate)
51 -v, --verbose=0|1 verbose logging (
default:
false)
52 -i, --interactive=0|1
run script interactively (
default:
true)
53 -n, --dry_run, --dry=0|1 don
't write results to database
54 -h, --help, -? print help (this message)
58 This script is part of a series of scripts to create a mapping between two
59 assemblies. It assembles the toplevel coordinate systems of two different
60 assemblies of a genome by creating a whole genome alignment between the two.
62 The process assumes that the two assemblies are reasonably similar, i.e. there
63 are no major rearrangements or clones moved from one toplevel seq_region to
66 See "Related scripts" below for an overview of the whole process.
68 This particular script creates a whole genome alignment between two closely
69 related assemblies. You will need a database containing the reference assembly
70 and the alternative toplevel seq_regions which can be created using
71 load_alternative_assembly.pl.
73 The alignment is created in two steps:
75 1. Match clones with same name and version directly and create alignment
76 blocks for these regions. Clones can be tagged manually to be excluded
77 from these direct matches by listing them in a file of clones to skip
78 (--skipclones argument). This can be useful to get better results in
79 regions with major assembly differences.
81 The result is stored in the assembly table as an assembly between the
82 toplevel seq_regions of both genome assemblies.
84 2. Store non-aligned blocks in a temporary table (tmp_align). They can
85 later be aligned using blastz by align_nonident_regions.pl.
89 The whole process of creating a whole genome alignment between two assemblies
90 is done by a series of scripts. Please see
92 ensembl/misc-scripts/assembly/README
94 for a high-level description of this process, and POD in the individual scripts
100 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
104 Please post comments/questions to the Ensembl development list
105 <http://lists.ensembl.org/mailman/listinfo/dev>
111 no warnings 'uninitialized
';
113 use FindBin qw($Bin);
114 use vars qw($SERVERROOT);
117 $SERVERROOT = "$Bin/../../..";
118 unshift(@INC, "$SERVERROOT/ensembl/modules");
119 unshift(@INC, "$SERVERROOT/bioperl-live");
124 use Bio::EnsEMBL::Utils::ConversionSupport;
125 use Bio::EnsEMBL::Attribute;
129 my $support = new Bio::EnsEMBL::Utils::ConversionSupport($SERVERROOT);
132 $support->parse_common_options(@_);
133 $support->parse_extra_options(
141 'chromosomes|chr=s@
',
142 'skipclones|skip_clones=s
',
144 $support->allowed_params(
145 $support->get_common_params,
157 if ($support->param('help
') or $support->error) {
158 warn $support->error if $support->error;
162 $support->comma_to_list('chromosomes
');
164 # ask user to confirm parameters to proceed
165 $support->confirm_params;
167 # get log filehandle and print heading and parameters to logfile
170 $support->check_required_params(
176 # suggest to run non-verbosely
177 my $txt = qq(Running this script with the --verbose option will create a lot of output.
178 It is recommended to do this only for debug purposes.
179 Shall I switch to non-verbose logging for you?);
181 if ($support->param('verbose
') and $support->user_proceed($txt)) {
182 $support->param('verbose
', 0);
186 # connect to database and get adaptors
188 my ($dba, $dbh, $sql, $sth);
190 # first set connection parameters for alternative db if not different from
192 map { $support->param("alt$_", $support->param($_)) unless ($support->param("alt$_")) } qw(host port user);
195 my $R_dba = $support->get_database('ensembl
');
196 my $R_dbh = $R_dba->dbc->db_handle;
197 my $R_sa = $R_dba->get_SliceAdaptor;
199 # database containing the alternative assembly
200 my $A_dba = $support->get_database('core
', 'alt
');
201 my $A_sa = $A_dba->get_SliceAdaptor;
204 # create temporary table for storing non-aligned blocks
206 unless ($support->param('dry_run
')) {
208 CREATE TABLE IF NOT EXISTS tmp_align (
209 tmp_align_id int(10) unsigned NOT NULL auto_increment,
210 alt_seq_region_name varchar(20) NOT NULL,
211 alt_start int(10) UNSIGNED NOT NULL,
212 alt_end int(10) UNSIGNED NOT NULL,
213 ref_seq_region_name varchar(20) NOT NULL,
214 ref_start int(10) UNSIGNED NOT NULL,
215 ref_end int(10) UNSIGNED NOT NULL,
217 PRIMARY KEY (tmp_align_id)
221 # clear tmp_align table of entries from previous runs
222 $R_dbh->do(qq(DELETE FROM tmp_align));
226 # get reference and alternative toplevel seq_regions
228 my $R_chrlength = $support->get_chrlength($R_dba, $support->param('assembly
'), 'toplevel
');
229 my $A_chrlength = $support->get_chrlength($R_dba, $support->param('altassembly
'), 'toplevel
');
232 # read list of clones to skip from file
234 $support->log("Reading list of clones to skip from file...\n");
236 if ($support->param('skipclones
')) {
237 my $infh = $support->filehandle('<
', $support->param('skipclones
'));
243 $support->log("Done.\n\n");
246 # loop over toplevel seq_regions
248 $support->log_stamped("Looping over toplevel seq_regions...\n");
255 my $fmt1 = "%-40s%10.0f\n";
256 my $fmt2 = "%-40s%9.2f%%\n";
257 my $fmt3 = "%-12s%-12s%-12s%-12s%-12s%-9s\n";
258 my $fmt4 = "%10.0f %10.0f %7.0f %10.0f %10.0f %7.0f\n";
259 my $fmt5 = "%-40s%10s\n";
260 my $fmt6 = "%-10s%-12s%-10s%-12s\n";
262 my $sth1 = $R_dbh->prepare(qq(
263 INSERT IGNORE INTO assembly (asm_seq_region_id, cmp_seq_region_id,
264 asm_start, asm_end, cmp_start, cmp_end, ori)
265 VALUES (?, ?, ?, ?, ?, ?, 1)
267 my $sth2 = $R_dbh->prepare(qq(
268 INSERT INTO tmp_align values(NULL, ?, ?, ?, ?, ?, ?
271 foreach my $R_chr ($support->sort_chromosomes($R_chrlength)) {
273 $support->log_stamped("Toplevel seq_region $R_chr...\n", 1);
277 # fetch toplevel seq_region slices
278 my $R_slice = $R_sa->fetch_by_region('toplevel
', $R_chr, undef, undef, undef, $support->param('assembly
'));
279 my $A_slice = $A_sa->fetch_by_region('toplevel
', $A_chr, undef, undef, undef, $support->param('altassembly
'));
281 unless ($R_slice and $A_slice) {
282 $support->log("Seq_region not found in either ref or alt assembly. Skipping.\n", 2);
286 # we need to fetch the alternative slice from the reference db explicitely by
287 # coord_system, since toplevel attribute is not set there
288 my $cs_name = $A_slice->coord_system_name;
289 my $A_slice_ref = $R_sa->fetch_by_region($cs_name, $A_chr, undef, undef, undef, $support->param('altassembly
'));
291 # project to contigs (not to clones because for WGS assembly regions there
293 my @R_contigs = @{ $R_slice->project('contig
') };
294 my @A_contigs = @{ $A_slice->project('contig
') };
296 # loop over alternative clontigs
303 foreach my $A_seg (@A_contigs) {
305 my $A_contig = $A_seg->to_Slice;
307 # project contig to clone for clone name comparison
308 my @A_clones = @{ $A_contig->project('clone
') };
311 $A_clone = $A_clones[0]->to_Slice if (@A_clones);
313 $support->log_verbose("Alternative contig ($j) ".$A_contig->seq_region_name.":".$A_contig->start."-".$A_contig->end.":".$A_contig->strand." $A_chr:".$A_seg->from_start."-".$A_seg->from_end."\n", 2);
315 # walk reference contigs
317 for (my $i = $last; $i < scalar(@R_contigs); $i++) {
319 my $R_contig = $R_contigs[$i]->to_Slice;
321 # project contig to clone for clone name comparison
322 my @R_clones = @{ $R_contig->project('clone
') };
325 $R_clone = $R_clones[0]->to_Slice if (@R_clones);
327 # same clone name.version and contig and clone strand found
328 if ($A_clone and $R_clone and
329 $A_clone->seq_region_name eq $R_clone->seq_region_name and
330 $A_clone->strand == $R_clone->strand and
331 $A_contig->strand == $R_contig->strand) {
333 # same clone start/end -> identical assembly
334 if ($A_clone->start == $R_clone->start and $A_clone->end == $R_clone->end) {
335 # check if clone is tagged to be skipped
336 # this can be used to resolve some odd assembly differences
337 if ($skip{$A_clone->seq_region_name}) {
339 $support->log_verbose("Skipping matching reference clone ($i)".
340 $R_clone->seq_region_name.":".$R_clone->start."-".
341 $R_clone->end.":".$R_clone->strand."$R_chr:".
342 $R_contigs[$i]->from_start."-".$R_contigs[$i]->from_end.
345 &found_nomatch($R_chr, $A_chr, $match, $nomatch, $A_seg,
346 $last_A_seg, $R_contigs[$i], $R_contigs[$i-1],
350 $stats_chr{'skipped
'}++;
355 $support->log_verbose("Found matching reference clone ($i)".
356 $R_clone->seq_region_name.":".$R_clone->start."-".
357 $R_clone->end.":".$R_clone->strand."$R_chr:".
358 $R_contigs[$i]->from_start."-".$R_contigs[$i]->from_end.
361 &found_match($R_chr, $A_chr, $match, $nomatch, $A_seg,
362 $last_A_seg, $R_contigs[$i], $R_contigs[$i-1],
366 $stats_chr{'identical
'}++;
373 $support->log_verbose("Start/end mismatch for clone ($i) ".$R_contig->seq_region_name.":".$R_contig->start."-".$R_contig->end.":".$R_contig->strand." $R_chr:".$R_contigs[$i]->from_start."-".$R_contigs[$i]->from_end."\n", 2);
376 $R_chr, $A_chr, $match, $nomatch, $A_seg, $last_A_seg,
377 $R_contigs[$i], $R_contigs[$i-1], $match_flag, $i, $j
380 $stats_chr{'mismatch
'}++;
387 # different clones or no clones found
390 $support->log_verbose("Skipping clone ($i)".$R_contig->seq_region_name.":".$R_contig->start."-".$R_contig->end.":".$R_contig->strand." $R_chr:".$R_contigs[$i]->from_start."-".$R_contigs[$i]->from_end."\n", 2);
392 &found_nomatch($R_chr, $A_chr, $match, $nomatch, $A_seg, $last_A_seg, $R_contigs[$i], $R_contigs[$i-1], $match_flag, $i, $j);
399 $last_A_seg = $A_seg;
403 # adjust the final clone count
406 # last clone was a match, adjust matching clone count
407 if ($match->{$R_chr}) {
409 my $c = scalar(@{ $match->{$R_chr} }) - 1;
410 $match->{$R_chr}->[$c]->[2] = scalar(@A_contigs) - $match->{$R_chr}->[$c]->[2];
411 $match->{$R_chr}->[$c]->[5] = scalar(@R_contigs) - $match->{$R_chr}->[$c]->[5];
417 # last clone was a non-match, adjust non-matching clone count
418 if ($nomatch->{$R_chr}) {
420 my $c = scalar(@{ $nomatch->{$R_chr} }) - 1;
421 $nomatch->{$R_chr}->[$c]->[2] = scalar(@A_contigs) - $nomatch->{$R_chr}->[$c]->[2];
422 $nomatch->{$R_chr}->[$c]->[5] = scalar(@R_contigs) - $nomatch->{$R_chr}->[$c]->[5];
428 # filter single assembly inserts from non-aligned blocks (i.e. cases where
429 # a block has clones only in one assembly, not in the other) - there is
430 # nothing to be done with them
431 @{ $nomatch->{$R_chr} } = grep { $_->[2] > 0 and $_->[5] > 0 }
432 @{ $nomatch->{$R_chr} } if ($nomatch->{$R_chr});
434 # store directly aligned blocks in assembly table
435 unless ($support->param('dry_run
')) {
437 $support->log("Adding assembly entries for directly aligned blocks...\n", 1);
440 for ($c = 0; $c < scalar(@{ $match->{$R_chr} || [] }); $c++) {
442 $R_sa->get_seq_region_id($R_slice),
443 $R_sa->get_seq_region_id($A_slice_ref),
444 $match->{$R_chr}->[$c]->[3],
445 $match->{$R_chr}->[$c]->[4],
446 $match->{$R_chr}->[$c]->[0],
447 $match->{$R_chr}->[$c]->[1]
451 $support->log("Done inserting $c entries.\n", 1);
454 # store non-aligned blocks in tmp_align table
455 unless ($support->param('dry_run
')) {
457 if ($nomatch->{$R_chr}) {
459 $support->log("Storing non-aligned blocks in tmp_align table...\n", 1);
462 for ($c = 0; $c < scalar(@{ $nomatch->{$R_chr} }); $c++) {
464 $nomatch->{$R_chr}->[$c]->[6],
465 $nomatch->{$R_chr}->[$c]->[0],
466 $nomatch->{$R_chr}->[$c]->[1],
468 $nomatch->{$R_chr}->[$c]->[3],
469 $nomatch->{$R_chr}->[$c]->[4],
473 $support->log("Done inserting $c entries.\n", 1);
477 # stats for this toplevel seq_region
478 $stats_chr{'A_only
'} = scalar(@A_contigs) - $stats_chr{'identical
'} - $stats_chr{'mismatch
'};
479 $stats_chr{'R_only
'} = scalar(@R_contigs) - $stats_chr{'identical
'} - $stats_chr{'mismatch
'};
480 for (my $c = 0; $c < scalar(@{ $match->{$R_chr} || [] }); $c++) {
481 $stats_chr{'A_matchlength
'} += $match->{$R_chr}->[$c]->[1] - $match->{$R_chr}->[$c]->[0];
482 $stats_chr{'R_matchlength
'} += $match->{$R_chr}->[$c]->[4] - $match->{$R_chr}->[$c]->[3];
484 $stats_chr{'A_coverage
'} = 100 * $stats_chr{'A_matchlength
'} / $A_slice->length;
485 $stats_chr{'R_coverage
'} = 100 * $stats_chr{'R_matchlength
'} / $R_slice->length;
486 map { $stats_total{$_} += $stats_chr{$_} } keys %stats_chr;
488 $support->log("\nStats for toplevel seq_region $R_chr:\n\n", 1);
489 $support->log(sprintf($fmt5, "Alternative toplevel seq_region name:", $A_chr), 2);
490 $support->log(sprintf($fmt1, "Length (alternative):", $A_slice->length), 2);
491 $support->log(sprintf($fmt1, "Length (reference):", $R_slice->length), 2);
492 $support->log(sprintf($fmt1, "Identical clones:", $stats_chr{'identical
'}), 2);
493 $support->log(sprintf($fmt1, "Identical clones that were skipped:", $stats_chr{'skipped
'}), 2);
494 $support->log(sprintf($fmt1, "Clones with start/end mismatch:", $stats_chr{'mismatch
'}), 2);
495 $support->log(sprintf($fmt1, "Clones only in alternative assembly:", $stats_chr{'A_only
'}), 2);
496 $support->log(sprintf($fmt1, "Clones only in refernce assembly:", $stats_chr{'R_only
'}), 2);
497 $support->log(sprintf($fmt2, "Direct match coverage (alternative):", $stats_chr{'A_coverage
'}), 2);
498 $support->log(sprintf($fmt2, "Direct match coverage (reference):", $stats_chr{'R_coverage
'}), 2);
501 if ($match->{$R_chr}) {
503 $support->log("\nDirectly aligned blocks:\n\n", 1);
504 $support->log(sprintf($fmt3, qw(ALT_START ALT_END ALT_CLONES REF_START REF_END REF_CLONES)), 2);
505 $support->log(('-
'x71)."\n", 2);
507 for (my $c = 0; $c < scalar(@{ $match->{$R_chr} }); $c++) {
509 $support->log(sprintf($fmt4, @{ $match->{$R_chr}->[$c] }), 2);
511 # sanity check: aligned region pairs must have same length
512 my $e_len = $match->{$R_chr}->[$c]->[1] - $match->{$R_chr}->[$c]->[0] + 1;
513 my $v_len = $match->{$R_chr}->[$c]->[4] - $match->{$R_chr}->[$c]->[3] + 1;
515 $support->log_warning("Length mismatch: $e_len <> $v_len\n", 2) unless ($e_len == $v_len);
521 if ($nomatch->{$R_chr}) {
523 $support->log("\nNon-aligned blocks:\n\n", 1);
524 $support->log(sprintf($fmt3, qw(ALT_START ALT_END ALT_CLONES REF_START REF_END REF_CLONES)), 2);
525 $support->log(('-
'x71)."\n", 2);
527 for (my $c = 0; $c < scalar(@{ $nomatch->{$R_chr} }); $c++) {
529 $support->log(sprintf($fmt4, @{ $nomatch->{$R_chr}->[$c] }), 2);
531 # find longest non-aligned block
532 my $A_length = $nomatch->{$R_chr}->[$c]->[1] - $nomatch->{$R_chr}->[$c]->[0] + 1;
533 my $R_length = $nomatch->{$R_chr}->[$c]->[4] - $nomatch->{$R_chr}->[$c]->[3] + 1;
534 push @block_length, [$A_chr, $A_length, $R_chr, $R_length];
539 $support->log_stamped("\nDone with toplevel seq_region $R_chr.\n", 1);
543 $support->log("\nOverall stats:\n");
544 $support->log(sprintf($fmt1, "Identical clones:", $stats_total{'identical
'}), 1);
545 $support->log(sprintf($fmt1, "Identical clones that were skipped:", $stats_total{'skipped
'}), 1);
546 $support->log(sprintf($fmt1, "Clones with start/end mismatch:", $stats_total{'mismatch
'}), 1);
547 $support->log(sprintf($fmt1, "Clones only in alternative assembly:", $stats_total{'A_only
'}), 1);
548 $support->log(sprintf($fmt1, "Clones only in reference assembly:", $stats_total{'R_only
'}), 1);
550 $support->log("\nNon-match block lengths:\n");
551 $support->log(sprintf($fmt6, qw(ALT_CHR ALT_LENGTH REF_CHR REF_LENGTH)), 1);
552 $support->log(('-
'x42)."\n", 1);
553 foreach my $block (sort { $a->[1] <=> $b->[1] } @block_length) {
554 $support->log(sprintf("%-10s%10.0f %-10s%10.0f\n", @{ $block }), 1);
557 $support->log_stamped("\nDone.\n");
560 $support->finish_log;
568 Arg[1] : String $R_chr - reference toplevel seq_region name
569 Arg[2] : String $A_chr - alternative toplevel seq_region name
570 Arg[3] : Hashref $match - datastructure to store aligned blocks
571 Arg[4] : Hashref $nomatch - datastructure to store non-aligned blocks
572 Arg[5] : Bio::EnsEMBL::ProjectionSegment $A_seg - current alternative
574 Arg[6] : Bio::EnsEMBL::ProjectionSegment $last_A_seg - last alternative
576 Arg[7] : Bio::EnsEMBL::ProjectionSegment $R_seg - current reference
578 Arg[8] : Bio::EnsEMBL::ProjectionSegment $last_R_seg - last reference
580 Arg[9] : Boolean $match_flag - flag indicating if last clone was a match
581 Arg[10] : Int $i - reference clone count
582 Arg[11] : Int $j - alternative clone count
583 Description : This function is called when two clones match (i.e. have the
584 same name.version in both assemblies). Depending on the state
585 of the last clone (match or nomatch), it extends aligned blocks
586 or finishes the non-aligned block and creates a new aligned
595 my ($R_chr, $A_chr, $match, $nomatch, $A_seg, $last_A_seg, $R_seg, $last_R_seg, $match_flag, $i, $j) = @_;
597 # last clone was a match
600 # adjust align block end
601 if ($match->{$R_chr}) {
603 my $c = scalar(@{ $match->{$R_chr} }) - 1;
605 # if the gaps between this clone and the last are different, start
607 if (($A_seg->from_start - $match->{$R_chr}->[$c]->[1]) !=
608 ($R_seg->from_start - $match->{$R_chr}->[$c]->[4])) {
610 $support->log("Gap size mismatch at A:$A_chr:".$match->{$R_chr}->[$c]->[1].'-
'.$A_seg->from_start.", R:$R_chr:".$match->{$R_chr}->[$c]->[4].'-
'.$R_seg->from_start."\n", 2);
612 # finish the last align block
613 $match->{$R_chr}->[$c]->[1] = $last_A_seg->from_end;
614 $match->{$R_chr}->[$c]->[2] = $j - $match->{$R_chr}->[$c]->[2];
615 $match->{$R_chr}->[$c]->[4] = $last_R_seg->from_end;
616 $match->{$R_chr}->[$c]->[5] = $i - $match->{$R_chr}->[$c]->[5];
618 # start a new align block
619 push @{ $match->{$R_chr} }, [
629 # adjust align block end
631 $match->{$R_chr}->[$c]->[1] = $A_seg->from_end;
632 $match->{$R_chr}->[$c]->[4] = $R_seg->from_end;
636 # last clone was a non-match
639 # start a new align block
640 push @{ $match->{$R_chr} }, [
650 # finish the last non-align block
651 if ($nomatch->{$R_chr} and $last_A_seg) {
652 my $c = scalar(@{ $nomatch->{$R_chr} }) - 1;
653 $nomatch->{$R_chr}->[$c]->[1] = $last_A_seg->from_end;
654 $nomatch->{$R_chr}->[$c]->[2] = $j - $nomatch->{$R_chr}->[$c]->[2];
655 $nomatch->{$R_chr}->[$c]->[4] = $last_R_seg->from_end;
656 $nomatch->{$R_chr}->[$c]->[5] = $i - $nomatch->{$R_chr}->[$c]->[5];
663 Arg[1] : String $R_chr - reference toplevel seq_region name
664 Arg[2] : String $A_chr - alternative toplevel seq_region name
665 Arg[3] : Hashref $match - datastructure to store aligned blocks
666 Arg[4] : Hashref $nomatch - datastructure to store non-aligned blocks
667 Arg[5] : Bio::EnsEMBL::ProjectionSegment $A_seg - current alternative
669 Arg[6] : Bio::EnsEMBL::ProjectionSegment $last_A_seg - last alternative
671 Arg[7] : Bio::EnsEMBL::ProjectionSegment $R_seg - current reference
673 Arg[8] : Bio::EnsEMBL::ProjectionSegment $last_R_seg - last reference
675 Arg[9] : Boolean $match_flag - flag indicating if last clone was a match
676 Arg[10] : Int $i - reference clone count
677 Arg[11] : Int $j - alternative clone count
678 Description : This function is called when two clones don't match (either
679 different name.version or length mismatch in the two
680 assemblies). Depending on the state of the last clone (nomatch
681 or match), it extends non-aligned blocks or finishes the
682 aligned block and creates a
new non-aligned block.
690 my ($R_chr, $A_chr, $match, $nomatch, $A_seg, $last_A_seg, $R_seg, $last_R_seg, $match_flag, $i, $j) = @_;
692 # last clone was a match
695 # start a new non-align block
696 push @{ $nomatch->{$R_chr} }, [
706 # finish the last align block
707 if ($match->{$R_chr}) {
708 my $c = scalar(@{ $match->{$R_chr} }) - 1;
709 $match->{$R_chr}->[$c]->[1] = $last_A_seg->from_end;
710 $match->{$R_chr}->[$c]->[2] = $j - $match->{$R_chr}->[$c]->[2];
711 $match->{$R_chr}->[$c]->[4] = $last_R_seg->from_end;
712 $match->{$R_chr}->[$c]->[5] = $i - $match->{$R_chr}->[$c]->[5];
715 # last clone was a non-match
718 # adjust non-align block end
719 if ($nomatch->{$R_chr}) {
720 my $c = scalar(@{ $nomatch->{$R_chr} }) - 1;
721 $nomatch->{$R_chr}->[$c]->[1] = $A_seg->from_end;
722 $nomatch->{$R_chr}->[$c]->[4] = $R_seg->from_end;
724 # we're at the beginning of a seq_region, so start a new non-align block
726 push @{ $nomatch->{$R_chr} }, [