3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 # Convert release 18/19-era schemas to use new non-clone/contig schema
27 package SeqStoreConverter::BasicConverter;
29 ###############################################################################
31 ###############################################################################
34 my ( $class, $user, $pass, $host, $source, $target, $schema, $vega_schema, $force, $verbose, $limit ) = @_;
36 my $self = bless {}, $class;
39 ($host, $port) = split(/:/, $host);
42 my $dbh = DBI->connect(
"DBI:mysql:host=$host:port=$port", $user, $pass,
46 $self->verbose( $verbose );
48 $self->force( $force );
49 $self->source( $source );
50 $self->target( $target );
51 $self->schema( $schema );
52 $self->vegaschema( $vega_schema);
54 $self->password( $pass);
60 #check to see if the destination and source databases exist already.
61 my %dbs =
map {$_->[0] => 1} @{$dbh->selectall_arrayref(
'show databases')};
62 if( !$dbs{$source} ) {
63 die (
"Source db $source does not exist" );
68 $dbh->do(
"drop database $target" );
70 die(
"Target db $target already exists. Use -force option to overwrite.");
74 $dbh->do(
"create database ".$self->target() );
76 $self->debug(
"Building schema for $target from $schema");
77 die
"Cannot open $schema" if (! -e $schema);
78 my $cmd =
"/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $schema";
82 $self->debug(
"Adding vega tables for $target");
83 die
"Cannot open vega creation script" if (! -e $vega_schema);
84 my $cmd =
"/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $vega_schema";
88 $self->debug(
"Creating temporary tables");
89 #create a temporary table to store the mapping of old ids to new ids
91 (
"CREATE TEMPORARY TABLE $target.tmp_cln_map (" .
92 "old_id INT, new_id INT, INDEX new_idx (new_id))");
94 #create a temp table which will store the mapping of old chromosome
95 #identifiers to new identifiers
96 $dbh->do(
"CREATE TEMPORARY TABLE $target.tmp_chr_map (" .
97 " old_id INT, new_id INT,".
98 " INDEX new_idx (new_id))");
100 #create a temporary table to hold old supercontig name -> new id mappings
101 $dbh->do(
"CREATE TEMPORARY TABLE $target.tmp_superctg_map (" .
102 "name VARCHAR(255), new_id INT, ".
103 "INDEX new_idx (new_id))");
109 ###############################################################################
110 # Getter/Setters for converter properties
111 ###############################################################################
115 $self->{
'force'} = shift
if(@_);
116 return $self->{
'force'};
122 $self->{
'dbh'} = shift
if(@_);
123 return $self->{
'dbh'};
128 $self->{
'user'} = shift
if(@_);
129 return $self->{
'user'};
133 $self->{
'host'} = shift
if(@_);
134 return $self->{
'host'};
139 $self->{
'port'} = shift
if(@_);
140 return $self->{
'port'};
145 $self->{
'password'} = shift
if(@_);
146 return $self->{
'password'};
152 $self->{
'verbose'} = shift
if (@_);
153 return $self->{
'verbose'};
158 $self->{
'schema'} = shift
if (@_);
159 return $self->{
'schema'};
164 $self->{
'vega_schema'} = shift
if (@_);
165 return $self->{
'vega_schema'};
170 $self->{
'source'} = shift
if(@_);
171 return $self->{
'source'};
177 $self->{
'target'} = shift
if(@_);
178 return $self->{
'target'};
183 $self->{
'limit'} = shift
if(@_);
184 return $self->{
'limit'};
188 ###############################################################################
190 ###############################################################################
195 print STDERR $str .
"\n" if $self->verbose();
200 my ($self, @tables) = @_;
202 foreach my $table (@tables) {
203 $self->debug(
"Copying $table");
205 my $source = $self->source();
206 my $target = $self->target();
209 my $sth = $self->dbh()->prepare
210 (
"INSERT INTO $target.$table SELECT * FROM $source.$table");
216 warn(
"Copy of table $table failed: $@\n");
223 sub get_coord_system_id {
226 my $cs_version = shift;
228 my $target = $self->target();
230 my @bind_vals = ($cs_name);
231 my $sql =
"SELECT cs.coord_system_id " .
232 "FROM $target.coord_system cs " .
236 push(@bind_vals, $cs_version);
237 $sql .=
" AND cs.version = ?";
240 my $sth = $self->dbh()->prepare($sql);
241 $sth->execute(@bind_vals);
243 if($sth->rows() != 1) {
244 die(
"Id for non-existant or ambiguous coord system requested " .
245 "$cs_name:$cs_version");
248 my ($id) = $sth->fetchrow_array();
255 sub get_default_assembly {
258 my $source = $self->source();
260 my $sth = $self->dbh->prepare
261 (
"SELECT meta_value FROM $source.meta WHERE meta_key='assembly.default'");
264 if(!$sth->rows() == 1) {
265 die(
"This species has an ambiguous or non-existant assembly.default" .
266 " in the meta table");
269 my ($result) = $sth->fetchrow_array();
276 sub contig_to_seq_region {
278 my $target_cs_name = shift;
280 my $target = $self->target();
281 my $source = $self->source();
282 my $dbh = $self->dbh();
284 $target_cs_name ||=
'contig';
286 $self->debug(
"Transforming contigs into $target_cs_name seq_regions");
288 my $cs_id = $self->get_coord_system_id($target_cs_name);
290 my $sth = $dbh->prepare
291 (
"INSERT INTO $target.seq_region " .
292 "SELECT contig_id, name, $cs_id, length FROM $source.contig");
301 sub clone_to_seq_region {
303 my $target_cs_name = shift;
305 my $target = $self->target();
306 my $source = $self->source();
307 my $dbh = $self->dbh();
309 # target coord_system will have a different ID
310 $target_cs_name ||=
"clone";
311 my $cs_id = $self->get_coord_system_id($target_cs_name);
313 $self->debug(
"Transforming clones into $target_cs_name seq_regions");
315 my $select_sth = $dbh->prepare
316 (
"SELECT cl.clone_id,
317 CONCAT(cl.embl_acc, '.', cl.embl_version),
318 MAX(ctg.embl_offset+ctg.length-1)
319 FROM $source.clone cl, $source.contig ctg
320 WHERE cl.clone_id = ctg.clone_id GROUP BY ctg.clone_id");
321 $select_sth->execute();
323 my ($clone_id, $embl_acc, $length);
324 $select_sth->bind_columns(\$clone_id, \$embl_acc, \$length);
326 my $insert_sth = $dbh->prepare
327 (
"INSERT INTO $target.seq_region (name, coord_system_id, length) " .
330 my $tmp_insert_sth = $dbh->prepare
331 (
"INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");
333 while ($select_sth->fetch()) {
334 $insert_sth->execute(
"$embl_acc", $cs_id, $length);
336 #store mapping of old -> new ids in temp table
337 $tmp_insert_sth->execute($clone_id, $insert_sth->{
'mysql_insertid'});
340 $select_sth->finish();
341 $insert_sth->finish();
342 $tmp_insert_sth->finish();
349 sub chromosome_to_seq_region {
351 my $target_cs_name = shift;
353 my $target = $self->target();
354 my $source = $self->source();
355 my $dbh = $self->dbh();
357 $target_cs_name ||=
"chromosome";
358 my $cs_id = $self->get_coord_system_id($target_cs_name);
360 $self->debug(
"Transforming chromosomes into $target_cs_name seq_regions");
363 my $select_sth = $dbh->prepare
364 (
"SELECT chromosome_id, name, length FROM $source.chromosome");
366 my $insert_sth = $dbh->prepare
367 (
"INSERT INTO $target.seq_region (name, coord_system_id, length) " .
370 my $tmp_insert_sth = $dbh->prepare
371 (
"INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");
373 $select_sth->execute();
375 my ($chrom_id, $name, $length);
376 $select_sth->bind_columns(\$chrom_id, \$name, \$length);
378 while ($select_sth->fetch()) {
379 #insert into seq_region table
380 $insert_sth->execute($name, $cs_id, $length);
381 #copy old/new mapping into temporary table
382 $tmp_insert_sth->execute($chrom_id, $insert_sth->{
'mysql_insertid'});
385 $select_sth->finish();
386 $insert_sth->finish();
387 $tmp_insert_sth->finish();
394 sub supercontig_to_seq_region {
396 my $target_cs_name = shift ||
"supercontig";
398 my $target = $self->target();
399 my $source = $self->source();
400 my $dbh = $self->dbh();
402 $self->debug(
"Transforming supercontigs into $target_cs_name seq_regions");
404 my $cs_id = $self->get_coord_system_id($target_cs_name);
406 my $select_sth = $dbh->prepare
407 (
"SELECT superctg_name, " .
408 "MAX(superctg_end) AS length " .
409 "FROM $source.assembly " .
410 "GROUP BY superctg_name");
412 my $insert_sth = $dbh->prepare
413 (
"INSERT INTO $target.seq_region (name, coord_system_id, length) " .
416 my $tmp_insert_sth = $dbh->prepare
417 (
"INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?, ?)");
420 $select_sth->execute();
421 $select_sth->bind_columns(\$name, \$length);
423 while ($select_sth->fetch()) {
424 $insert_sth->execute($name, $cs_id, $length);
425 $tmp_insert_sth->execute($name, $insert_sth->{
'mysql_insertid'});
428 $select_sth->finish();
429 $insert_sth->finish();
430 $tmp_insert_sth->finish();
436 sub assembly_contig_chromosome {
439 $self->debug(
"Building assembly table - contig/chromosome");
441 my $source = $self->source();
442 my $target = $self->target();
443 my $dbh = $self->dbh();
446 "INSERT INTO $target.assembly " .
447 "SELECT tcm.new_id, " . # asm_seq_region_id (old-
new chromosome ID mapping)
448 "a.contig_id, " . # cmp_seq_region_id
449 "a.chr_start, " . # asm_start
450 "a.chr_end, " . # asm_end
451 "a.contig_start, " . # cmp_start
452 "a.contig_end, " . # cmp_end
453 "a.contig_ori " . # ori
454 "FROM $target.tmp_chr_map tcm, $source.assembly a, $source.contig c " .
455 "WHERE tcm.old_id = a.chromosome_id " .
456 "AND c.contig_id = a.contig_id "); # only copy assembly entries that
457 # refer to valid contigs (test db has
458 # superfluous assembly entries)
462 sub assembly_contig_clone {
465 $self->debug(
"Building assembly table - contig/clone");
467 my $source = $self->source();
468 my $target = $self->target();
469 my $dbh = $self->dbh();
472 "INSERT INTO $target.assembly " .
473 "SELECT tcm.new_id, " . # asm_seq_region_id (old-
new clone ID mapping)
474 "ctg.contig_id, ". # cmp_seq_region_id
475 "ctg.embl_offset, " . # asm_start
476 "ctg.embl_offset+ctg.length-1, " . # asm_end
478 "ctg.length, " . # cmp_end
479 "1 " . # ori - contig always positively oriented on the clone
480 "FROM $target.tmp_cln_map tcm, " .
481 "$source.clone cln, $source.contig ctg " .
482 "WHERE tcm.old_id = cln.clone_id " .
483 "AND cln.clone_id = ctg.clone_id");
488 sub assembly_contig_supercontig {
491 $self->debug(
"Building assembly table - contig/supercontig");
493 my $source = $self->source();
494 my $target = $self->target();
495 my $dbh = $self->dbh();
498 "INSERT INTO $target.assembly " .
499 "SELECT tsm.new_id, " . # asm_seq_region_id (superctg name-sr_id mapping)
500 "a.contig_id, " . # cmp_seq_region_id
501 "a.superctg_start, " . # asm_start
502 "a.superctg_end, " . # asm_end
503 "a.contig_start, " . # cmp_start
504 "a.contig_end, " . # cmp_end
505 "a.contig_ori " . # ori
506 "FROM $target.tmp_superctg_map tsm, $source.assembly a, $source.contig c ".
507 "WHERE tsm.name = a.superctg_name " .
508 "AND c.contig_id = a.contig_id "); # only copy assembly entries that
509 # refer to valid contigs (test db might
516 sub assembly_supercontig_chromosome {
519 $self->debug(
"Building assembly table - supercontig/chromosome");
521 my $source = $self->source();
522 my $target = $self->target();
523 my $dbh = $self->dbh();
526 "INSERT INTO $target.assembly " .
527 "SELECT tcm.new_id, " . # asm_seq_region_id (chr
id)
528 "tsm.new_id, " . # cmp_seq_region_id (supercontig
id)
529 "min(a.chr_start), " . # asm_start
530 "max(a.chr_end), " . # asm_end
531 "min(a.superctg_start), " . # cmp_start
532 "max(a.superctg_end), " . # cmp_end
533 "a.superctg_ori " . # ori
534 "FROM $target.tmp_superctg_map tsm, $target.tmp_chr_map tcm, " .
535 " $source.assembly a ".
536 "WHERE tsm.name = a.superctg_name " .
537 "AND tcm.old_id = a.chromosome_id " .
538 "GROUP BY superctg_name");
544 ###############################################################################
545 # Base class implementations of transfer methods. Can be overridden to
546 # create species specific behaviour
547 ###############################################################################
550 sub create_coord_systems {
553 my $target = $self->target();
554 my $dbh = $self->dbh();
556 my $ass_def = $self->get_default_assembly();
559 ([
"chromosome" , $ass_def,
"default_version" ,1],
560 [
"supercontig", undef ,
"default_version" ,2],
561 [
"clone" , undef ,
"default_version" ,3],
562 [
"contig", undef ,
"default_version,sequence_level",4]);
564 my @assembly_mappings = (
"chromosome:$ass_def|contig",
566 "supercontig|contig",
567 "supercontig|contig|clone",
568 "chromosome:$ass_def|contig|clone",
569 "chromosome:$ass_def|contig|supercontig");
571 $self->debug(
"Building coord_system table");
573 my $sth = $dbh->prepare(
"INSERT INTO $target.coord_system " .
574 "(name, version, attrib,rank) VALUES (?,?,?,?)");
576 my %coord_system_ids;
578 foreach my $cs (@coords) {
580 $coord_system_ids{$cs->[0]} = $sth->{
'mysql_insertid'};
584 $self->debug(
"Adding assembly.mapping entries to meta table");
586 $sth = $dbh->prepare(
"INSERT INTO $target.meta(meta_key, meta_value) " .
587 "VALUES ('assembly.mapping', ?)");
589 foreach my $mapping (@assembly_mappings) {
590 $sth->execute($mapping);
600 # populates the contents of the meta_coord table
601 # must be executed after all of the feature tables in the target database
602 # have already been populated
605 sub create_meta_coord {
608 $self->debug(
"Building meta_coord table");
610 my $target = $self->target();
611 my $dbh = $self->dbh();
613 my @feature_tables = qw(density_feature
621 prediction_transcript
622 protein_align_feature
627 foreach my $ft (@feature_tables) {
629 $dbh->do(qq{INSERT INTO $target.meta_coord(table_name, coord_system_id,
631 SELECT
'$ft', sr.coord_system_id,
632 MAX(f.seq_region_end - f.seq_region_start + 1)
633 FROM $target.$ft f, $target.seq_region sr
634 WHERE sr.seq_region_id = f.seq_region_id
635 GROUP BY sr.coord_system_id});
638 # special case for assembly exception, features are created from both
641 $dbh->do(qq{INSERT INTO $target.meta_coord
642 SELECT
'assembly_exception', sr.coord_system_id,
643 MAX(IF(ae.seq_region_end - ae.seq_region_start > ae.exc_seq_region_end - ae.exc_seq_region_start, ae.seq_region_end - ae.seq_region_start + 1, ae.exc_seq_region_end - ae.exc_seq_region_start + 1))
644 FROM $target.assembly_exception ae, $target.seq_region sr
645 WHERE sr.seq_region_id = ae.seq_region_id
646 GROUP BY sr.coord_system_id});
651 sub create_seq_regions {
654 my $target = $self->target();
655 my $dbh = $self->dbh();
657 #default behaviour is to simply copy all tables as they come
659 $self->contig_to_seq_region(
'contig');
660 $self->chromosome_to_seq_region();
661 $self->supercontig_to_seq_region();
662 $self->clone_to_seq_region();
667 sub create_assembly {
670 $self->assembly_contig_chromosome();
671 $self->assembly_contig_clone();
672 $self->assembly_contig_supercontig();
680 my $target = $self->target();
681 my $source = $self->source();
682 my $dbh = $self->dbh();
684 $self->debug(
"Building dna table");
686 $dbh->do(
"INSERT INTO $target.dna " .
687 "SELECT c.contig_id as seq_region_id, d.sequence as sequence " .
688 "FROM $source.dna d, $source.contig c " .
689 "WHERE c.dna_id = d.dna_id");
697 my $target = $self->target();
698 my $source = $self->source();
699 my $dbh = $self->dbh();
702 # Transfer the gene table
705 $self->debug(
"Building gene table");
708 (
"INSERT INTO $target.gene " .
709 "SELECT g.gene_id, g.type, g.analysis_id, tcm.new_id, " .
710 "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
711 " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
712 "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
713 " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
714 " a.contig_ori*e.contig_strand as strand, " .
715 " g.display_xref_id " .
716 "FROM $source.transcript t, $source.exon_transcript et, " .
717 " $source.exon e, $source.assembly a, $source.gene g, " .
718 " $target.tmp_chr_map tcm " .
719 "WHERE t.transcript_id = et.transcript_id " .
720 "AND et.exon_id = e.exon_id " .
721 "AND e.contig_id = a.contig_id " .
722 "AND g.gene_id = t.gene_id " .
723 "AND a.chromosome_id = tcm.old_id " .
724 "GROUP BY g.gene_id");
728 # Transfer the transcript table
730 $self->debug(
"Building transcript table");
733 (
"INSERT INTO $target.transcript " .
734 "SELECT t.transcript_id, t.gene_id, tcm.new_id, " .
735 "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
736 " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
737 "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
738 " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
739 " a.contig_ori*e.contig_strand as strand, " .
740 " t.display_xref_id " .
741 "FROM $source.transcript t, $source.exon_transcript et, " .
742 " $source.exon e, $source.assembly a, $target.tmp_chr_map tcm " .
743 "WHERE t.transcript_id = et.transcript_id " .
744 "AND et.exon_id = e.exon_id " .
745 "AND e.contig_id = a.contig_id " .
746 "AND a.chromosome_id = tcm.old_id " .
747 "GROUP BY t.transcript_id");
750 # Transfer the exon table
752 $self->debug(
"Building exon table");
755 (
"INSERT INTO $target.exon " .
756 "SELECT e.exon_id, tcm.new_id, " .
757 "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
758 " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
759 "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
760 " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
761 " a.contig_ori*e.contig_strand as strand, " .
762 " e.phase, e.end_phase " .
763 "FROM $source.transcript t, $source.exon_transcript et, " .
764 " $source.exon e, $source.assembly a, $source.gene g, " .
765 " $target.tmp_chr_map tcm " .
766 "WHERE t.transcript_id = et.transcript_id " .
767 "AND et.exon_id = e.exon_id " .
768 "AND e.contig_id = a.contig_id " .
769 "AND g.gene_id = t.gene_id " .
770 "AND a.chromosome_id = tcm.old_id " .
771 "GROUP BY e.exon_id");
774 # Transfer translation table
777 $self->debug(
"Building translation table");
780 (
"INSERT INTO $target.translation " .
781 "SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " .
782 " tl.start_exon_id, tl.seq_end, tl.end_exon_id " .
783 "FROM $source.transcript ts, $source.translation tl " .
784 "WHERE ts.translation_id = tl.translation_id");
789 sub transfer_prediction_transcripts {
792 my $source = $self->source();
793 my $target = $self->target();
794 my $dbh = $self->dbh();
796 # prediction_transcript / prediction_exon
798 $self->debug(
"Building prediction_exon table" );
801 (
"INSERT INTO $target.prediction_exon ".
802 "( prediction_transcript_id, seq_region_id, seq_region_start, " .
803 " seq_region_end, seq_region_strand, start_phase, score, p_value," .
805 "SELECT prediction_transcript_id, contig_id, contig_start, contig_end, " .
806 " contig_strand, start_phase, score, p_value, exon_rank " .
807 "FROM $source.prediction_transcript" );
809 $self->debug(
"Building prediction_transcript table");
812 (
"INSERT INTO $target.prediction_transcript ".
813 "( prediction_transcript_id, seq_region_id, seq_region_start, " .
814 "seq_region_end, seq_region_strand, analysis_id ) " .
815 "SELECT prediction_transcript_id, contig_id, MIN(contig_start), " .
816 " MAX(contig_end), contig_strand, analysis_id ".
817 "FROM $source.prediction_transcript " .
818 "GROUP BY prediction_transcript_id ");
825 sub transfer_features {
828 my $target = $self->target();
829 my $source = $self->source();
830 my $dbh = $self->dbh();
834 $limit =
' limit ' . $self->limit();
840 # Note that we can just rename contig_* to set_region_* since the
841 # contig IDs were copied verbatim into seq_region
844 # For some reason mysql occasionally refuses to use the index on large
845 # tables following copies like the following.
846 # So: drop the indexes first and then re-add them after
849 $self->debug(
"Building simple_feature table");
851 (
"INSERT INTO $target.simple_feature (simple_feature_id, seq_region_id, ".
852 " seq_region_start, seq_region_end, seq_region_strand, " .
853 " display_label, analysis_id, score) " .
854 "SELECT simple_feature_id, contig_id, contig_start, contig_end, " .
855 " contig_strand, display_label, analysis_id, score " .
856 "FROM $source.simple_feature $limit");
859 $self->debug(
"Dropping indexes on repeat_feature");
860 $dbh->do(
"ALTER TABLE $target.repeat_feature DROP INDEX seq_region_idx");
861 $dbh->do(
"ALTER TABLE $target.repeat_feature DROP INDEX repeat_idx");
862 $dbh->do(
"ALTER TABLE $target.repeat_feature DROP INDEX analysis_idx");
864 $self->debug(
"Building repeat_feature table");
866 (
"INSERT INTO $target.repeat_feature (repeat_feature_id, seq_region_id, " .
867 " seq_region_start, seq_region_end, seq_region_strand, analysis_id, " .
868 " repeat_start, repeat_end, repeat_consensus_id, score) " .
869 "SELECT repeat_feature_id, contig_id, contig_start, contig_end, " .
870 " contig_strand, analysis_id, repeat_start, repeat_end, " .
871 " repeat_consensus_id, score FROM $source.repeat_feature $limit");
873 $self->debug(
"Reading indexes on repeat_feature");
874 $dbh->do(
"ALTER TABLE $target.repeat_feature " .
875 "ADD INDEX seq_region_idx( seq_region_id, seq_region_start)");
876 $dbh->do(
"ALTER TABLE $target.repeat_feature " .
877 "ADD INDEX repeat_idx( repeat_consensus_id )");
878 $dbh->do(
"ALTER TABLE $target.repeat_feature " .
879 "ADD INDEX analysis_idx(analysis_id)");
881 # protein_align_feature
882 $self->debug(
"Dropping indexes on protein_align_feature");
883 $dbh->do(
"ALTER TABLE $target.protein_align_feature DROP INDEX hit_idx");
884 $dbh->do(
"ALTER TABLE $target.protein_align_feature " .
885 "DROP INDEX seq_region_idx");
887 $self->debug(
"Building protein_align_feature table");
888 $dbh->do(
"INSERT INTO $target.protein_align_feature " .
889 " (protein_align_feature_id, seq_region_id, seq_region_start, " .
890 " seq_region_end, seq_region_strand, analysis_id, hit_start, " .
891 " hit_end, hit_name, cigar_line, evalue, perc_ident, score) " .
892 "SELECT protein_align_feature_id, contig_id, contig_start, " .
893 " contig_end, contig_strand, analysis_id, hit_start, " .
894 " hit_end, hit_name, cigar_line, evalue, perc_ident, score ".
895 "FROM $source.protein_align_feature $limit");
897 $self->debug(
"Reading indexes on protein_align_feature");
898 $dbh->do( qq{ALTER TABLE $target.protein_align_feature
899 ADD index seq_region_idx( analysis_id, seq_region_id,
900 seq_region_start, score )});
901 $dbh->do(
"ALTER TABLE $target.protein_align_feature " .
902 "ADD index hit_idx(hit_name)");
905 $self->debug(
"Dropping indexes on dna_align_feature");
906 $dbh->do(
"ALTER TABLE $target.dna_align_feature DROP INDEX seq_region_idx");
907 $dbh->do(
"ALTER TABLE $target.dna_align_feature DROP INDEX hit_idx");
909 $self->debug(
"Building dna_align_feature table");
910 $dbh->do(
"INSERT INTO $target.dna_align_feature " .
911 " (dna_align_feature_id, seq_region_id, seq_region_start, ".
912 " seq_region_end, seq_region_strand, analysis_id, " .
913 " hit_start, hit_end, hit_name, hit_strand, cigar_line, " .
914 " evalue, perc_ident, score) " .
915 "SELECT dna_align_feature_id, contig_id, contig_start, " .
916 " contig_end, contig_strand, analysis_id, hit_start, " .
917 " hit_end, hit_name, hit_strand, cigar_line, evalue, " .
918 " perc_ident, score FROM $source.dna_align_feature $limit");
921 $self->debug(
"Reading indexes on dna_align_feature");
922 $dbh->do( qq{ALTER TABLE $target.dna_align_feature
923 ADD INDEX seq_region_idx(seq_region_id, analysis_id,
924 seq_region_start, score)});
925 $dbh->do(
"ALTER TABLE $target.dna_align_feature " .
926 "ADD index hit_idx(hit_name)");
929 $self->debug(
"Building marker_feature table");
930 $dbh->do(
"INSERT INTO $target.marker_feature " .
931 " (marker_feature_id, marker_id, seq_region_id, " .
932 " seq_region_start, seq_region_end, analysis_id, " .
934 "SELECT marker_feature_id, marker_id, contig_id, contig_start, ".
935 " contig_end, analysis_id, map_weight " .
936 "FROM $source.marker_feature $limit");
939 # Note this uses chromosome coords so we have to join with tmp_chr_map to
941 $self->debug(
"Building qtl_feature table");
944 (
"INSERT INTO $target.qtl_feature( seq_region_id, seq_region_start, " .
945 " seq_region_end, qtl_id, analysis_id) " .
946 "SELECT tcm.new_id, " .
947 " q.start, q.end, q.qtl_id, q.analysis_id " .
948 "FROM $target.tmp_chr_map tcm, $source.qtl_feature q " .
949 "WHERE tcm.old_id = q.chromosome_id $limit");
951 # These tables now have seq_region_* instead of chromosome_*
953 $self->debug(
"Building karyotype table");
955 "INSERT INTO $target.karyotype " .
956 "SELECT null, tcm.new_id, " .
957 " k.chr_start, k.chr_end, k.band, k.stain " .
958 "FROM $target.tmp_chr_map tcm, $source.karyotype k " .
959 "WHERE tcm.old_id = k.chromosome_id $limit");
962 $self->debug(
"Building marker_map_location table");
964 "INSERT INTO $target.marker_map_location " .
965 "SELECT mml.marker_id, mml.map_id, " .
967 " mml.marker_synonym_id, mml.position, mml.lod_score " .
968 "FROM $source.chromosome c, $source.marker_map_location mml " .
969 "WHERE c.chromosome_id = mml.chromosome_id $limit");
971 $self->debug(
"Building misc_feature table" );
973 (
"INSERT INTO $target.misc_feature( misc_feature_id, seq_region_id, " .
974 " seq_region_start, seq_region_end, seq_region_strand ) " .
975 "SELECT m.mapfrag_id, sr.seq_region_id, m.seq_start, m.seq_end, " .
977 "FROM $source.mapfrag m, $target.seq_region sr, $source.dnafrag d " .
978 "WHERE m.dnafrag_id = d.dnafrag_id " .
979 "AND d.name = sr.name $limit" );
981 $self->debug(
"Building misc_set table" );
983 (
"INSERT INTO $target.misc_set( misc_set_id, code, name, description, " .
985 "SELECT mapset_id, code, name, description, max_length " .
986 "FROM $source.mapset ms" );
988 $self->debug(
"Building misc_attrib table" );
990 (
"INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " .
992 "SELECT mapfrag_id, mapannotationtype_id, value " .
993 "FROM $source.mapannotation" );
996 (
"INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " .
998 "SELECT mf.mapfrag_id, at.attrib_type_id, mf.name " .
999 "FROM $source.mapfrag mf, $target.attrib_type at " .
1000 "WHERE at.code = 'name'");
1003 (
"INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " .
1005 "SELECT mf.mapfrag_id, at.attrib_type_id, mf.type " .
1006 "FROM $source.mapfrag mf, $target.attrib_type at " .
1007 "WHERE at.code = 'type'");
1009 $self->debug(
"Building misc_feature_misc_set table" );
1011 (
"INSERT INTO $target.misc_feature_misc_set(misc_feature_id, misc_set_id)".
1012 "SELECT mapfrag_id, mapset_id ".
1013 "FROM $source.mapfrag_mapset $limit" );
1019 sub transfer_stable_ids {
1022 my $source = $self->source();
1023 my $target = $self->target();
1024 my $dbh = $self->dbh();
1026 $self->debug(
"Building stable id event tables");
1029 (
"stable_id_event",
"mapping_session",
"gene_archive",
"peptide_archive");
1034 sub transfer_vega_stable_ids {
1037 my $source = $self->source();
1038 my $target = $self->target();
1039 my $dbh = $self->dbh();
1041 $self->debug(
"Building vega_stable id event tables");
1045 (
"stable_id_event",
"mapping_session",
"gene_archive",
"peptide_archive");
1053 my $source = $self->source();
1054 my $target = $self->target();
1056 my $dbh = $self->dbh();
1058 $dbh->do(
"INSERT INTO $target.meta (meta_key, meta_value) " .
1059 "SELECT m.meta_key, m.meta_value FROM $source.meta m " .
1060 "ORDER BY meta_id");
1066 sub copy_other_tables {
1070 $self->copy_tables(
"xref",
1076 #marker/qtl related tables
1083 "supporting_feature",
1092 sub copy_repeat_consensus {
1095 my $source = $self->source();
1096 my $target = $self->target();
1098 my $dbh = $self->dbh();
1100 $self->debug(
"Converting repeat_consensus table.");
1102 $dbh->do(
"INSERT INTO $target.repeat_consensus " .
1103 "(repeat_consensus_id, repeat_name, repeat_class, repeat_type, ".
1104 " repeat_consensus) " .
1105 "SELECT repeat_consensus_id, repeat_name, repeat_class, " .
1106 " '', repeat_consensus " .
1107 "FROM $source.repeat_consensus rc" );
1114 sub create_attribs {
1117 my $source = $self->source();
1118 my $target = $self->target();
1119 my $dbh = $self->dbh();
1121 #copy the attrib types from the mapannotation type table
1124 (
"INSERT INTO $target.attrib_type( attrib_type_id, code, " .
1125 " name, description ) " .
1126 "SELECT mapannotationtype_id, code, name, description " .
1127 "FROM $source.mapannotationtype " );
1130 (
"INSERT INTO $target.attrib_type( code, name, description ) " .
1131 "VALUES ('name', 'Name',''), ('type', 'Type of feature','')");
1138 # The process of actually identifying toplevel seq_regions using the info in
1139 # the database is quite slow. Make the assumption that the coordsystem with
1140 # lowest rank value is going to have all of the toplevel seq_regions.
1142 # This method must be overridden if alternate behaviour is required.
1148 my $target = $self->target();
1149 my $dbh = $self->dbh();
1151 my $attrib_type_id = $self->add_attrib_code();
1153 $self->debug(
"Setting toplevel attributes of seq_regions");
1155 my $sth = $dbh->prepare(
"DELETE FROM $target.seq_region_attrib " .
1156 "WHERE attrib_type_id = ?");
1157 $sth->execute($attrib_type_id);
1161 $sth = $dbh->prepare(
"SELECT coord_system_id FROM $target.coord_system " .
1162 "ORDER BY RANK ASC LIMIT 1");
1165 my ($cs_id) = $sth->fetchrow_array();
1169 $sth = $dbh->prepare(
"INSERT INTO $target.seq_region_attrib " .
1170 '(seq_region_id, attrib_type_id, value) ' .
1171 "SELECT sr.seq_region_id, $attrib_type_id, 1 " .
1172 "FROM $target.seq_region sr " .
1173 "WHERE sr.coord_system_id = $cs_id");
1180 sub add_attrib_code {
1182 my $dbh = $self->dbh();
1183 my $target = $self->target();
1185 # add a toplevel code to the attrib_type table if it is not there already
1187 my $sth = $dbh->prepare(
"SELECT attrib_type_id " .
1188 "FROM $target.attrib_type " .
1189 "WHERE code = 'toplevel'");
1194 my ($attrib_type_id) = $sth->fetchrow_array();
1196 return $attrib_type_id;
1201 $sth = $dbh->prepare(
"INSERT INTO $target.attrib_type " .
1202 "SET code = 'toplevel', " .
1203 "name = 'Top Level', " .
1204 "description = 'Top Level Non-Redundant Sequence Region'");
1207 my $attrib_type_id = $sth->{
'mysql_insertid'};
1210 return $attrib_type_id;