2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
23 use vars qw($opt_d $opt_s $opt_u $opt_h $opt_p $opt_P);
25 #get command line arguments
26 getopts(
's:d:h:u:p:P:');
28 my $usage =
"Usage: " .
29 "test_genome.pl -s srcDB -d destDB -h host -u user [-p pass] [-P port]\n";
31 my ($user, $pass, $host, $destDB, $srcDB, $port) =
32 ($opt_u, $opt_p, $opt_h, $opt_d, $opt_s, $opt_P);
38 # If needed command line args are missing print the usage string and quit
39 $user and $host and $destDB and $srcDB or die $usage;
43 my $dsn =
"DBI:mysql:host=$host;port=$port";
45 #print "User: $user; Pass: $pass; DSN: $dsn\n";
48 # Connect to the mySQL host
50 my $dbh = DBI->connect( $dsn, $user, $pass, {RaiseError => 1})
51 or die
"Could not connect to database host : " . DBI->errstr;
53 print
"\nWARNING: If the $destDB database already exists the existing copy \n"
54 .
"will be destroyed. Proceed (Y/N)? ";
58 unless( $key =~ /y/ ) {
60 print
"Test Genome Creation Aborted\n";
64 print
"Proceeding with test genome database $destDB creation\n";
67 # Create the new database, dropping any existing database
69 $dbh->do(
"DROP DATABASE $destDB");
71 $dbh->do(
"CREATE DATABASE " . $destDB )
72 or die
"Could not create database $destDB: " . $dbh->errstr;
75 # Dump the source database table structure (w/o data) and use it to create
76 # the new database schema
79 # May have to eliminate the -p pass part... not sure
81 my $rc = 0xffff & system(
82 "mysqldump -p$pass -u $user -h $host -P $port --no-data $srcDB | " .
83 "mysql -p$pass -u $user -h $host -P port $destDB");
87 die
"mysqldump and insert failed with return code: $rc";
91 # Create a temp table to store ids of clones we are interested in
94 CREATE TEMPORARY TABLE $destDB.tmp1(
95 clone_id INT(10) NOT NULL)
96 ") or die
"Could create tmp1 table " . $dbh->errstr;
99 # Find clones present in a central, exciting 1Mb region of Chromosome 20
101 INSERT INTO $destDB.tmp1
102 SELECT distinct( c.clone_id )
103 FROM $srcDB.contig c, $srcDB.assembly a, $srcDB.chromosome chr
104 WHERE a.contig_id = c.contig_id
105 AND a.chromosome_id = chr.chromosome_id
107 AND a.chr_end >= 30252000
108 AND a.chr_start < 31252001
109 ") or die
"Could not do tmp1 chr2 clones insert statement:" . $dbh->errstr;
112 #Select relevant clones from the source database for the new database
114 INSERT INTO $destDB.clone
116 FROM $srcDB.clone c, $destDB.tmp1 t
117 WHERE c.clone_id = t.clone_id
118 ") or die
"Could not do clone insertion statement:" . $dbh->errstr;
121 # Retrieve all contigs on the clones present in the first 1Mbases whether
122 # or not they are on the golden path
125 INSERT INTO $destDB.contig
126 SELECT c.contig_id, c.name, c.clone_id, c.length, c.embl_offset, c.dna_id
127 FROM $srcDB.contig c, $destDB.tmp1 t
128 WHERE c.clone_id = t.clone_id
129 ") or die
"Could not do contig insertion statement:" . $dbh->errstr;
132 # Create the relevant dna table for the contigs in the test genome
136 INSERT INTO $destDB.dna
138 FROM $srcDB.dna d, $destDB.contig c
139 WHERE d.dna_id = c.dna_id
140 ") or die
"Could not do dna insertion statement:" . $dbh->errstr;
143 # Copy the entire analysis table (This could be improved I think [mcvicker])
147 INSERT INTO $destDB.analysis
150 ") or die
"Could not do analysis insertion statement:" . $dbh->errstr;
153 # Copy the static golden path table
156 INSERT INTO $destDB.assembly
158 FROM $srcDB.assembly a, $srcDB.chromosome chr
159 WHERE a.chromosome_id = chr.chromosome_id
161 ") or die
"Could not do assembly insertion statement for chr2: $dbh->errstr";
164 # Copy overlapping features, repeats, genes, exons, etc
167 # first find gene start end thing
170 CREATE TABLE $destDB.gene_global_start_end
171 SELECT STRAIGHT_JOIN tr.gene_id,
172 MIN(IF( a.contig_ori=1,
173 (e.contig_start+a.chr_start-a.contig_start),
174 (a.chr_start+a.contig_end-e.contig_end)))
176 MAX(IF( a.contig_ori=1,
177 (e.contig_end+a.chr_start-a.contig_start),
178 (a.chr_start+a.contig_end-e.contig_start)))
180 IF (a.contig_ori=1, e.contig_strand, (-e.contig_strand)) as strand,
181 chr.name as chromosome
183 FROM $srcDB.transcript tr, $srcDB.exon_transcript et,
184 $srcDB.exon e, $srcDB.assembly a, $srcDB.chromosome chr
185 WHERE tr.transcript_id = et.transcript_id
186 AND et.exon_id = e.exon_id
187 AND e.contig_id = a.contig_id
188 AND a.chromosome_id = chr.chromosome_id
190 ") or die
"Could not do gene_global_start_end table creation statement: $dbh->errstr";
193 # Then make gene list for the area. Genes have to be completely in
197 CREATE TABLE $destDB.gene_list
199 FROM $destDB.gene_global_start_end g
200 WHERE g.chromosome = '20'
201 AND g.end >= 30252000
202 AND g.start < 31252001
203 ") or die
"Could not do gene_list table insertion from chr2: " . $dbh->errstr;
206 ALTER TABLE $destDB.gene_list ADD INDEX gene_id_idx( gene_id )
207 ") or die
"Could not prepare genelist table alteration: " . $dbh->errstr;
212 INSERT INTO $destDB.gene
214 FROM $srcDB.gene g, $destDB.gene_list gl
215 WHERE g.gene_id = gl.gene_id
216 ") or die
"Could not do gene insertion statement: " . $dbh->errstr;
221 INSERT INTO $destDB.transcript
223 FROM $srcDB.transcript tr, $destDB.gene_list gl
224 WHERE tr.gene_id = gl.gene_id
225 ") or die
"Could not do transcript insertion: " . $dbh->errstr;
231 INSERT INTO $destDB.translation
233 FROM $srcDB.translation tl, $destDB.transcript tr
234 WHERE tr.translation_id = tl.translation_id
235 ") or die
"Could not do translation insertion " . $dbh->errstr;
241 INSERT INTO $destDB.exon_transcript
243 FROM $srcDB.exon_transcript et, $destDB.transcript tr
244 WHERE tr.transcript_id = et.transcript_id
245 ") or die
"Could not do exon_transcript insertion: " . $dbh->errstr;
247 # exons are tricky, first make a unique list
250 CREATE TABLE $destDB.exon_unique
251 SELECT distinct exon_id
252 FROM $destDB.exon_transcript
253 ") or die
"Could not do exon_unique table create: " . $dbh->errstr;
255 # then uses this list to copy
258 INSERT INTO $destDB.exon
260 FROM $srcDB.exon e, $destDB.exon_unique eu
261 WHERE e.exon_id = eu.exon_id
262 ") or die
"Could not do exon insertion: " . $dbh->errstr;
265 $dbh->do(
"drop table $destDB.exon_unique")
266 or die
"Could not drop exon_unique temp table: $dbh->errstr\n";
267 $dbh->do(
"drop table $destDB.gene_list")
268 or die
"Could not drop gene_list temp table: $dbh->errstr\n";
269 $dbh->do(
"drop table $destDB.gene_global_start_end")
270 or die
"Could not drop gene_global_start_end temp table: $dbh->errstr\n";
275 INSERT INTO $destDB.meta
278 ") or die
"Could not do meta table insertion: $dbh->errstr\n";
281 INSERT INTO $destDB.chromosome
283 FROM $srcDB.chromosome
284 ") or die
"Could not do meta table insertion: $dbh->errstr\n";
286 # object_xref, identity_xref, xref
291 INSERT INTO $destDB.gene_description
293 FROM $srcDB.gene_description gd, $destDB.gene g
294 WHERE gd.gene_id = g.gene_id
295 ") or die
"Could not do gene description insertion: $dbh->errstr\n";
298 INSERT INTO $destDB.object_xref
300 FROM $srcDB.object_xref ox, $destDB.translation tr
301 WHERE ox.ensembl_id = tr.translation_id
302 AND ox.ensembl_object_type = 'Translation'
303 ") or die
"Could not do object_xref insertion: $dbh->errstr\n";
306 INSERT INTO $destDB.xref
308 FROM $srcDB.xref x, $destDB.object_xref ox
309 WHERE x.xref_id = ox.xref_id
310 ") or die
"Could not do xref insertion: $dbh->errstr\n";
313 INSERT INTO $destDB.identity_xref
315 FROM $srcDB.identity_xref ix, $destDB.object_xref ox
316 WHERE ix.object_xref_id = ox.object_xref_id
317 ") or die
"Could not do identity insertion: $dbh->errstr\n";
320 INSERT INTO $destDB.external_db
322 FROM $srcDB.external_db ed
323 ") or die
"Could not do external_db insertion: $dbh->errstr\n";
325 # the interpro we need are those which are in xref
328 # copy across associated simple features
330 INSERT INTO $destDB.simple_feature
332 FROM $destDB.contig dc, $srcDB.simple_feature esf
333 WHERE esf.contig_id = dc.contig_id
334 ") or die
"Could not do simple_feature insertion: $dbh->errstr\n";
337 # copy across associated dna_align_features
339 INSERT INTO $destDB.dna_align_feature
341 FROM $destDB.contig dc, $srcDB.dna_align_feature edna
342 WHERE edna.contig_id = dc.contig_id
343 ") or die
"Could not do dna_align_feature insertion: $dbh->errstr\n";
346 # copy across associated protein_align_features
348 INSERT INTO $destDB.protein_align_feature
350 FROM $destDB.contig dc, $srcDB.protein_align_feature eprot
351 WHERE eprot.contig_id = dc.contig_id
352 ") or die
"Could not do protein_align_feature insertion: $dbh->errstr\n";
355 # copy across associated protein_align_features
357 INSERT INTO $destDB.prediction_transcript
359 FROM $destDB.contig dc, $srcDB.prediction_transcript epred
360 WHERE epred.contig_id = dc.contig_id
361 ") or die
"Could not do prediction_transcript insertion: $dbh->errstr\n";
364 # copy across associated supporting_features
366 INSERT INTO $destDB.supporting_feature
368 FROM $destDB.exon dex, $srcDB.supporting_feature esup
369 WHERE esup.exon_id = dex.exon_id
370 ") or die
"Could not do supporting_feature insertion: $dbh->errstr\n";
373 # copy across associated protein_features
375 INSERT INTO $destDB.protein_feature
377 FROM $destDB.translation dt, $srcDB.protein_feature eprot
378 WHERE eprot.translation_id = dt.translation_id
379 ") or die
"Could not do protein_feature insertion: $dbh->errstr\n";
382 # copy across associated repeat_features
384 INSERT INTO $destDB.repeat_feature
386 FROM $destDB.contig dc, $srcDB.repeat_feature erep
387 WHERE erep.contig_id = dc.contig_id
388 ") or die
"Could not do protein_align_feature insertion: $dbh->errstr\n";
391 # copy across associated repeat_consensi
393 INSERT INTO $destDB.repeat_consensus
395 FROM $destDB.repeat_feature dr, $srcDB.repeat_consensus econ
396 WHERE econ.repeat_consensus_id = dr.repeat_consensus_id
397 ") or die
"Could not do repeat_consensus insertion: $dbh->errstr\n";
400 # copy across associated karyotype info
402 INSERT INTO $destDB.karyotype
404 FROM $srcDB.karyotype ek
405 WHERE ek.chromosome_id = '20'
406 AND ek.chr_end >= 30252000
407 AND ek.chr_start < 31252001
408 ") or die
"Could not do karyotype insertion: $dbh->errstr\n";
412 # Create a temp table to store superctg_name we are interested in
415 CREATE TEMPORARY TABLE $destDB.tmp2(
416 superctg_name VARCHAR(20) NOT NULL)
417 ") or die
"Could create tmp2 table " . $dbh->errstr;
420 # grab the unique superctg_names
422 INSERT INTO $destDB.tmp2
423 SELECT distinct(superctg_name)
424 FROM $destDB.assembly
425 ") or die
"Could not select the unique superctg_names: $dbh->errstr\n";
428 # copy across mapfrag
430 INSERT INTO $destDB.mapfrag
432 FROM $srcDB.mapfrag emf, $destDB.tmp2 sctg
433 WHERE emf.name = sctg.superctg_name
434 ") or die
"Could not do mapfrag insertion: $dbh->errstr\n";
439 INSERT INTO $destDB.mapset
441 FROM $srcDB.mapset ems
442 ") or die
"Could not do mapset insertion: $dbh->errstr\n";
445 # copy across map_density
447 INSERT INTO $destDB.map_density
449 FROM $srcDB.map_density emd
450 WHERE emd.chromosome_id = '20'
451 AND emd.chr_start < 31252001
452 AND emd.chr_end >= 30252000
453 ") or die
"Could not do map_density insertion: $dbh->errstr\n";
456 # copy across mapannotation
458 INSERT INTO $destDB.mapannotation
460 FROM $srcDB.mapannotation ema, $destDB.tmp2 sctg
461 WHERE ema.value = sctg.superctg_name
462 ") or die
"Could not do mapannotation insertion: $dbh->errstr\n";
465 # copy across mapannotationtype
467 INSERT INTO $destDB.mapannotationtype
469 FROM $srcDB.mapannotationtype emt
470 ") or die
"Could not do mapannotationtype insertion: $dbh->errstr\n";
473 # copy across mapfrag_mapset
475 INSERT INTO $destDB.mapfrag_mapset
477 FROM $srcDB.mapfrag_mapset emfms, $destDB.mapfrag dmf
478 WHERE emfms.mapfrag_id = dmf.mapfrag_id
479 ") or die
"Could not do mapfrag_mapset insertion: $dbh->errstr\n";
482 # copy across dnafrag
484 INSERT INTO $destDB.dnafrag
486 FROM $srcDB.dnafrag edf
487 ") or die
"Could not do dnafrag insertion: $dbh->errstr\n";
490 # finally, drop the temporary tables
492 DROP TABLE $destDB.tmp1
493 ") or die
"Could not drop temporary table tmp1: $dbh->errstr\n";
496 DROP TABLE $destDB.tmp2
497 ") or die
"Could not drop temporary table tmp2: $dbh->errstr\n";
500 #disconnect from the DB
503 print
"Test genome database $destDB created\n";