ensembl-hive  2.7.0
BasicConverter.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 # Convert release 18/19-era schemas to use new non-clone/contig schema
21 
22 use strict;
23 use warnings;
24 use DBI;
25 
26 
27 package SeqStoreConverter::BasicConverter;
28 
29 ###############################################################################
30 # Constructor
31 ###############################################################################
32 
33 sub new {
34  my ( $class, $user, $pass, $host, $source, $target, $schema, $vega_schema, $force, $verbose, $limit ) = @_;
35 
36  my $self = bless {}, $class;
37 
38  my $port;
39  ($host, $port) = split(/:/, $host);
40  $port ||= 3306;
41 
42  my $dbh = DBI->connect( "DBI:mysql:host=$host:port=$port", $user, $pass,
43  {'RaiseError' => 1});
44 
45 
46  $self->verbose( $verbose );
47  $self->dbh( $dbh );
48  $self->force( $force );
49  $self->source( $source );
50  $self->target( $target );
51  $self->schema( $schema );
52  $self->vegaschema( $vega_schema);
53  $self->host( $host );
54  $self->password( $pass);
55  $self->user($user);
56  $self->port($port);
57  $self->limit($limit);
58 
59 
60  #check to see if the destination and source databases exist already.
61  my %dbs = map {$_->[0] => 1} @{$dbh->selectall_arrayref('show databases')};
62  if( !$dbs{$source} ) {
63  die ("Source db $source does not exist" );
64  }
65 
66  if( $dbs{$target} ) {
67  if( $force ) {
68  $dbh->do( "drop database $target" );
69  } else {
70  die("Target db $target already exists. Use -force option to overwrite.");
71  }
72  }
73 
74  $dbh->do( "create database ".$self->target() );
75 
76  $self->debug("Building schema for $target from $schema");
77  die "Cannot open $schema" if (! -e $schema);
78  my $cmd = "/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $schema";
79  system ($cmd);
80 
81  if ($vega_schema) {
82  $self->debug("Adding vega tables for $target");
83  die "Cannot open vega creation script" if (! -e $vega_schema);
84  my $cmd = "/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $vega_schema";
85  system ($cmd);
86  }
87 
88  $self->debug("Creating temporary tables");
89  #create a temporary table to store the mapping of old ids to new ids
90  $dbh->do
91  ("CREATE TEMPORARY TABLE $target.tmp_cln_map (" .
92  "old_id INT, new_id INT, INDEX new_idx (new_id))");
93 
94  #create a temp table which will store the mapping of old chromosome
95  #identifiers to new identifiers
96  $dbh->do("CREATE TEMPORARY TABLE $target.tmp_chr_map (" .
97  " old_id INT, new_id INT,".
98  " INDEX new_idx (new_id))");
99 
100  #create a temporary table to hold old supercontig name -> new id mappings
101  $dbh->do("CREATE TEMPORARY TABLE $target.tmp_superctg_map (" .
102  "name VARCHAR(255), new_id INT, ".
103  "INDEX new_idx (new_id))");
104 
105  return $self;
106 }
107 
108 
109 ###############################################################################
110 # Getter/Setters for converter properties
111 ###############################################################################
112 
113 sub force {
114  my $self = shift;
115  $self->{'force'} = shift if(@_);
116  return $self->{'force'};
117 }
118 
119 
120 sub dbh {
121  my $self = shift;
122  $self->{'dbh'} = shift if(@_);
123  return $self->{'dbh'};
124 }
125 
126 sub user {
127  my $self = shift;
128  $self->{'user'} = shift if(@_);
129  return $self->{'user'};
130 }
131 sub host {
132  my $self = shift;
133  $self->{'host'} = shift if(@_);
134  return $self->{'host'};
135 }
136 
137 sub port {
138  my $self = shift;
139  $self->{'port'} = shift if(@_);
140  return $self->{'port'};
141 }
142 
143 sub password {
144  my $self = shift;
145  $self->{'password'} = shift if(@_);
146  return $self->{'password'};
147 }
148 
149 
150 sub verbose {
151  my $self = shift;
152  $self->{'verbose'} = shift if (@_);
153  return $self->{'verbose'};
154 }
155 
156 sub schema {
157  my $self = shift;
158  $self->{'schema'} = shift if (@_);
159  return $self->{'schema'};
160 }
161 
162 sub vegaschema {
163  my $self = shift;
164  $self->{'vega_schema'} = shift if (@_);
165  return $self->{'vega_schema'};
166 }
167 
168 sub source {
169  my $self = shift;
170  $self->{'source'} = shift if(@_);
171  return $self->{'source'};
172 }
173 
174 
175 sub target {
176  my $self = shift;
177  $self->{'target'} = shift if(@_);
178  return $self->{'target'};
179 }
180 
181 sub limit {
182  my $self = shift;
183  $self->{'limit'} = shift if(@_);
184  return $self->{'limit'};
185 }
186 
187 
188 ###############################################################################
189 # Utility methods
190 ###############################################################################
191 
192 sub debug {
193  my $self = shift;
194  my $str = shift;
195  print STDERR $str . "\n" if $self->verbose();
196  return;
197 }
198 
199 sub copy_tables {
200  my ($self, @tables) = @_;
201 
202  foreach my $table (@tables) {
203  $self->debug("Copying $table");
204 
205  my $source = $self->source();
206  my $target = $self->target();
207 
208  eval {
209  my $sth = $self->dbh()->prepare
210  ("INSERT INTO $target.$table SELECT * FROM $source.$table");
211  $sth->execute();
212  $sth->finish();
213  };
214 
215  if($@) {
216  warn("Copy of table $table failed: $@\n");
217  }
218  }
219 
220  return;
221 }
222 
223 sub get_coord_system_id {
224  my $self = shift;
225  my $cs_name = shift;
226  my $cs_version = shift;
227 
228  my $target = $self->target();
229 
230  my @bind_vals = ($cs_name);
231  my $sql = "SELECT cs.coord_system_id " .
232  "FROM $target.coord_system cs " .
233  "WHERE cs.name = ?";
234 
235  if($cs_version) {
236  push(@bind_vals, $cs_version);
237  $sql .= " AND cs.version = ?";
238  }
239 
240  my $sth = $self->dbh()->prepare($sql);
241  $sth->execute(@bind_vals);
242 
243  if($sth->rows() != 1) {
244  die("Id for non-existant or ambiguous coord system requested " .
245  "$cs_name:$cs_version");
246  }
247 
248  my ($id) = $sth->fetchrow_array();
249 
250  $sth->finish();
251 
252  return $id;
253 }
254 
255 sub get_default_assembly {
256  my $self = shift;
257 
258  my $source = $self->source();
259 
260  my $sth = $self->dbh->prepare
261  ("SELECT meta_value FROM $source.meta WHERE meta_key='assembly.default'");
262  $sth->execute();
263 
264  if(!$sth->rows() == 1) {
265  die("This species has an ambiguous or non-existant assembly.default" .
266  " in the meta table");
267  }
268 
269  my ($result) = $sth->fetchrow_array();
270 
271  return $result;
272 }
273 
274 
275 
276 sub contig_to_seq_region {
277  my $self = shift;
278  my $target_cs_name = shift;
279 
280  my $target = $self->target();
281  my $source = $self->source();
282  my $dbh = $self->dbh();
283 
284  $target_cs_name ||= 'contig';
285 
286  $self->debug("Transforming contigs into $target_cs_name seq_regions");
287 
288  my $cs_id = $self->get_coord_system_id($target_cs_name);
289 
290  my $sth = $dbh->prepare
291  ("INSERT INTO $target.seq_region " .
292  "SELECT contig_id, name, $cs_id, length FROM $source.contig");
293 
294  $sth->execute();
295  $sth->finish();
296 
297  return;
298 }
299 
300 
301 sub clone_to_seq_region {
302  my $self = shift;
303  my $target_cs_name = shift;
304 
305  my $target = $self->target();
306  my $source = $self->source();
307  my $dbh = $self->dbh();
308 
309  # target coord_system will have a different ID
310  $target_cs_name ||= "clone";
311  my $cs_id = $self->get_coord_system_id($target_cs_name);
312 
313  $self->debug("Transforming clones into $target_cs_name seq_regions");
314 
315  my $select_sth = $dbh->prepare
316  ("SELECT cl.clone_id,
317  CONCAT(cl.embl_acc, '.', cl.embl_version),
318  MAX(ctg.embl_offset+ctg.length-1)
319  FROM $source.clone cl, $source.contig ctg
320  WHERE cl.clone_id = ctg.clone_id GROUP BY ctg.clone_id");
321  $select_sth->execute();
322 
323  my ($clone_id, $embl_acc, $length);
324  $select_sth->bind_columns(\$clone_id, \$embl_acc, \$length);
325 
326  my $insert_sth = $dbh->prepare
327  ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
328  "VALUES(?,?,?)");
329 
330  my $tmp_insert_sth = $dbh->prepare
331  ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");
332 
333  while ($select_sth->fetch()) {
334  $insert_sth->execute("$embl_acc", $cs_id, $length);
335 
336  #store mapping of old -> new ids in temp table
337  $tmp_insert_sth->execute($clone_id, $insert_sth->{'mysql_insertid'});
338  }
339 
340  $select_sth->finish();
341  $insert_sth->finish();
342  $tmp_insert_sth->finish();
343 
344  return;
345 }
346 
347 
348 
349 sub chromosome_to_seq_region {
350  my $self = shift;
351  my $target_cs_name = shift;
352 
353  my $target = $self->target();
354  my $source = $self->source();
355  my $dbh = $self->dbh();
356 
357  $target_cs_name ||= "chromosome";
358  my $cs_id = $self->get_coord_system_id($target_cs_name);
359 
360  $self->debug("Transforming chromosomes into $target_cs_name seq_regions");
361 
362 
363  my $select_sth = $dbh->prepare
364  ("SELECT chromosome_id, name, length FROM $source.chromosome");
365 
366  my $insert_sth = $dbh->prepare
367  ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
368  "VALUES (?,?,?)");
369 
370  my $tmp_insert_sth = $dbh->prepare
371  ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");
372 
373  $select_sth->execute();
374 
375  my ($chrom_id, $name, $length);
376  $select_sth->bind_columns(\$chrom_id, \$name, \$length);
377 
378  while ($select_sth->fetch()) {
379  #insert into seq_region table
380  $insert_sth->execute($name, $cs_id, $length);
381  #copy old/new mapping into temporary table
382  $tmp_insert_sth->execute($chrom_id, $insert_sth->{'mysql_insertid'});
383  }
384 
385  $select_sth->finish();
386  $insert_sth->finish();
387  $tmp_insert_sth->finish();
388 
389  return;
390 }
391 
392 
393 
394 sub supercontig_to_seq_region {
395  my $self = shift;
396  my $target_cs_name = shift || "supercontig";
397 
398  my $target = $self->target();
399  my $source = $self->source();
400  my $dbh = $self->dbh();
401 
402  $self->debug("Transforming supercontigs into $target_cs_name seq_regions");
403 
404  my $cs_id = $self->get_coord_system_id($target_cs_name);
405 
406  my $select_sth = $dbh->prepare
407  ("SELECT superctg_name, " .
408  "MAX(superctg_end) AS length " .
409  "FROM $source.assembly " .
410  "GROUP BY superctg_name");
411 
412  my $insert_sth = $dbh->prepare
413  ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
414  "VALUES (?,?,?)");
415 
416  my $tmp_insert_sth = $dbh->prepare
417  ("INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?, ?)");
418 
419  my ($name, $length);
420  $select_sth->execute();
421  $select_sth->bind_columns(\$name, \$length);
422 
423  while ($select_sth->fetch()) {
424  $insert_sth->execute($name, $cs_id, $length);
425  $tmp_insert_sth->execute($name, $insert_sth->{'mysql_insertid'});
426  }
427 
428  $select_sth->finish();
429  $insert_sth->finish();
430  $tmp_insert_sth->finish();
431 
432  return;
433 }
434 
435 
436 sub assembly_contig_chromosome {
437  my $self = shift;
438 
439  $self->debug("Building assembly table - contig/chromosome");
440 
441  my $source = $self->source();
442  my $target = $self->target();
443  my $dbh = $self->dbh();
444 
445  $dbh->do(
446  "INSERT INTO $target.assembly " .
447  "SELECT tcm.new_id, " . # asm_seq_region_id (old-new chromosome ID mapping)
448  "a.contig_id, " . # cmp_seq_region_id
449  "a.chr_start, " . # asm_start
450  "a.chr_end, " . # asm_end
451  "a.contig_start, " . # cmp_start
452  "a.contig_end, " . # cmp_end
453  "a.contig_ori " . # ori
454  "FROM $target.tmp_chr_map tcm, $source.assembly a, $source.contig c " .
455  "WHERE tcm.old_id = a.chromosome_id " .
456  "AND c.contig_id = a.contig_id "); # only copy assembly entries that
457  # refer to valid contigs (test db has
458  # superfluous assembly entries)
459 
460 }
461 
462 sub assembly_contig_clone {
463  my $self = shift;
464 
465  $self->debug("Building assembly table - contig/clone");
466 
467  my $source = $self->source();
468  my $target = $self->target();
469  my $dbh = $self->dbh();
470 
471  $dbh->do(
472  "INSERT INTO $target.assembly " .
473  "SELECT tcm.new_id, " . # asm_seq_region_id (old-new clone ID mapping)
474  "ctg.contig_id, ". # cmp_seq_region_id
475  "ctg.embl_offset, " . # asm_start
476  "ctg.embl_offset+ctg.length-1, " . # asm_end
477  "1, " . # cmp_start
478  "ctg.length, " . # cmp_end
479  "1 " . # ori - contig always positively oriented on the clone
480  "FROM $target.tmp_cln_map tcm, " .
481  "$source.clone cln, $source.contig ctg " .
482  "WHERE tcm.old_id = cln.clone_id " .
483  "AND cln.clone_id = ctg.clone_id");
484 
485 }
486 
487 
488 sub assembly_contig_supercontig {
489  my $self = shift;
490 
491  $self->debug("Building assembly table - contig/supercontig");
492 
493  my $source = $self->source();
494  my $target = $self->target();
495  my $dbh = $self->dbh();
496 
497  $dbh->do(
498  "INSERT INTO $target.assembly " .
499  "SELECT tsm.new_id, " . # asm_seq_region_id (superctg name-sr_id mapping)
500  "a.contig_id, " . # cmp_seq_region_id
501  "a.superctg_start, " . # asm_start
502  "a.superctg_end, " . # asm_end
503  "a.contig_start, " . # cmp_start
504  "a.contig_end, " . # cmp_end
505  "a.contig_ori " . # ori
506  "FROM $target.tmp_superctg_map tsm, $source.assembly a, $source.contig c ".
507  "WHERE tsm.name = a.superctg_name " .
508  "AND c.contig_id = a.contig_id "); # only copy assembly entries that
509  # refer to valid contigs (test db might
510  # have these)
511 
512 }
513 
514 
515 
516 sub assembly_supercontig_chromosome {
517  my $self = shift;
518 
519  $self->debug("Building assembly table - supercontig/chromosome");
520 
521  my $source = $self->source();
522  my $target = $self->target();
523  my $dbh = $self->dbh();
524 
525  $dbh->do(
526  "INSERT INTO $target.assembly " .
527  "SELECT tcm.new_id, " . # asm_seq_region_id (chr id)
528  "tsm.new_id, " . # cmp_seq_region_id (supercontig id)
529  "min(a.chr_start), " . # asm_start
530  "max(a.chr_end), " . # asm_end
531  "min(a.superctg_start), " . # cmp_start
532  "max(a.superctg_end), " . # cmp_end
533  "a.superctg_ori " . # ori
534  "FROM $target.tmp_superctg_map tsm, $target.tmp_chr_map tcm, " .
535  " $source.assembly a ".
536  "WHERE tsm.name = a.superctg_name " .
537  "AND tcm.old_id = a.chromosome_id " .
538  "GROUP BY superctg_name");
539 
540 }
541 
542 
543 
544 ###############################################################################
545 # Base class implementations of transfer methods. Can be overridden to
546 # create species specific behaviour
547 ###############################################################################
548 
549 
550 sub create_coord_systems {
551  my $self = shift;
552 
553  my $target = $self->target();
554  my $dbh = $self->dbh();
555 
556  my $ass_def = $self->get_default_assembly();
557 
558  my @coords =
559  (["chromosome" , $ass_def, "default_version" ,1],
560  ["supercontig", undef , "default_version" ,2],
561  ["clone" , undef , "default_version" ,3],
562  ["contig", undef , "default_version,sequence_level",4]);
563 
564  my @assembly_mappings = ("chromosome:$ass_def|contig",
565  "clone|contig",
566  "supercontig|contig",
567  "supercontig|contig|clone",
568  "chromosome:$ass_def|contig|clone",
569  "chromosome:$ass_def|contig|supercontig");
570 
571  $self->debug("Building coord_system table");
572 
573  my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
574  "(name, version, attrib,rank) VALUES (?,?,?,?)");
575 
576  my %coord_system_ids;
577 
578  foreach my $cs (@coords) {
579  $sth->execute(@$cs);
580  $coord_system_ids{$cs->[0]} = $sth->{'mysql_insertid'};
581  }
582  $sth->finish();
583 
584  $self->debug("Adding assembly.mapping entries to meta table");
585 
586  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
587  "VALUES ('assembly.mapping', ?)");
588 
589  foreach my $mapping (@assembly_mappings) {
590  $sth->execute($mapping);
591  }
592 
593  $sth->finish();
594 
595  return;
596 }
597 
598 
599 #
600 # populates the contents of the meta_coord table
601 # must be executed after all of the feature tables in the target database
602 # have already been populated
603 #
604 
605 sub create_meta_coord {
606  my $self = shift;
607 
608  $self->debug("Building meta_coord table");
609 
610  my $target = $self->target();
611  my $dbh = $self->dbh();
612 
613  my @feature_tables = qw(density_feature
614  dna_align_feature
615  exon
616  gene
617  karyotype
618  marker_feature
619  misc_feature
620  prediction_exon
621  prediction_transcript
622  protein_align_feature
623  repeat_feature
624  simple_feature
625  transcript);
626 
627  foreach my $ft (@feature_tables) {
628 
629  $dbh->do(qq{INSERT INTO $target.meta_coord(table_name, coord_system_id,
630  max_length)
631  SELECT '$ft', sr.coord_system_id,
632  MAX(f.seq_region_end - f.seq_region_start + 1)
633  FROM $target.$ft f, $target.seq_region sr
634  WHERE sr.seq_region_id = f.seq_region_id
635  GROUP BY sr.coord_system_id});
636  }
637 
638  # special case for assembly exception, features are created from both
639  # sides of table
640 
641  $dbh->do(qq{INSERT INTO $target.meta_coord
642  SELECT 'assembly_exception', sr.coord_system_id,
643  MAX(IF(ae.seq_region_end - ae.seq_region_start > ae.exc_seq_region_end - ae.exc_seq_region_start, ae.seq_region_end - ae.seq_region_start + 1, ae.exc_seq_region_end - ae.exc_seq_region_start + 1))
644  FROM $target.assembly_exception ae, $target.seq_region sr
645  WHERE sr.seq_region_id = ae.seq_region_id
646  GROUP BY sr.coord_system_id});
647 
648 }
649 
650 
651 sub create_seq_regions {
652  my $self = shift;
653 
654  my $target = $self->target();
655  my $dbh = $self->dbh();
656 
657  #default behaviour is to simply copy all tables as they come
658 
659  $self->contig_to_seq_region('contig');
660  $self->chromosome_to_seq_region();
661  $self->supercontig_to_seq_region();
662  $self->clone_to_seq_region();
663 
664  return;
665 }
666 
667 sub create_assembly {
668  my $self = shift;
669 
670  $self->assembly_contig_chromosome();
671  $self->assembly_contig_clone();
672  $self->assembly_contig_supercontig();
673 
674  return;
675 }
676 
677 sub transfer_dna {
678  my $self = shift;
679 
680  my $target = $self->target();
681  my $source = $self->source();
682  my $dbh = $self->dbh();
683 
684  $self->debug("Building dna table");
685 
686  $dbh->do("INSERT INTO $target.dna " .
687  "SELECT c.contig_id as seq_region_id, d.sequence as sequence " .
688  "FROM $source.dna d, $source.contig c " .
689  "WHERE c.dna_id = d.dna_id");
690  return;
691 }
692 
693 
694 sub transfer_genes {
695  my $self = shift;
696 
697  my $target = $self->target();
698  my $source = $self->source();
699  my $dbh = $self->dbh();
700 
701  #
702  # Transfer the gene table
703  #
704 
705  $self->debug("Building gene table");
706 
707  $dbh->do
708  ("INSERT INTO $target.gene " .
709  "SELECT g.gene_id, g.type, g.analysis_id, tcm.new_id, " .
710  "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
711  " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
712  "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
713  " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
714  " a.contig_ori*e.contig_strand as strand, " .
715  " g.display_xref_id " .
716  "FROM $source.transcript t, $source.exon_transcript et, " .
717  " $source.exon e, $source.assembly a, $source.gene g, " .
718  " $target.tmp_chr_map tcm " .
719  "WHERE t.transcript_id = et.transcript_id " .
720  "AND et.exon_id = e.exon_id " .
721  "AND e.contig_id = a.contig_id " .
722  "AND g.gene_id = t.gene_id " .
723  "AND a.chromosome_id = tcm.old_id " .
724  "GROUP BY g.gene_id");
725 
726 
727  #
728  # Transfer the transcript table
729  #
730  $self->debug("Building transcript table");
731 
732  $dbh->do
733  ("INSERT INTO $target.transcript " .
734  "SELECT t.transcript_id, t.gene_id, tcm.new_id, " .
735  "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
736  " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
737  "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
738  " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
739  " a.contig_ori*e.contig_strand as strand, " .
740  " t.display_xref_id " .
741  "FROM $source.transcript t, $source.exon_transcript et, " .
742  " $source.exon e, $source.assembly a, $target.tmp_chr_map tcm " .
743  "WHERE t.transcript_id = et.transcript_id " .
744  "AND et.exon_id = e.exon_id " .
745  "AND e.contig_id = a.contig_id " .
746  "AND a.chromosome_id = tcm.old_id " .
747  "GROUP BY t.transcript_id");
748 
749  #
750  # Transfer the exon table
751  #
752  $self->debug("Building exon table");
753 
754  $dbh->do
755  ("INSERT INTO $target.exon " .
756  "SELECT e.exon_id, tcm.new_id, " .
757  "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
758  " (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
759  "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
760  " (a.chr_start+a.contig_end-e.contig_start))) as end, " .
761  " a.contig_ori*e.contig_strand as strand, " .
762  " e.phase, e.end_phase " .
763  "FROM $source.transcript t, $source.exon_transcript et, " .
764  " $source.exon e, $source.assembly a, $source.gene g, " .
765  " $target.tmp_chr_map tcm " .
766  "WHERE t.transcript_id = et.transcript_id " .
767  "AND et.exon_id = e.exon_id " .
768  "AND e.contig_id = a.contig_id " .
769  "AND g.gene_id = t.gene_id " .
770  "AND a.chromosome_id = tcm.old_id " .
771  "GROUP BY e.exon_id");
772 
773  #
774  # Transfer translation table
775  #
776 
777  $self->debug("Building translation table");
778 
779  $dbh->do
780  ("INSERT INTO $target.translation " .
781  "SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " .
782  " tl.start_exon_id, tl.seq_end, tl.end_exon_id " .
783  "FROM $source.transcript ts, $source.translation tl " .
784  "WHERE ts.translation_id = tl.translation_id");
785 
786  return;
787 }
788 
789 sub transfer_prediction_transcripts {
790  my $self = shift;
791 
792  my $source = $self->source();
793  my $target = $self->target();
794  my $dbh = $self->dbh();
795 
796  # prediction_transcript / prediction_exon
797 
798  $self->debug( "Building prediction_exon table" );
799 
800  $dbh->do
801  ("INSERT INTO $target.prediction_exon ".
802  "( prediction_transcript_id, seq_region_id, seq_region_start, " .
803  " seq_region_end, seq_region_strand, start_phase, score, p_value," .
804  " exon_rank ) " .
805  "SELECT prediction_transcript_id, contig_id, contig_start, contig_end, " .
806  " contig_strand, start_phase, score, p_value, exon_rank " .
807  "FROM $source.prediction_transcript" );
808 
809  $self->debug("Building prediction_transcript table");
810 
811  $dbh->do
812  ("INSERT INTO $target.prediction_transcript ".
813  "( prediction_transcript_id, seq_region_id, seq_region_start, " .
814  "seq_region_end, seq_region_strand, analysis_id ) " .
815  "SELECT prediction_transcript_id, contig_id, MIN(contig_start), " .
816  " MAX(contig_end), contig_strand, analysis_id ".
817  "FROM $source.prediction_transcript " .
818  "GROUP BY prediction_transcript_id ");
819 
820  return;
821 }
822 
823 
824 
825 sub transfer_features {
826  my $self = shift;
827 
828  my $target = $self->target();
829  my $source = $self->source();
830  my $dbh = $self->dbh();
831 
832  my $limit = '';
833  if($self->limit()) {
834  $limit = ' limit ' . $self->limit();
835  }
836 
837 
838  #
839  # Feature tables
840  # Note that we can just rename contig_* to set_region_* since the
841  # contig IDs were copied verbatim into seq_region
842  #
843 
844  # For some reason mysql occasionally refuses to use the index on large
845  # tables following copies like the following.
846  # So: drop the indexes first and then re-add them after
847 
848  # simple_feature
849  $self->debug("Building simple_feature table");
850  $dbh->do
851  ("INSERT INTO $target.simple_feature (simple_feature_id, seq_region_id, ".
852  " seq_region_start, seq_region_end, seq_region_strand, " .
853  " display_label, analysis_id, score) " .
854  "SELECT simple_feature_id, contig_id, contig_start, contig_end, " .
855  " contig_strand, display_label, analysis_id, score " .
856  "FROM $source.simple_feature $limit");
857 
858  # repeat_feature
859  $self->debug("Dropping indexes on repeat_feature");
860  $dbh->do("ALTER TABLE $target.repeat_feature DROP INDEX seq_region_idx");
861  $dbh->do("ALTER TABLE $target.repeat_feature DROP INDEX repeat_idx");
862  $dbh->do("ALTER TABLE $target.repeat_feature DROP INDEX analysis_idx");
863 
864  $self->debug("Building repeat_feature table");
865  $dbh->do
866  ("INSERT INTO $target.repeat_feature (repeat_feature_id, seq_region_id, " .
867  " seq_region_start, seq_region_end, seq_region_strand, analysis_id, " .
868  " repeat_start, repeat_end, repeat_consensus_id, score) " .
869  "SELECT repeat_feature_id, contig_id, contig_start, contig_end, " .
870  " contig_strand, analysis_id, repeat_start, repeat_end, " .
871  " repeat_consensus_id, score FROM $source.repeat_feature $limit");
872 
873  $self->debug("Reading indexes on repeat_feature");
874  $dbh->do("ALTER TABLE $target.repeat_feature " .
875  "ADD INDEX seq_region_idx( seq_region_id, seq_region_start)");
876  $dbh->do("ALTER TABLE $target.repeat_feature " .
877  "ADD INDEX repeat_idx( repeat_consensus_id )");
878  $dbh->do("ALTER TABLE $target.repeat_feature " .
879  "ADD INDEX analysis_idx(analysis_id)");
880 
881  # protein_align_feature
882  $self->debug("Dropping indexes on protein_align_feature");
883  $dbh->do("ALTER TABLE $target.protein_align_feature DROP INDEX hit_idx");
884  $dbh->do( "ALTER TABLE $target.protein_align_feature " .
885  "DROP INDEX seq_region_idx");
886 
887  $self->debug("Building protein_align_feature table");
888  $dbh->do( "INSERT INTO $target.protein_align_feature " .
889  " (protein_align_feature_id, seq_region_id, seq_region_start, " .
890  " seq_region_end, seq_region_strand, analysis_id, hit_start, " .
891  " hit_end, hit_name, cigar_line, evalue, perc_ident, score) " .
892  "SELECT protein_align_feature_id, contig_id, contig_start, " .
893  " contig_end, contig_strand, analysis_id, hit_start, " .
894  " hit_end, hit_name, cigar_line, evalue, perc_ident, score ".
895  "FROM $source.protein_align_feature $limit");
896 
897  $self->debug("Reading indexes on protein_align_feature");
898  $dbh->do( qq{ALTER TABLE $target.protein_align_feature
899  ADD index seq_region_idx( analysis_id, seq_region_id,
900  seq_region_start, score )});
901  $dbh->do( "ALTER TABLE $target.protein_align_feature " .
902  "ADD index hit_idx(hit_name)");
903 
904  # dna_align_feature
905  $self->debug("Dropping indexes on dna_align_feature");
906  $dbh->do( "ALTER TABLE $target.dna_align_feature DROP INDEX seq_region_idx");
907  $dbh->do( "ALTER TABLE $target.dna_align_feature DROP INDEX hit_idx");
908 
909  $self->debug("Building dna_align_feature table");
910  $dbh->do( "INSERT INTO $target.dna_align_feature " .
911  " (dna_align_feature_id, seq_region_id, seq_region_start, ".
912  " seq_region_end, seq_region_strand, analysis_id, " .
913  " hit_start, hit_end, hit_name, hit_strand, cigar_line, " .
914  " evalue, perc_ident, score) " .
915  "SELECT dna_align_feature_id, contig_id, contig_start, " .
916  " contig_end, contig_strand, analysis_id, hit_start, " .
917  " hit_end, hit_name, hit_strand, cigar_line, evalue, " .
918  " perc_ident, score FROM $source.dna_align_feature $limit");
919 
920 
921  $self->debug("Reading indexes on dna_align_feature");
922  $dbh->do( qq{ALTER TABLE $target.dna_align_feature
923  ADD INDEX seq_region_idx(seq_region_id, analysis_id,
924  seq_region_start, score)});
925  $dbh->do( "ALTER TABLE $target.dna_align_feature " .
926  "ADD index hit_idx(hit_name)");
927 
928  # marker_feature
929  $self->debug("Building marker_feature table");
930  $dbh->do( "INSERT INTO $target.marker_feature " .
931  " (marker_feature_id, marker_id, seq_region_id, " .
932  " seq_region_start, seq_region_end, analysis_id, " .
933  " map_weight) " .
934  "SELECT marker_feature_id, marker_id, contig_id, contig_start, ".
935  " contig_end, analysis_id, map_weight " .
936  "FROM $source.marker_feature $limit");
937 
938  # qtl_feature
939  # Note this uses chromosome coords so we have to join with tmp_chr_map to
940  # get the mapping
941  $self->debug("Building qtl_feature table");
942 
943  $dbh->do
944  ("INSERT INTO $target.qtl_feature( seq_region_id, seq_region_start, " .
945  " seq_region_end, qtl_id, analysis_id) " .
946  "SELECT tcm.new_id, " .
947  " q.start, q.end, q.qtl_id, q.analysis_id " .
948  "FROM $target.tmp_chr_map tcm, $source.qtl_feature q " .
949  "WHERE tcm.old_id = q.chromosome_id $limit");
950 
951  # These tables now have seq_region_* instead of chromosome_*
952 
953  $self->debug("Building karyotype table");
954  $dbh->do(
955  "INSERT INTO $target.karyotype " .
956  "SELECT null, tcm.new_id, " .
957  " k.chr_start, k.chr_end, k.band, k.stain " .
958  "FROM $target.tmp_chr_map tcm, $source.karyotype k " .
959  "WHERE tcm.old_id = k.chromosome_id $limit");
960 
961 
962  $self->debug("Building marker_map_location table");
963  $dbh->do(
964  "INSERT INTO $target.marker_map_location " .
965  "SELECT mml.marker_id, mml.map_id, " .
966  " c.name, " .
967  " mml.marker_synonym_id, mml.position, mml.lod_score " .
968  "FROM $source.chromosome c, $source.marker_map_location mml " .
969  "WHERE c.chromosome_id = mml.chromosome_id $limit");
970 
971  $self->debug( "Building misc_feature table" );
972  $dbh->do
973  ("INSERT INTO $target.misc_feature( misc_feature_id, seq_region_id, " .
974  " seq_region_start, seq_region_end, seq_region_strand ) " .
975  "SELECT m.mapfrag_id, sr.seq_region_id, m.seq_start, m.seq_end, " .
976  " m.orientation " .
977  "FROM $source.mapfrag m, $target.seq_region sr, $source.dnafrag d " .
978  "WHERE m.dnafrag_id = d.dnafrag_id " .
979  "AND d.name = sr.name $limit" );
980 
981  $self->debug( "Building misc_set table" );
982  $dbh->do
983  ("INSERT INTO $target.misc_set( misc_set_id, code, name, description, " .
984  " max_length ) " .
985  "SELECT mapset_id, code, name, description, max_length " .
986  "FROM $source.mapset ms" );
987 
988  $self->debug( "Building misc_attrib table" );
989  $dbh->do
990  ("INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " .
991  " value ) ".
992  "SELECT mapfrag_id, mapannotationtype_id, value " .
993  "FROM $source.mapannotation" );
994 
995  $dbh->do
996  ("INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " .
997  " value) " .
998  "SELECT mf.mapfrag_id, at.attrib_type_id, mf.name " .
999  "FROM $source.mapfrag mf, $target.attrib_type at " .
1000  "WHERE at.code = 'name'");
1001 
1002  $dbh->do
1003  ("INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " .
1004  " value) " .
1005  "SELECT mf.mapfrag_id, at.attrib_type_id, mf.type " .
1006  "FROM $source.mapfrag mf, $target.attrib_type at " .
1007  "WHERE at.code = 'type'");
1008 
1009  $self->debug( "Building misc_feature_misc_set table" );
1010  $dbh->do
1011  ("INSERT INTO $target.misc_feature_misc_set(misc_feature_id, misc_set_id)".
1012  "SELECT mapfrag_id, mapset_id ".
1013  "FROM $source.mapfrag_mapset $limit" );
1014 
1015  return;
1016 }
1017 
1018 
1019 sub transfer_stable_ids {
1020  my $self = shift;
1021 
1022  my $source = $self->source();
1023  my $target = $self->target();
1024  my $dbh = $self->dbh();
1025 
1026  $self->debug("Building stable id event tables");
1027 
1028  $self->copy_tables
1029  ("stable_id_event","mapping_session","gene_archive","peptide_archive");
1030 
1031  return;
1032 }
1033 
1034 sub transfer_vega_stable_ids {
1035  my $self = shift;
1036 
1037  my $source = $self->source();
1038  my $target = $self->target();
1039  my $dbh = $self->dbh();
1040 
1041  $self->debug("Building vega_stable id event tables");
1042 
1043 
1044  $self->copy_tables
1045  ("stable_id_event","mapping_session","gene_archive","peptide_archive");
1046 
1047  return;
1048 }
1049 
1050 sub transfer_meta {
1051  my $self = shift;
1052 
1053  my $source = $self->source();
1054  my $target = $self->target();
1055 
1056  my $dbh = $self->dbh();
1057 
1058  $dbh->do("INSERT INTO $target.meta (meta_key, meta_value) " .
1059  "SELECT m.meta_key, m.meta_value FROM $source.meta m " .
1060  "ORDER BY meta_id");
1061 
1062  return;
1063 }
1064 
1065 
1066 sub copy_other_tables {
1067  my $self = shift;
1068 
1069  #xref tables
1070  $self->copy_tables("xref",
1071  "go_xref",
1072  "identity_xref",
1073  "object_xref",
1074  "external_db",
1075  "external_synonym",
1076  #marker/qtl related tables
1077  "map",
1078  "marker",
1079  "marker_synonym",
1080  "qtl",
1081  "qtl_synonym",
1082  #misc other tables
1083  "supporting_feature",
1084  "analysis",
1085  "exon_transcript",
1086  "interpro",
1087  "gene_description",
1088  "protein_feature");
1089 }
1090 
1091 
1092 sub copy_repeat_consensus {
1093  my $self = shift;
1094 
1095  my $source = $self->source();
1096  my $target = $self->target();
1097 
1098  my $dbh = $self->dbh();
1099 
1100  $self->debug("Converting repeat_consensus table.");
1101 
1102  $dbh->do("INSERT INTO $target.repeat_consensus " .
1103  "(repeat_consensus_id, repeat_name, repeat_class, repeat_type, ".
1104  " repeat_consensus) " .
1105  "SELECT repeat_consensus_id, repeat_name, repeat_class, " .
1106  " '', repeat_consensus " .
1107  "FROM $source.repeat_consensus rc" );
1108 
1109  return;
1110 }
1111 
1112 
1113 
1114 sub create_attribs {
1115  my $self = shift;
1116 
1117  my $source = $self->source();
1118  my $target = $self->target();
1119  my $dbh = $self->dbh();
1120 
1121  #copy the attrib types from the mapannotation type table
1122 
1123  $dbh->do
1124  ("INSERT INTO $target.attrib_type( attrib_type_id, code, " .
1125  " name, description ) " .
1126  "SELECT mapannotationtype_id, code, name, description " .
1127  "FROM $source.mapannotationtype " );
1128 
1129  $dbh->do
1130  ("INSERT INTO $target.attrib_type( code, name, description ) " .
1131  "VALUES ('name', 'Name',''), ('type', 'Type of feature','')");
1132 
1133  return;
1134 }
1135 
1136 
1137 #
1138 # The process of actually identifying toplevel seq_regions using the info in
1139 # the database is quite slow. Make the assumption that the coordsystem with
1140 # lowest rank value is going to have all of the toplevel seq_regions.
1141 #
1142 # This method must be overridden if alternate behaviour is required.
1143 #
1144 
1145 sub set_top_level {
1146  my $self = shift;
1147 
1148  my $target = $self->target();
1149  my $dbh = $self->dbh();
1150 
1151  my $attrib_type_id = $self->add_attrib_code();
1152 
1153  $self->debug("Setting toplevel attributes of seq_regions");
1154 
1155  my $sth = $dbh->prepare("DELETE FROM $target.seq_region_attrib " .
1156  "WHERE attrib_type_id = ?");
1157  $sth->execute($attrib_type_id);
1158  $sth->finish();
1159 
1160 
1161  $sth = $dbh->prepare("SELECT coord_system_id FROM $target.coord_system " .
1162  "ORDER BY RANK ASC LIMIT 1");
1163  $sth->execute();
1164 
1165  my ($cs_id) = $sth->fetchrow_array();
1166 
1167  $sth->finish();
1168 
1169  $sth = $dbh->prepare("INSERT INTO $target.seq_region_attrib " .
1170  '(seq_region_id, attrib_type_id, value) ' .
1171  "SELECT sr.seq_region_id, $attrib_type_id, 1 " .
1172  "FROM $target.seq_region sr " .
1173  "WHERE sr.coord_system_id = $cs_id");
1174 
1175  $sth->execute();
1176  $sth->finish();
1177 
1178 }
1179 
1180 sub add_attrib_code {
1181  my $self = shift;
1182  my $dbh = $self->dbh();
1183  my $target = $self->target();
1184 
1185  # add a toplevel code to the attrib_type table if it is not there already
1186 
1187  my $sth = $dbh->prepare("SELECT attrib_type_id " .
1188  "FROM $target.attrib_type " .
1189  "WHERE code = 'toplevel'");
1190 
1191  $sth->execute();
1192 
1193  if($sth->rows()) {
1194  my ($attrib_type_id) = $sth->fetchrow_array();
1195  $sth->finish();
1196  return $attrib_type_id;
1197  }
1198  $sth->finish();
1199 
1200 
1201  $sth = $dbh->prepare("INSERT INTO $target.attrib_type " .
1202  "SET code = 'toplevel', " .
1203  "name = 'Top Level', " .
1204  "description = 'Top Level Non-Redundant Sequence Region'");
1205 
1206  $sth->execute();
1207  my $attrib_type_id = $sth->{'mysql_insertid'};
1208  $sth->finish();
1209 
1210  return $attrib_type_id;
1211 }
1212 
1213 1;
transcript
public transcript()
map
public map()
exon
public exon()
debug
public debug()