ensembl-hive  2.8.1
SchemaConverter.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 # Module to ease mysql schema conversion
21 # Usage:
22 
23 # Make a schema_converter with new ( source_dbh, target_dbh )
24 # source database should be filled, target is empty schema
25 
26 
27 # For each target table there will be a transfer
28 # Either with a self specified transfer function
29 # with a custom select
30 # from a renamed source table
31 # from the same name source table
32 
33 # configure the transfer (and the order) with
34 
35 # table_rename( "oldname", "newname" )
36 # table_skip( "tablename" )
37 # do_first( "newtable1", "newtable2", "newtable3" )
38 
39 
40 # Each standard table transfer
41 # Either do a custom select statement
42 # or transfer columns with same name or renamed into each other
43 # specify columns to omit in target or get error
44 
45 # column_rename( "tablename", "oldcolname", "newcolname" )
46 # column_skip( $targetdb, "table", "column" )
47 # custom_select( $targetdb, "tablename",
48 
49 # Each row may be modified (custom select or standard select)
50 # specify a row_modifier function for the target table
51 # It takes a list ref and returns a list ref with the modified values
52 # ( you have to know the order of columns which come in have to go out db )
53 
54 # set_row_modifier( "tablename", function_reference )
55 #
56 # potentially large (target) tables should be declared with
57 # big_table( "newtablename" );
58 
59 package SchemaConverter;
60 
61 use strict;
62 use DBI;
63 use Data::Dumper;
64 
65 
66 sub new {
67  my ( $class, @args ) = @_;
68 
69  my $self = {};
70  bless $self, $class;
71 
72  $self->source_dbh( $args[0] );
73  $self->target_dbh( $args[1] );
74 
75  $self->read_dbs();
76 
77  return $self;
78 }
79 
80 sub tmp_dir {
81  my ( $self, $arg ) = @_;
82 
83  ( defined $arg ) &&
84  ( $self->{'tmp_dir'} = $arg );
85 
86  return $self->{'tmp_dir'};
87 }
88 
89 sub source_dbh {
90  my ( $self, $arg ) = @_;
91 
92  ( defined $arg ) &&
93  ( $self->{'source_dbh'} = $arg );
94 
95  return $self->{'source_dbh'};
96 }
97 
98 sub target_dbh {
99  my ( $self, $arg ) = @_;
100 
101  ( defined $arg ) &&
102  ( $self->{'target_dbh'} = $arg );
103 
104  return $self->{'target_dbh'};
105 }
106 
107 sub close_dbh {
108  my $self = shift;
109 
110  $self->source_dbh()->disconnect();
111  $self->target_dbh()->disconnect();
112 }
113 
114 
115 sub transfer {
116  my $self = shift;
117 
118  local *FH;
119  my $tmpdir;
120 
121  if( ! defined $self->tmp_dir() ) {
122  $self->close_dbh();
123  die( "No tmp_dir specified" );
124  } else {
125  $tmpdir = $self->tmp_dir();
126  }
127 
128  # first we should check if all standard trnasfers can go
129  # otherwise testing this is a pain
130  $self->check_possible_transfer();
131  my @ordered_tables = $self->{targetdb}{dofirst};
132  my %all_tables = %{$self->{targetdb}{tables}};
133  for my $tablename ( @ordered_tables ) {
134  delete $all_tables{$tablename};
135  }
136  for my $tablename ( keys %all_tables ) {
137  push( @ordered_tables, $tablename );
138  }
139 
140  for my $tablename ( @ordered_tables ) {
141  my $skip = 0;
142  print STDERR "Transfer $tablename ";
143 
144  open( FH, ">$tmpdir/$tablename.txt" ) or die "cant open dumpfile";
145 
146  if( exists $self->{targetdb}{tables}{$tablename}{transfer} ) {
147  my $transfunc = $self->{targetdb}{tables}{$tablename}{transfer};
148  &$transfunc( $self->source_dbh(), $self->target_dbh(), $tablename, \*FH );
149  } else {
150  my $sourcetable;
151  if( exists $self->{targetdb}{tables}{$tablename}{link} ) {
152  $sourcetable = $self->{targetdb}{tables}{$tablename}{link};
153  if( $sourcetable eq "" ) {
154  # skip this table
155  $skip = 1;
156  }
157  } else {
158  # find the sourcetable
159  if( exists $self->{targetdb}{tables}{$tablename}{select} ) {
160 
161  # if we have custom select, sourcetable doesnt make sense
162  $sourcetable = undef;
163 
164  } elsif( ! exists $self->{sourcedb}{tables}{$tablename} ) {
165 
166  die "Couldnt find source for $tablename. Enter empty sourcetable.";
167 
168  } else {
169  $sourcetable = $tablename;
170  }
171  }
172  if( ! $skip ) {
173  $self->standard_table_transfer( $sourcetable, $tablename, \*FH );
174  }
175  }
176 
177  close FH;
178 
179  if( ! $skip ) {
180  $self->target_dbh->do( "load data infile '$tmpdir/$tablename.txt' into table $tablename" );
181  }
182 
183  # upload ?
184  unlink "$tmpdir/$tablename.txt";
185  print STDERR " finished\n";
186  }
187 
188  # close databases
189 
190 }
191 
192 
193 # this function checks if a standard transfer is possible.
194 # custom selects and custom transfer functions are not checked
195 # it will only return if it can do the transfer otherwise die
196 
197 sub check_possible_transfer {
198  my $self = shift;
199 
200  for my $tablename ( keys %{$self->{targetdb}{tables}} ) {
201  my $sourcetable;
202 
203  if( exists $self->{targetdb}{tables}{$tablename}{transfer} ) {
204  next;
205  }
206 
207  if( exists $self->{targetdb}{tables}{$tablename}{link} ) {
208  $sourcetable = $self->{targetdb}{tables}{$tablename}{link};
209  if( $sourcetable eq "" ) {
210  # skip this table
211  next;
212  }
213  } else {
214  # find the sourcetable
215  if( exists $self->{targetdb}{tables}{$tablename}{select} ) {
216 
217  # custom select, no check
218  next;
219  } elsif( ! exists $self->{sourcedb}{tables}{$tablename} ) {
220 
221  die "Couldnt find source for $tablename. Enter empty sourcetable.";
222 
223  } else {
224  $sourcetable = $tablename;
225  }
226  }
227 
228  my @newcols = @{$self->{targetdb}{tables}{$tablename}{columns}};
229  my @oldcols = @{$self->{sourcedb}{tables}{$sourcetable}{columns}};
230 
231  my %rename;
232 
233  if( exists $self->{targetdb}{tables}{$tablename}{columnrename} ) {
234  %rename = %{$self->{targetdb}{tables}{$tablename}{columnrename}};
235  } else {
236  %rename = ();
237  }
238 
239  # find all source columns and build select statement
240 
241  for my $colname ( @newcols ) {
242  my $selname;
243 
244  if( exists $rename{$colname} ) {
245  $selname = $rename{$colname};
246  if( $selname eq "" ) {
247  $selname = "NULL";
248  }
249  } else {
250  my $colExists = 0;
251  for my $oldcol ( @oldcols ) {
252  if( $oldcol eq $colname ) {
253  $selname = $colname;
254  $colExists = 1;
255  last;
256  }
257  }
258  if( ! $colExists ) {
259  die "Couldnt fill $tablename.$colname\n";
260  }
261  }
262  }
263  }
264 }
265 
266 
267 sub standard_table_transfer {
268  my ( $self, $sourcetable, $targettable, $tmpfile ) = @_;
269 
270  my $sourcedb = $self->source_dbh();
271  my $targetdb = $self->target_dbh();
272 
273  # look for custom select
274  my $select = "";
275  if( exists $self->{targetdb}{tables}{$targettable}{select} ) {
276  $select = $self->{targetdb}{tables}{$targettable}{select};
277  } else {
278  # check if all columns have matching names
279  my @newcols = @{$self->{targetdb}{tables}{$targettable}{columns}};
280  my @oldcols = @{$self->{sourcedb}{tables}{$sourcetable}{columns}};
281 
282  my %rename;
283 
284  if( exists $self->{targetdb}{tables}{$targettable}{columnrename} ) {
285  %rename = %{$self->{targetdb}{tables}{$targettable}{columnrename}};
286  } else {
287  %rename = ();
288  }
289 
290  # find all source columns and build select statement
291 
292  for my $colname ( @newcols ) {
293  my $selname;
294 
295  if( exists $rename{$colname} ) {
296  $selname = $rename{$colname};
297  if( $selname eq "" ) {
298  $selname = "NULL";
299  }
300  } else {
301  my $colExists = 0;
302  for my $oldcol ( @oldcols ) {
303  if( $oldcol eq $colname ) {
304  $selname = $colname;
305  $colExists = 1;
306  last;
307  }
308  }
309  if( ! $colExists ) {
310  die "Couldnt fill $targettable.$colname\n";
311  }
312  }
313 
314  $select .= " $selname,";
315  }
316  chop( $select );
317  $select = "SELECT $select from $sourcetable";
318  }
319 
320  # MySQL specific ...
321  # DBD doesn't use cursors; loads whole table during execute()
322  # problem for large tables - "mysql_use_result" gets around this
323  my $sth;
324  if (defined $self->{targetdb}{tables}{$targettable}{its_a_big_un}) {
325  $sth = $self->source_dbh()->prepare( $select , { mysql_use_result => 1 } );
326  }
327  else {
328  $sth = $self->source_dbh()->prepare( $select );
329  }
330  $sth->execute();
331 
332  my $row;
333  if( exists $self->{targetdb}{tables}{$targettable}{row_modify} ) {
334  my $rowmod = $self->{targetdb}{tables}{$targettable}{row_modify};
335 
336  while( my $arref = $sth->fetchrow_arrayref() ) {
337 
338  $row = &{$rowmod}($arref);
339 
340  print $tmpfile ( join( "\t",@{$row} ),"\n" );
341  }
342  } else {
343  while( my $arref = $sth->fetchrow_arrayref() ) {
344  $row = join( "\t", @$arref );
345  print $tmpfile "$row\n";
346  }
347  }
348 }
349 
350 
351 
352 sub read_dbs {
353  my $self = shift;
354 
355  my $dbh;
356 
357  for my $db_name ('targetdb', 'sourcedb' ) {
358  if( $db_name eq 'targetdb' ) {
359  $dbh = $self->target_dbh();
360  } else {
361  $dbh = $self->source_dbh();
362  }
363 
364  my $sth = $dbh->prepare( "show tables" );
365  $sth->execute();
366 
367  while( my $arref = $sth->fetchrow_arrayref() ) {
368  $self->{$db_name}{tables}{$arref->[0]} = {};
369  }
370 
371  my @tables = keys %{$self->{$db_name}{tables}};
372  for my $table ( @tables ) {
373  $sth = $dbh->prepare( "show columns from $table" );
374  $sth->execute();
375  while( my $arref = $sth->fetchrow_arrayref () ) {
376  push( @{$self->{$db_name}{tables}{$table}{columns}}, $arref->[0] );
377  }
378  }
379  }
380 }
381 
382 
383 sub table_rename {
384  my ( $self, $oldtable, $newtable ) = @_;
385  $self->{targetdb}{tables}{$newtable}{link} = $oldtable;
386 }
387 
388 sub table_skip {
389  my ( $self, $newtable ) = @_;
390  $self->{targetdb}{tables}{$newtable}{link} = "";
391 }
392 
393 sub column_rename {
394  my ( $self, $newtable, $oldcol, $newcol ) = @_;
395  $self->{targetdb}{tables}{$newtable}{columnrename}{$newcol} = $oldcol;
396 }
397 
398 sub column_skip {
399  my ( $self, $newtable, $newcol ) = @_;
400  $self->{targetdb}{tables}{$newtable}{columnrename}{$newcol} = "";
401 }
402 
403 sub custom_select {
404  my ( $self, $newtable, $select ) = @_;
405  $self->{targetdb}{tables}{$newtable}{select} = $select;
406 }
407 
408 sub set_row_modifier {
409  my ( $self, $newtable, $row_modifier ) = @_;
410  $self->{targetdb}{tables}{$newtable}{row_modify} = $row_modifier;
411 }
412 
413 sub clear_target {
414  my $self = shift;
415  for my $tablename ( keys %{$self->{targetdb}{tables}} ) {
416  $self->target_dbh()->do( "delete from $tablename" );
417  }
418 }
419 
420 sub big_table {
421  my ( $self, $table ) = @_;
422 
423  $self->{targetdb}{tables}{$table}{its_a_big_un} = 1;
424 }
425 
426 
427 sub do_first {
428  my $self = shift;
429  my @ordered_table_list= @_;
430  $self->{targetdb}{dofirst} = \@ordered_table_list;
431 }
432 
433 1;
SchemaConverter::tmp_dir
public tmp_dir()