ensembl-hive  2.7.0
manage_id_mapping_tables.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # Don't change the above line.
18 # Change the PATH in the myManager.ksh script if you want to use another perl.
19 
20 =head1 NAME
21 
22 manage_id_mapping_tables.pl - script to delete (and optionally backup) ID
23 mapping results
24 
25 =head1 SYNOPSIS
26 
27 manage_id_mapping_tables.pl [arguments]
28 
29 Required arguments:
30 
31  --dbname, db_name=NAME database name NAME
32  --host, --dbhost, --db_host=HOST database host HOST
33  --port, --dbport, --db_port=PORT database port PORT
34  --user, --dbuser, --db_user=USER database username USER
35  --pass, --dbpass, --db_pass=PASS database passwort PASS
36 
37 Optional arguments:
38 
39  --conffile, --conf=FILE read parameters from FILE
40  (default: conf/Conversion.ini)
41 
42  --logfile, --log=FILE log to FILE (default: *STDOUT)
43  --logpath=PATH write logfile to PATH (default: .)
44  --logappend, --log_append append to logfile (default: truncate)
45  --loglevel=LEVEL define log level (default: INFO)
46 
47  -i, --interactive run script interactively (default: true)
48  -n, --dry_run, --dry don't write results to database
49  -h, --help, -? print help (this message)
50 
51 =head1 DESCRIPTION
52 
53 This script will delete stable ID mapping data from a database. The script is
54 intended to be run interactively (your configuration will be overridden).
55 
56 The tables that will be emptied are:
57 
58  gene_stable_id
59  transcript_stable_id
60  translation_stable_id
61  exon_stable_id
62  mapping_session
63  stable_id_event
64  gene_archive
65  peptide_archive
66 
67 Optionally (by interactive selection), the current tables can be backed up.
68 Backkup tables will get suffices of _bak_0, _bak_1, etc. (where the correct
69 number is determined automatically from existing backup tables). There is also
70 an option to drop existing backup tables.
71 
72 Deleting from the current tables can also be skipped, so effectively this
73 script can do three different things (or any combination of them), depending on
74 your answers in the interactive process:
75 
76  - drop existing backup tables
77  - backup current tables
78  - delete from current tables
79 
80 
81 =head1 AUTHOR
82 
83 Patrick Meidl <meidl@ebi.ac.uk>, Ensembl core API team
84 
85 =head1 CONTACT
86 
87 Please post comments/questions to the Ensembl development list
88 <http://lists.ensembl.org/mailman/listinfo/dev>
89 
90 =cut
91 
92 use strict;
93 use warnings;
94 no warnings 'uninitialized';
95 
96 use FindBin qw($Bin);
97 use Bio::EnsEMBL::Utils::ConfParser;
98 use Bio::EnsEMBL::Utils::Logger;
99 use Bio::EnsEMBL::Utils::ScriptUtils qw(user_proceed);
100 use Bio::EnsEMBL::DBSQL::DBAdaptor;
101 
102 my @tables = qw(
103  gene_stable_id
104  transcript_stable_id
105  translation_stable_id
106  exon_stable_id
107  mapping_session
108  stable_id_event
109  gene_archive
110  peptide_archive
111 );
112 
113 my %suffnum = ();
114 
115 # parse configuration and commandline arguments
116 my $conf =
117  new Bio::EnsEMBL::Utils::ConfParser(-SERVERROOT => "$Bin/../../../..",
118  -DEFAULT_CONF => "" );
119 
120 $conf->parse_options( 'host=s' => 1,
121  'port=n' => 1,
122  'user=s' => 1,
123  'pass=s' => 0,
124  'dbname=s' => 1, );
125 
126 # get log filehandle and print heading and parameters to logfile
127 my $logger =
128  new Bio::EnsEMBL::Utils::Logger(
129  -LOGFILE => $conf->param('logfile'),
130  -LOGPATH => $conf->param('logpath'),
131  -LOGAPPEND => $conf->param('logappend'),
132  -VERBOSE => $conf->param('verbose'),
133  );
134 
135 # always run interactively
136 $conf->param( 'interactive', 1 );
137 
138 # initialise log
139 $logger->init_log( $conf->list_param_values );
140 
141 # connect to database and get adaptors
142 my $dba =
143  new Bio::EnsEMBL::DBSQL::DBAdaptor( -host => $conf->param('host'),
144  -port => $conf->param('port'),
145  -user => $conf->param('user'),
146  -pass => $conf->param('pass'),
147  -dbname => $conf->param('dbname'),
148  -group => 'core', );
149 $dba->dnadb($dba);
150 
151 my $dbh = $dba->dbc->db_handle;
152 
153 # first check which tables are populated
154 &list_table_counts;
155 
156 # then look for existing backup tables
157 my $sfx = &list_backup_counts;
158 
159 # aks user if he wants to drop backup tables
160 if ( %suffnum
161  and
162  user_proceed(
163  "Drop any backup tables? (you will be able chose which ones)",
164  $conf->param('interactive'), 'n' ) )
165 {
166  &drop_backup_tables;
167 }
168 
169 # ask user if current tables should be backed up
170 if (user_proceed( "Backup current tables?", $conf->param('interactive'),
171  'y' ) )
172 {
173  &backup_tables($sfx);
174 }
175 
176 # delete from tables
177 if ( user_proceed( "Delete from current tables?",
178  $conf->param('interactive'),
179  'n' ) )
180 {
181  &delete_from_tables;
182 }
183 
184 # finish logfile
185 $logger->finish_log;
186 
187 ### END main ###
188 
189 sub list_table_counts {
190  $logger->info("Current table counts:\n\n");
191  &list_counts( [@tables] );
192 }
193 
194 sub list_backup_counts {
195  my $new_num = -1;
196 
197  foreach my $table (@tables) {
198  my $thetable = $table;
199  if ( $table =~ /^([^_]+)_stable_id/ ) {
200  $thetable = $1;
201  }
202  my $sth = $dbh->prepare(qq(SHOW TABLES LIKE "${thetable}_bak_%"));
203  $sth->execute;
204 
205  while ( my ($bak) = $sth->fetchrow_array ) {
206  if ($bak =~ /_bak_(\d+)$/) {
207  my $num = $1;
208  $suffnum{$num} = 1;
209 
210  $new_num = $num if ( $num > $new_num );
211  }
212  }
213 
214  $sth->finish;
215  }
216 
217  $logger->info("Backup tables found:\n\n") if (%suffnum);
218 
219  foreach my $num ( sort keys %suffnum ) {
220  my @t = ();
221 
222  foreach my $table (@tables) {
223  my $thetable = $table;
224  if ( $table =~ /^([^_]+)_stable_id/ ) {
225  $thetable = $1;
226  }
227  push @t, "${thetable}_bak_$num";
228  }
229 
230  &list_counts( [@t] );
231  $logger->info("\n");
232  }
233 
234  my $sfx = '_bak_' . ++$new_num;
235  return $sfx;
236 } ## end sub list_backup_counts
237 
238 sub list_counts {
239  my $tabs = shift;
240 
241  unless ( $tabs and ref($tabs) eq 'ARRAY' ) {
242  throw("Need an arrayref.");
243  }
244 
245  $logger->info( sprintf( "%-30s%-8s\n", qw(TABLE COUNT) ), 1 );
246  $logger->info( ( '-' x 38 ) . "\n", 1 );
247 
248  my $fmt = "%-30s%8d\n";
249 
250  foreach my $table (@$tabs) {
251  my $sth;
252  my $thetable = $table;
253  if ( $table =~ /^([^_]+)_stable_id/ ) {
254  $thetable = $1;
255  $sth = $dbh->prepare(
256  qq(SELECT COUNT(*) FROM $thetable WHERE stable_id IS NOT NULL));
257  }
258  else {
259  $sth = $dbh->prepare(qq(SELECT COUNT(*) FROM $thetable));
260  }
261  $sth->execute;
262  my $count = $sth->fetchrow_arrayref->[0];
263  $sth->finish;
264 
265  $logger->info( sprintf( $fmt, $thetable, $count ), 1 );
266  }
267 
268  $logger->info("\n");
269 } ## end sub list_counts
270 
271 sub drop_backup_tables {
272 
273  foreach my $num ( sort keys %suffnum ) {
274  my $suffix = "_bak_$num";
275  if ( user_proceed( qq(Drop backup tables with suffix ${suffix}?),
276  $conf->param('interactive'), 'n' ) )
277  {
278  foreach my $table (@tables) {
279  my $thetable = $table;
280  if ( $table =~ /^([^_]+)_stable_id/ ) {
281  $thetable = $1;
282  }
283  my $bak_table = "${thetable}${suffix}";
284  $logger->info( "$bak_table\n", 1 );
285  unless ( $conf->param('dry_run') ) {
286  $dbh->do(qq(DROP TABLE $bak_table));
287  }
288  }
289 
290  # remove the suffix number
291  delete $suffnum{$num};
292  }
293  }
294 
295  $logger->info("\n");
296 
297  # recalculate the suffix number to use for current backup
298  my $max_num = reverse sort keys %suffnum;
299  $sfx = '_bak_' . ++$max_num;
300 } ## end sub drop_backup_tables
301 
302 sub backup_tables {
303  my $sfx = shift;
304 
305  throw("Need a backup table suffix.") unless ( defined($sfx) );
306 
307  $logger->info(qq(\nWill use '$sfx' as suffix for backup tables\n));
308 
309  $logger->info(qq(\nBacking up tables...\n));
310 
311  my $fmt1 = "%-30s";
312  my $fmt2 = "%8d\n";
313 
314  foreach my $table (@tables) {
315  my $thetable = $table;
316  if ( $table =~ /^([^_]+)_stable_id/ ) {
317  $thetable = $1;
318  }
319  $logger->info( sprintf( $fmt1, $thetable ), 1 );
320  my $c = 0;
321  if ( !$conf->param('dry_run') &&
322  $dbh->do(qq(CREATE TABLE ${thetable}${sfx} LIKE ${thetable})) )
323  {
324  $c = $dbh->do(
325  qq(INSERT INTO ${thetable}${sfx} SELECT * FROM ${thetable}));
326  }
327  $logger->info( sprintf( $fmt2, $c ) );
328  }
329 
330  $logger->info(qq(Done.\n));
331 } ## end sub backup_tables
332 
333 sub delete_from_tables {
334  my $fmt1 = "%-30s";
335  my $fmt2 = "%8d\n";
336 
337  $logger->info(qq(\nDeleting from current tables...\n));
338 
339  foreach my $table (@tables) {
340  my $thetable = $table;
341  if ( $table =~ /^([^_]+)_stable_id/ ) {
342  $thetable = $1;
343  }
344  $logger->info( sprintf( $fmt1, $thetable ), 1 );
345  my $c = 0;
346  unless ( $conf->param('dry_run') ) {
347  if ( $table =~ /^([^_]+)_stable_id/ ) {
348  $c = $dbh->do(qq(UPDATE $thetable SET stable_id=NULL));
349  }
350  else {
351  $c = $dbh->do(qq(TRUNCATE $thetable));
352  }
353  }
354  $logger->info( sprintf( $fmt2, $c ) );
355  }
356 
357  $logger->info(qq(Done.\n));
358 } ## end sub delete_from_tables
run
public run()