ensembl-hive  2.7.0
sort_feature_table.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 # Designed to sort a feature table by seq_region_id, start and end
19 
20 use strict;
21 use warnings;
22 
24 use Getopt::Long;
25 use POSIX qw/strftime/;
26 
27 my ($db_name,$db_host,$db_user,$db_pass,$db_port,$help);
28 my @tables;
29 my @columns;
30 my ($optimise, $nobackup, $nolock);
31 
32 GetOptions ("db_name|dbname|database=s" => \$db_name,
33  "db_host|dbhost|host=s" => \$db_host,
34  "db_user|dbuser|user|username=s" => \$db_user,
35  "db_pass|dbpass|pass|password=s" => \$db_pass,
36  "db_port|dbport|port=s" => \$db_port,
37  'table|tables=s@' => \@tables,
38  'optimise!' => \$optimise,
39  'nolock!' => \$nolock,
40  'nobackup!' => \$nobackup,
41  'columns=s@' => \@columns,
42  "h|help!" => \$help,
43 );
44 
45 if ($help) {&usage; exit 0;}
46 unless ($db_name and $db_host) {print "Insufficient arguments\n"; &usage; exit 1;}
47 
48 if(!@columns) {
49  @columns = qw/seq_region_id seq_region_start seq_region_end/;
50 }
51 
52 sub get_adaptor {
54  -species => 'tmp',
55  -group => 'db',
56  -dbname => $db_name,
57  -host => $db_host,
58  -user => $db_user,
59  -port => $db_port
60  );
61  $dba->dbc->password($db_pass) if $db_pass;
62  return $dba;
63 }
64 
65 # see bottom of file for this method call
66 sub run {
67  my $dba = get_adaptor();
68  foreach my $table (@tables) {
69  info('Processing %s', $table);
70  sort_table($table, $dba);
71  optimise_table($table, $dba) if $optimise;
72  }
73  return;
74 }
75 
76 sub sort_table {
77  my ($table, $dba) = @_;
78  info("Starting sort");
79  my $s_table = $table.'_sorted';
80 
81  if(!$nolock) {
82  info("Locking table %s", $table);
83  $dba->dbc()->do("lock tables ${table} write");
84  }
85 
86  if(!$nobackup) {
87  backup_table($table);
88  }
89 
90  info("Re-ordering table");
91  my $cols = join(',', @columns);
92  $dba->dbc()->do("ALTER TABLE ${table} ORDER BY $cols");
93 
94  if(!$nolock) {
95  info("Unlocking table %s", $table);
96  $dba->dbc()->do("unlock tables");
97  }
98 
99  info("Done");
100  return;
101 }
102 
103 sub optimise_table {
104  my ($table, $dba) = @_;
105  info("Optimising table");
106  $dba->dbc()->do("OPTIMIZE TABLE ${table}");
107  info("Done");
108  return;
109 }
110 
111 sub backup_table {
112  my ($table, $dba) = @_;
113  my $bak_name = _next_name($table, $dba);
114  info("Backing up table to $bak_name");
115  $dba->dbc()->do("create table ${bak_name} like ${table}");
116  $dba->dbc()->do("alter table ${bak_name} disable keys");
117  $dba->dbc()->do("insert into ${bak_name} select * from ${table}");
118  $dba->dbc()->do("alter table ${bak_name} enable keys");
119  info("Finished backing up");
120  return $bak_name;
121 }
122 
123 sub _next_name {
124  my ($table, $dba) = @_;
125  my $count = 0;
126  my $new_name;
127  while (1) {
128  $new_name = $count == 0 ? "${table}_bak" : "${table}_bak_${count}";
129  my $r = $dba->dbc()->sql_helper()->execute(
130  -SQL => 'show tables like ?', -PARAMS => [$new_name]
131  );
132  last if scalar(@{$r}) == 0;
133  $count++;
134  }
135  return $new_name;
136 }
137 
138 sub info {
139  my ($msg, @args) = @_;
140  my $m = sprintf $msg, @args;
141  my $time = strftime('%c',localtime());
142  printf STDERR '[%s] %s', $time, $m;
143  print STDERR "\n";
144  return;
145 }
146 
147 sub usage {
148  print "Description:
149 
150 Sort a feature table by seq_region_id, seq_region_start. Also run optimise
151 if specified. Holds a backup of the table that has just been sorted
152 unless told otherwise.
153 
154 Synopsis:
155 
156  perl sort_feature_table.pl -db_name NAME
157 
158 Options:
159 
160  -db_name The DB to add these features to
161  -database
162  -dbname
163 
164  -db_host Hostname for the DB
165  -host
166  -dbhost
167 
168  -db_user Username for the DB
169  -user
170  -username
171  -dbuser
172 
173  -db_pass Password for the DB
174  -pass
175  -password
176  -dbpass
177 
178  -db_port Port for the DB
179  -dbport
180  -port
181 
182  -table The table(s) to sort. Specify multiple parameters
183  -tables
184 
185  -optimise Optimise the table post sort
186 
187  -nobackup Do not backup the original table
188 
189  -nolock Stop the code from applying for table locks
190 
191  -columns Specify the columns to sort on. Defaults to
192  seq_region_id, seq_region_start and seq_region_end.
193  Multiple parameters allowed
194 
195  -help
196 ";
197 }
198 
199 run();
optimise_table
public optimise_table()
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
run
public run()
backup_table
public backup_table()
Bio::EnsEMBL::DBSQL::DBAdaptor::dbc
public Bio::EnsEMBL::DBSQL::DBConnection dbc()
info
public info()
sort_table
public sort_table()
_next_name
protected _next_name()
Bio::EnsEMBL::DBSQL::DBConnection::password
public String password()
get_adaptor
public get_adaptor()
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
usage
public usage()