6 # Finding out own path in order to reference own components (including own modules):
10 $ENV{
'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11 unshift @INC, $ENV{
'EHIVE_ROOT_DIR'}.
'/modules';
14 use Getopt::Long qw(:config no_auto_abbrev);
24 my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $before_datetime, $days_ago, $help);
27 # connect to the database:
29 'reg_conf|regfile|reg_file=s' => \$reg_conf,
30 'reg_type=s' => \$reg_type,
31 'reg_alias|regname|regname=s' => \$reg_alias,
32 'nosqlvc' => \$nosqlvc, #
using "nosqlvc" instead of
"sqlvc!" for consistency with scripts where it is a propagated option
34 # specify the threshold datetime:
35 'before_datetime=s' => \$before_datetime,
36 'days_ago=f' => \$days_ago,
38 # other commands/options
40 ) or die "Error in command line arguments\n";
43 die
"ERROR: There are invalid arguments on the command-line: ". join(
" ", @ARGV).
"\n";
47 pod2usage({-exitvalue => 0, -verbose => 2});
51 if($url or $reg_alias) {
54 -reg_conf => $reg_conf,
55 -reg_type => $reg_type,
56 -reg_alias => $reg_alias,
57 -no_sql_schema_version_check => $nosqlvc,
59 $hive_dba->dbc->requires_write_access();
61 die
"\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
64 my $threshold_datetime_expression;
66 if($before_datetime) {
67 $threshold_datetime_expression =
"'$before_datetime'";
69 unless($before_datetime or $days_ago) {
70 warn
"Neither -before_datetime or -days_ago was defined, assuming '-days_ago 7'\n";
73 $threshold_datetime_expression =
"from_unixtime(unix_timestamp(now())-3600*24*$days_ago)";
79 AND j.when_completed < $threshold_datetime_expression
82 my $dbc = $hive_dba->dbc();
85 # Remove the roles that are not attached to any jobs
88 FROM role LEFT JOIN job USING (role_id)
89 WHERE job.job_id IS NULL
91 $dbc->do( $sql_roles );
93 # Remove the workers that are not attached to any roles, but only the
94 # ones that should actually have a role (e.g. have been deleted by the
98 FROM worker LEFT JOIN role USING (worker_id)
99 WHERE role.role_id IS NULL AND work_done > 0
101 $dbc->do( $sql_workers );
103 ## Remove old messages not attached to any jobs
104 my $sql_log_message = qq{
105 DELETE FROM log_message WHERE job_id IS NULL AND time < $threshold_datetime_expression
107 $dbc->do( $sql_log_message );
109 ## Remove old analysis_stats
110 my $sql_analysis_stats = qq{
111 DELETE FROM analysis_stats_monitor WHERE time < $threshold_datetime_expression
113 $dbc->do( $sql_analysis_stats );
128 hoover_pipeline.pl {-url <url> | -reg_conf <reg_conf> -reg_alias <reg_alias>} [ { -before_datetime <datetime> | -days_ago <days_ago> } ]
132 hoover_pipeline.pl is a script used to remove old
"DONE" Jobs from a continuously running pipeline database
134 =head1 USAGE EXAMPLES
136 # delete all Jobs that have been "DONE" for at least a week (default threshold) :
138 hoover_pipeline.pl -url
"mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult"
141 # delete all Jobs that have been "DONE" for at least a given number of days
143 hoover_pipeline.pl -url
"mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" -days_ago 3
146 # delete all Jobs "DONE" before a specific datetime:
148 hoover_pipeline.pl -url
"mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" -before_datetime
"2013-02-14 15:42:50"
154 =item --reg_conf <path>
156 path to a Registry configuration file
158 =item --reg_type <string>
160 type of the registry entry (
"hive",
"core",
"compara", etc - defaults to
"hive")
162 =item --reg_alias <string>
164 species/alias name
for the eHive DBAdaptor
166 =item --url <url string>
168 URL defining where eHive database is located
172 "No SQL Version Check" - set
if you want to force working with a database created by a potentially schema-incompatible API
174 =item --before_datetime <string>
176 delete Jobs
"DONE" before a specific time
178 =item --days_ago <num>
180 delete Jobs that have been
"DONE" for at least <num> days
184 show
this help message
190 See the NOTICE file distributed with
this work
for additional information
191 regarding copyright ownership.
193 Licensed under the Apache License, Version 2.0 (the
"License"); you may not use
this file except in compliance with the License.
194 You may obtain a copy of the License at
198 Unless required by applicable law or agreed to in writing, software distributed under the License
199 is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 See the License
for the specific language governing permissions and limitations under the License.
204 Please subscribe to the eHive mailing list: http: