ensembl-hive  2.7.0
hoover_pipeline.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 
3 use strict;
4 use warnings;
5 
6  # Finding out own path in order to reference own components (including own modules):
7 use Cwd ();
8 use File::Basename ();
9 BEGIN {
10  $ENV{'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11  unshift @INC, $ENV{'EHIVE_ROOT_DIR'}.'/modules';
12 }
13 
14 use Getopt::Long qw(:config no_auto_abbrev);
15 use Pod::Usage;
16 
20 
22 
23 sub main {
24  my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $before_datetime, $days_ago, $help);
25 
26  GetOptions(
27  # connect to the database:
28  'url=s' => \$url,
29  'reg_conf|regfile|reg_file=s' => \$reg_conf,
30  'reg_type=s' => \$reg_type,
31  'reg_alias|regname|regname=s' => \$reg_alias,
32  'nosqlvc' => \$nosqlvc, # using "nosqlvc" instead of "sqlvc!" for consistency with scripts where it is a propagated option
33 
34  # specify the threshold datetime:
35  'before_datetime=s' => \$before_datetime,
36  'days_ago=f' => \$days_ago,
37 
38  # other commands/options
39  'h|help!' => \$help,
40  ) or die "Error in command line arguments\n";
41 
42  if (@ARGV) {
43  die "ERROR: There are invalid arguments on the command-line: ". join(" ", @ARGV). "\n";
44  }
45 
46  if ($help) {
47  pod2usage({-exitvalue => 0, -verbose => 2});
48  }
49 
50  my $hive_dba;
51  if($url or $reg_alias) {
53  -url => $url,
54  -reg_conf => $reg_conf,
55  -reg_type => $reg_type,
56  -reg_alias => $reg_alias,
57  -no_sql_schema_version_check => $nosqlvc,
58  );
59  $hive_dba->dbc->requires_write_access();
60  } else {
61  die "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
62  }
63 
64  my $threshold_datetime_expression;
65 
66  if($before_datetime) {
67  $threshold_datetime_expression = "'$before_datetime'";
68  } else {
69  unless($before_datetime or $days_ago) {
70  warn "Neither -before_datetime or -days_ago was defined, assuming '-days_ago 7'\n";
71  $days_ago = 7;
72  }
73  $threshold_datetime_expression = "from_unixtime(unix_timestamp(now())-3600*24*$days_ago)";
74  }
75 
76  my $sql = qq{
77  DELETE j FROM job j
78  WHERE j.status='DONE'
79  AND j.when_completed < $threshold_datetime_expression
80  };
81 
82  my $dbc = $hive_dba->dbc();
83  $dbc->do( $sql );
84 
85  # Remove the roles that are not attached to any jobs
86  my $sql_roles = q{
87  DELETE role
88  FROM role LEFT JOIN job USING (role_id)
89  WHERE job.job_id IS NULL
90  };
91  $dbc->do( $sql_roles );
92 
93  # Remove the workers that are not attached to any roles, but only the
94  # ones that should actually have a role (e.g. have been deleted by the
95  # above statement).
96  my $sql_workers = q{
97  DELETE worker
98  FROM worker LEFT JOIN role USING (worker_id)
99  WHERE role.role_id IS NULL AND work_done > 0
100  };
101  $dbc->do( $sql_workers );
102 
103  ## Remove old messages not attached to any jobs
104  my $sql_log_message = qq{
105  DELETE FROM log_message WHERE job_id IS NULL AND time < $threshold_datetime_expression
106  };
107  $dbc->do( $sql_log_message );
108 
109  ## Remove old analysis_stats
110  my $sql_analysis_stats = qq{
111  DELETE FROM analysis_stats_monitor WHERE time < $threshold_datetime_expression
112  };
113  $dbc->do( $sql_analysis_stats );
114 }
115 
116 main();
117 
118 __DATA__
119 
120 =pod
121 
122 =head1 NAME
123 
124 hoover_pipeline.pl
125 
126 =head1 SYNOPSIS
127 
128  hoover_pipeline.pl {-url <url> | -reg_conf <reg_conf> -reg_alias <reg_alias>} [ { -before_datetime <datetime> | -days_ago <days_ago> } ]
129 
130 =head1 DESCRIPTION
131 
132 hoover_pipeline.pl is a script used to remove old "DONE" Jobs from a continuously running pipeline database
133 
134 =head1 USAGE EXAMPLES
135 
136  # delete all Jobs that have been "DONE" for at least a week (default threshold) :
137 
138  hoover_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult"
139 
140 
141  # delete all Jobs that have been "DONE" for at least a given number of days
142 
143  hoover_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" -days_ago 3
144 
145 
146  # delete all Jobs "DONE" before a specific datetime:
147 
148  hoover_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" -before_datetime "2013-02-14 15:42:50"
149 
150 =head1 OPTIONS
151 
152 =over
153 
154 =item --reg_conf <path>
155 
156 path to a Registry configuration file
157 
158 =item --reg_type <string>
159 
160 type of the registry entry ("hive", "core", "compara", etc - defaults to "hive")
161 
162 =item --reg_alias <string>
163 
164 species/alias name for the eHive DBAdaptor
165 
166 =item --url <url string>
167 
168 URL defining where eHive database is located
169 
170 =item --nosqlvc
171 
172 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
173 
174 =item --before_datetime <string>
175 
176 delete Jobs "DONE" before a specific time
177 
178 =item --days_ago <num>
179 
180 delete Jobs that have been "DONE" for at least <num> days
181 
182 =item -h, --help
183 
184 show this help message
185 
186 =back
187 
188 =head1 LICENSE
189 
190  See the NOTICE file distributed with this work for additional information
191  regarding copyright ownership.
192 
193  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
194  You may obtain a copy of the License at
195 
196  http://www.apache.org/licenses/LICENSE-2.0
197 
198  Unless required by applicable law or agreed to in writing, software distributed under the License
199  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200  See the License for the specific language governing permissions and limitations under the License.
201 
202 =head1 CONTACT
203 
204 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates
205 
206 =cut
207 
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor::new
public new()
Bio::EnsEMBL::Hive::Utils::URL::hide_url_password
public Void hide_url_password()
Bio::EnsEMBL::Hive::Utils::URL
Definition: URL.pm:11
BEGIN
public BEGIN()
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:31
main
public main()
Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor
Definition: AnalysisJobAdaptor.pm:22