ensembl-hive  2.8.1
hoover_pipeline.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 
3 use strict;
4 use warnings;
5 
6  # Finding out own path in order to reference own components (including own modules):
7 use Cwd ();
8 use File::Basename ();
9 BEGIN {
10  $ENV{'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11  unshift @INC, $ENV{'EHIVE_ROOT_DIR'}.'/modules';
12 }
13 
14 use Getopt::Long qw(:config no_auto_abbrev);
15 use Pod::Usage;
16 
20 
22 
23 sub main {
24  my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $before_datetime, $days_ago, $help);
25 
26  GetOptions(
27  # connect to the database:
28  'url=s' => \$url,
29  'reg_conf|regfile|reg_file=s' => \$reg_conf,
30  'reg_type=s' => \$reg_type,
31  'reg_alias|regname|regname=s' => \$reg_alias,
32  'nosqlvc' => \$nosqlvc, # using "nosqlvc" instead of "sqlvc!" for consistency with scripts where it is a propagated option
33 
34  # specify the threshold datetime:
35  'before_datetime=s' => \$before_datetime,
36  'days_ago=f' => \$days_ago,
37 
38  # other commands/options
39  'h|help!' => \$help,
40  ) or die "Error in command line arguments\n";
41 
42  if (@ARGV) {
43  die "ERROR: There are invalid arguments on the command-line: ". join(" ", @ARGV). "\n";
44  }
45 
46  if ($help) {
47  pod2usage({-exitvalue => 0, -verbose => 2});
48  }
49 
50  my $hive_dba;
51  if($url or $reg_alias) {
53  -url => $url,
54  -reg_conf => $reg_conf,
55  -reg_type => $reg_type,
56  -reg_alias => $reg_alias,
57  -no_sql_schema_version_check => $nosqlvc,
58  );
59  $hive_dba->dbc->requires_write_access();
60  } else {
61  die "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
62  }
63 
64  my $threshold_datetime_expression;
65 
66  if($before_datetime) {
67  $threshold_datetime_expression = "'$before_datetime'";
68  } else {
69  unless($before_datetime or $days_ago) {
70  warn "Neither -before_datetime or -days_ago was defined, assuming '-days_ago 7'\n";
71  $days_ago = 7;
72  }
73  $threshold_datetime_expression = "from_unixtime(unix_timestamp(now())-3600*24*$days_ago)";
74  }
75 
76  my $sql = qq{
77  DELETE j FROM job j
78  WHERE j.status='DONE'
79  AND j.when_completed < $threshold_datetime_expression
80  };
81 
82  my $dbc = $hive_dba->dbc();
83  $dbc->do( $sql );
84 
85  # Remove the roles that are not attached to any jobs
86  my $sql_roles = q{
87  DELETE role
88  FROM role LEFT JOIN job USING (role_id)
89  WHERE job.job_id IS NULL
90  };
91  $dbc->do( $sql_roles );
92 
93  # Remove the workers that are not attached to any roles, but only the
94  # ones that should actually have a role (e.g. have been deleted by the
95  # above statement).
96  my $sql_workers = q{
97  DELETE worker
98  FROM worker LEFT JOIN role USING (worker_id)
99  WHERE role.role_id IS NULL AND work_done > 0
100  };
101  $dbc->do( $sql_workers );
102 
103  ## Remove old messages not attached to any jobs
104  my $sql_log_message = qq{
105  DELETE FROM log_message WHERE job_id IS NULL AND time < $threshold_datetime_expression
106  };
107  $dbc->do( $sql_log_message );
108 }
109 
110 main();
111 
112 __DATA__
113 
114 =pod
115 
116 =head1 NAME
117 
118 hoover_pipeline.pl
119 
120 =head1 SYNOPSIS
121 
122  hoover_pipeline.pl {-url <url> | -reg_conf <reg_conf> -reg_alias <reg_alias>} [ { -before_datetime <datetime> | -days_ago <days_ago> } ]
123 
124 =head1 DESCRIPTION
125 
126 hoover_pipeline.pl is a script used to remove old "DONE" Jobs from a continuously running pipeline database
127 
128 =head1 USAGE EXAMPLES
129 
130  # delete all Jobs that have been "DONE" for at least a week (default threshold) :
131 
132  hoover_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult"
133 
134 
135  # delete all Jobs that have been "DONE" for at least a given number of days
136 
137  hoover_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" -days_ago 3
138 
139 
140  # delete all Jobs "DONE" before a specific datetime:
141 
142  hoover_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_long_mult" -before_datetime "2013-02-14 15:42:50"
143 
144 =head1 OPTIONS
145 
146 =over
147 
148 =item --reg_conf <path>
149 
150 path to a Registry configuration file
151 
152 =item --reg_type <string>
153 
154 type of the registry entry ("hive", "core", "compara", etc - defaults to "hive")
155 
156 =item --reg_alias <string>
157 
158 species/alias name for the eHive DBAdaptor
159 
160 =item --url <url string>
161 
162 URL defining where eHive database is located
163 
164 =item --nosqlvc
165 
166 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
167 
168 =item --before_datetime <string>
169 
170 delete Jobs "DONE" before a specific time
171 
172 =item --days_ago <num>
173 
174 delete Jobs that have been "DONE" for at least <num> days
175 
176 =item -h, --help
177 
178 show this help message
179 
180 =back
181 
182 =head1 LICENSE
183 
184  See the NOTICE file distributed with this work for additional information
185  regarding copyright ownership.
186 
187  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
188  You may obtain a copy of the License at
189 
190  http://www.apache.org/licenses/LICENSE-2.0
191 
192  Unless required by applicable law or agreed to in writing, software distributed under the License
193  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
194  See the License for the specific language governing permissions and limitations under the License.
195 
196 =head1 CONTACT
197 
198 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates
199 
200 =cut
201 
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor::new
public new()
Bio::EnsEMBL::Hive::Utils::URL::hide_url_password
public Void hide_url_password()
Bio::EnsEMBL::Hive::Utils::URL
Definition: URL.pm:11
BEGIN
public BEGIN()
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:31
main
public main()
Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor
Definition: AnalysisJobAdaptor.pm:22