ensembl-hive  2.8.1
load_resource_usage.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 
3 use strict;
4 use warnings;
5 
6  # Finding out own path in order to reference own components (including own modules):
7 use Cwd ();
8 use File::Basename ();
BEGIN {
        # Unless the environment already names the eHive root, derive it from this script's
        # resolved location: the parent of the directory that holds the script.
    unless( $ENV{'EHIVE_ROOT_DIR'} ) {
        my $script_dir = File::Basename::dirname( Cwd::realpath($0) );
        $ENV{'EHIVE_ROOT_DIR'} = File::Basename::dirname( $script_dir );
    }

        # Give the bundled modules priority over anything else already in @INC.
    unshift @INC, "$ENV{'EHIVE_ROOT_DIR'}/modules";
}
13 
14 use Getopt::Long qw(:config no_auto_abbrev);
15 use Pod::Usage;
16 
20 
22 
# Run the script body, then terminate explicitly with a success status.
main();
exit 0;
25 
26 
# Entry point of the script: parses the command line, connects to the eHive database
# and stores resource-usage report entries via the Queen.
#
# Two modes, selected by the presence of -source_line (also accepted as -source):
#   * with a source line: one Meadow (the one named by -meadow_type if it is available,
#     otherwise the first available one) parses the report from the given source;
#   * without it: each available Meadow is asked for report entries covering the interval
#     returned by interval_workers_with_unknown_usage() for that Meadow.
#
# Takes no arguments and returns nothing useful.
# Dies on invalid command-line arguments, when neither -url nor -reg_alias is given,
# or when -source_line is used and no Meadow is available.
#
# NOTE(review): assumes the Bio::EnsEMBL::Hive::* classes used below are loaded at file scope -
# the corresponding "use" lines are not visible in this view; confirm.
sub main {

    my ($url, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $source_line, $username, $meadow_type, $help);

    GetOptions(
            # connect to the database:
        'url=s'                 => \$url,
        'reg_conf|regfile=s'    => \$reg_conf,
        'reg_type=s'            => \$reg_type,
        'reg_alias|regname=s'   => \$reg_alias,
        'nosqlvc'               => \$nosqlvc,       # using "nosqlvc" instead of "sqlvc!" for consistency with scripts where it is a propagated option

        'username=s'            => \$username,      # say "-username all" if the pipeline was run by several people
        'source_line|source=s'  => \$source_line,   # "source" is the spelling used in the manual; abbreviations are off, so it needs an explicit alias
        'meadow_type=s'         => \$meadow_type,
        'h|help'                => \$help,
    ) or die "Error in command line arguments\n";

    if (@ARGV) {
        die "ERROR: There are invalid arguments on the command-line: ". join(" ", @ARGV). "\n";
    }

    if ($help) {
        pod2usage({-exitvalue => 0, -verbose => 2});
    }

    my $hive_dba;
    if($url or $reg_alias) {
        $hive_dba = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(
            -url                            => $url,
            -reg_conf                       => $reg_conf,
            -reg_type                       => $reg_type,
            -reg_alias                      => $reg_alias,
            -no_sql_schema_version_check    => $nosqlvc,
        );
        $hive_dba->dbc->requires_write_access();
    } else {
        die "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
    }

    my $queen = $hive_dba->get_Queen;
    my $meadow_2_pid_wid = $queen->fetch_HASHED_FROM_meadow_type_AND_meadow_name_AND_process_id_TO_worker_id();

    my $config = Bio::EnsEMBL::Hive::Utils::Config->new();
    my $valley = Bio::EnsEMBL::Hive::Valley->new($config);

    if( $source_line ) {

            # Prefer the requested meadow type; fall back to the first available Meadow.
        my $meadow = $valley->available_meadow_hash->{$meadow_type || ''} || $valley->get_available_meadow_list()->[0];

            # Without this guard the next line would fail with an opaque "method on undefined value" error.
        die "ERROR: No available Meadow to parse the source ( $source_line ) with\n" unless $meadow;

        warn "Taking the resource_usage data from the source ( $source_line ), assuming Meadow ".$meadow->signature."\n";

        if(my $report_entries = $meadow->parse_report_source_line( $source_line ) ) {
            $queen->store_resource_usage( $report_entries, $meadow_2_pid_wid->{$meadow->type}{$meadow->cached_name} );
        }

    } else {
        warn "Searching for Workers without known resource_usage...\n";

        my $meadow_2_interval = $queen->interval_workers_with_unknown_usage();

        foreach my $meadow (@{ $valley->get_available_meadow_list() }) {

            warn "\nFinding out the time interval when the pipeline was run on Meadow ".$meadow->signature."\n";

                # Intervals are keyed by meadow type, then by meadow name.
            if(my $our_interval = $meadow_2_interval->{ $meadow->type }{ $meadow->cached_name } ) {
                if(my $report_entries = $meadow->get_report_entries_for_time_interval( $our_interval->{'min_submitted'}, $our_interval->{'max_died'}, $username ) ) {
                    $queen->store_resource_usage( $report_entries, $meadow_2_pid_wid->{$meadow->type}{$meadow->cached_name} );
                }
            } else {
                warn "\tNothing new to store for Meadow ".$meadow->signature."\n";
            }
        }
    }
}
101 
102 __DATA__
103 
104 =pod
105 
106 =head1 NAME
107 
108 load_resource_usage.pl
109 
110 =head1 DESCRIPTION
111 
112 This script obtains resource usage data for your pipeline from the Meadow and stores it in the C<worker_resource_usage> table.
113 Your Meadow class/plugin has to support offline examination of resources in order for this script to work.
114 
115 Based on the start time of the first Worker and end time of the last Worker (as recorded in the pipeline database),
116 it pulls the relevant data out of your Meadow (runs the C<bacct> script in case of LSF), parses the report and stores the result in the C<worker_resource_usage> table.
117 You can join this table to the C<worker> table USING(meadow_name,process_id) in the usual MySQL way
118 to filter by analysis_id, do various stats, etc.
119 
120 You can optionally provide an external filename or command to get the data from (don't forget to append a "|" to the end of a command!)
121 and then the data will be taken from your source and parsed from there.
122 
123 =head1 USAGE EXAMPLES
124 
125  # Just run it the usual way: query and store the relevant data into "worker_resource_usage" table:
126  load_resource_usage.pl -url mysql://username:secret@hostname:port/long_mult_test
127 
128  # The same, but assuming another user "someone_else" ran the pipeline:
129  load_resource_usage.pl -url mysql://username:secret@hostname:port/long_mult_test -username someone_else
130 
131  # Assuming the dump file existed. Load the dumped bacct data into "worker_resource_usage" table:
132  load_resource_usage.pl -url mysql://username:secret@hostname:port/long_mult_test -source_line long_mult.bacct
133 
134  # Provide your own command to fetch and parse the worker_resource_usage data from:
135  load_resource_usage.pl -url mysql://username:secret@hostname:port/long_mult_test -source_line "bacct -l -C 2012/01/25/13:33,2012/01/25/14:44 |" -meadow_type LSF
136 
137 =head1 OPTIONS
138 
139 =over
140 
141 =item --help
142 
143 print this help
144 
145 =item --url <url string>
146 
147 URL defining where eHive database is located
148 
149 =item --username <username>
150 
151 if it wasn't you who ran the pipeline, the name of that user can be provided
152 
153 =item --source_line <filename or command>
154 
155 alternative source of worker_resource_usage data. Can be a filename or a pipe-from command (a command followed by "|").
156 
157 =item --meadow_type <type>
158 
159 only used when -source_line is given. Tells which meadow type the source filename relates to. Defaults to the first available meadow (LOCAL being considered as the last available)
160 
161 =item --nosqlvc
162 
163 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
164 
165 =back
166 
167 =head1 LICENSE
168 
169  See the NOTICE file distributed with this work for additional information
170  regarding copyright ownership.
171 
172  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
173  You may obtain a copy of the License at
174 
175  http://www.apache.org/licenses/LICENSE-2.0
176 
177  Unless required by applicable law or agreed to in writing, software distributed under the License
178  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
179  See the License for the specific language governing permissions and limitations under the License.
180 
181 =head1 CONTACT
182 
183 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates
184 
185 =cut
186 
usage
public usage()
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor::new
public new()
Bio::EnsEMBL::Hive::Utils::URL::hide_url_password
public Void hide_url_password()
Bio::EnsEMBL::Hive::Utils::URL
Definition: URL.pm:11
Bio::EnsEMBL::Hive::Utils::Config::new
public new()
Bio::EnsEMBL::Hive::Valley::new
public new()
Bio::EnsEMBL::Hive::Utils::Config
Definition: Config.pm:12
BEGIN
public BEGIN()
run
public run()
main
public main()
Bio::EnsEMBL::Hive::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:31
Bio::EnsEMBL::Hive::Valley
Definition: Valley.pm:16