ensembl-hive  2.7.0
generate_timeline.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 
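3 # Reports the activity of each Analysis (or Resource Class) over time, either as tab-separated values on stdout or as an image (the format is chosen from the extension of the output file, among those mapped to a Gnuplot terminal)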
4 
5 use strict;
6 use warnings;
7 
8  # Finding out own path in order to reference own components (including own modules):
9 use Cwd ();
10 use File::Basename ();
11 BEGIN {
12  $ENV{'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
13  unshift @INC, $ENV{'EHIVE_ROOT_DIR'}.'/modules';
14 }
15 
16 use Getopt::Long qw(:config no_auto_abbrev);
17 use List::Util qw(sum);
18 use POSIX;
19 use Pod::Usage;
20 use Data::Dumper;
21 use Scalar::Util qw(looks_like_number);
22 use Time::Piece;
23 use Time::Seconds; # not sure if seconds-only arithmetic also needs it
24 
27 
28 no warnings qw{qw};
29 
31 
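32 # Reference "current time", captured once at start-up: add_event() uses it in place of a missing end date (e.g. "when_died" of a Worker or Role that is still active)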
33 my $now = localtime;
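34 # Tolerance used when comparing floating-point sums to 0 (or to each other): differences below this value are treated as rounding errors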
35 my $rounding_error_threshold = 0.005;
36 
37 main();
38 exit(0);
39 
#####
# Entry point of the script.
# Parses the command line, connects to every requested pipeline, loads the
# Worker / Role timings with raw SQL, cumulates them along time and either
#  - prints the result as tab-separated values on stdout (no usable --output), or
#  - plots it with Chart::Gnuplot into the --output file.
# Dies on invalid options, on unknown modes / keys / output formats and when
# there is nothing to plot.
#####

sub main {

    my (@urls, $reg_conf, $reg_type, $reg_alias, $nosqlvc, $help, $verbose, $mode, $start_date, $end_date, $output, $top, $default_memory, $default_cores, $key, $key_transform_file, $resolution);

    GetOptions(
        # connect to the database:
        'url=s@'                        => \@urls,
        'reg_conf|regfile|reg_file=s'   => \$reg_conf,
        'reg_type=s'                    => \$reg_type,
        'reg_alias|regname|reg_name=s'  => \$reg_alias,
        'nosqlvc'                       => \$nosqlvc,      # using "nosqlvc" instead of "sqlvc!" for consistency with scripts where it is a propagated option

        # miscellaneous options
        'verbose!'                      => \$verbose,
        'h|help'                        => \$help,

        # output control
        'start_date=s'                  => \$start_date,
        'end_date=s'                    => \$end_date,
        'mode=s'                        => \$mode,
        'key=s'                         => \$key,
        'key_transform_file=s'          => \$key_transform_file,
        'resolution=i'                  => \$resolution,
        'top=f'                         => \$top,
        'mem=i'                         => \$default_memory,
        'n_core=i'                      => \$default_cores,
        'output=s'                      => \$output,
    ) or die "Error in command line arguments\n";

    if (@ARGV) {
        die "ERROR: There are invalid arguments on the command-line: ". join(" ", @ARGV). "\n";
    }

    if ($help) {
        pod2usage({-exitvalue => 0, -verbose => 2});
    }

    # Make sure the pipeline class is loaded before instantiating it
    # (this is a no-op if it has already been loaded elsewhere)
    require Bio::EnsEMBL::Hive::HivePipeline;

    my @pipelines;
    foreach my $url (@urls) {
        push @pipelines, Bio::EnsEMBL::Hive::HivePipeline->new(
            -url                            => $url,
            -no_sql_schema_version_check    => $nosqlvc,
        );
    }
    if ($reg_alias) {
        push @pipelines, Bio::EnsEMBL::Hive::HivePipeline->new(
            -reg_conf                       => $reg_conf,
            -reg_type                       => $reg_type,
            -reg_alias                      => $reg_alias,
            -no_sql_schema_version_check    => $nosqlvc,
        );
    }
    unless (@pipelines) {
        die "\nERROR: Connection parameters (url or reg_conf+reg_alias) need to be specified\n";
    }

    # Check whether $mode is valid (the values are the y-axis labels)
    my %allowed_modes = (
        workers => 'Number of Workers',
        memory => 'Memory asked / unused (Gb)',
        cores => 'Number of CPU cores asked / unused',
        pending_workers => 'Number of pending Workers',
        pending_time => 'Average instantaneous pending time (min.)',
    );
    if ($mode) {
        die "Unknown mode '$mode'. Allowed modes are: ".join(", ", keys %allowed_modes) unless exists $allowed_modes{$mode};
        $default_memory = 100 unless $default_memory;
        $default_cores = 1 unless $default_cores;
    } else {
        $mode = 'workers';
    }

    # Check whether $key is valid
    my %allowed_keys = (
        analysis => 'Analysis',
        resource_class => 'Resource Class',
    );
    if ($key) {
        die "Unknown key '$key'. Allowed keys are: ".join(", ", keys %allowed_keys) unless exists $allowed_keys{$key};
        # Check whether the pair ($mode,$key) makes sense
        if (($mode =~ /^pending/) and ($key eq 'analysis')) {
            die "Timeline of pending workers can only be represented by resource-class, not analysis";
        }

    } elsif ($mode =~ /^pending/) {
        $key = 'resource_class';

    } else {
        $key = 'analysis';
    }

    # Custom key transformations (categories)
    if ($key_transform_file) {
        unless (-e $key_transform_file) {
            die "File '$key_transform_file' doesn't exist";
        }
        # "do" looks the file up in @INC unless the path starts with one of qw(/ ./ ../),
        # so anchor any other path to the current directory. The dots must be literal:
        # an unescaped "." would also accept paths like "ab/file.pl".
        $key_transform_file = "./$key_transform_file" unless $key_transform_file =~ m{^\.{0,2}/};
        do $key_transform_file;
        # "do FILE" traps compilation / runtime errors in $@ instead of dying
        die "Could not load '$key_transform_file': $@" if $@;
        unless (defined &get_key_name) {
            die "'$key_transform_file' doesn't contain a function named 'get_key_name'";
        }
    }

    # Timestamps are truncated to a multiple of this (number of minutes)
    $resolution ||= 1;
    die "The resolution must be a positive number of minutes\n" if $resolution < 1;

    # Palette generated with R: c(brewer.pal(9, "Set1"), brewer.pal(12, "Set3")). #FFFFB3 is removed because it is too close to white
    my @palette = qw(#E41A1C #377EB8 #4DAF4A #984EA3 #FF7F00 #FFFF33 #A65628 #F781BF #999999     #8DD3C7 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9 #BC80BD #CCEBC5 #FFED6F    #2F4F4F);

    # File extension => Gnuplot terminal definition
    my %terminal_mapping = (
        'emf' => 'emf noenhanced',
        'png' => 'png noenhanced',
        'svg' => 'svg noenhanced',
        'jpg' => 'jpeg noenhanced',
        'gif' => 'gif noenhanced',
        'ps'  => 'postscript eps noenhanced colour',
        'pdf' => 'pdf colour noenhanced',
    );
    my $gnuplot_terminal = undef;
    if ($output and $output =~ /\.(\w+)$/) {
        $gnuplot_terminal = $1;
        die "The format '$gnuplot_terminal' is not currently supported." if not exists $terminal_mapping{$gnuplot_terminal};
        # Only needed (and loaded) when an image is requested
        require Chart::Gnuplot;

    }

    # Get the memory / CPU requirements from each resource_class (parsed from the LSF submission arguments)
    my %mem_resources = ();
    my %cpu_resources = ();
    foreach my $pipeline (@pipelines) {
        foreach my $rd ($pipeline->collection_of('ResourceDescription')->list) {
            if ($rd->meadow_type eq 'LSF') {
                $mem_resources{"$pipeline..".$rd->resource_class_id} = $1 if $rd->submission_cmd_args =~ m/mem=(\d+)/;
                $cpu_resources{"$pipeline..".$rd->resource_class_id} = $1 if $rd->submission_cmd_args =~ m/-n\s*(\d+)/;
            }
        }
    }
    warn "mem_resources: ", Dumper \%mem_resources if $verbose;
    warn "cpu_resources: ", Dumper \%cpu_resources if $verbose;

    # In these modes the second layer (actual usage) is drawn on top of the requested amount
    my $additive_layer = (($mode eq 'memory') or ($mode eq 'cores')) ? 1 : 0;

    # Get the resource usage information of each worker: "pipeline..worker_id" => [mem_megs, cpu_sec/lifespan_sec]
    my %used_res = ();
    if (($mode eq 'memory') or ($mode eq 'cores') or ($mode eq 'pending_workers') or ($mode eq 'pending_time')) {
        foreach my $pipeline (@pipelines) {
            my $hive_dbc = $pipeline->hive_dba->dbc;
            my $sql_used_res = 'SELECT worker_id, mem_megs, cpu_sec/lifespan_sec FROM worker_resource_usage';
            foreach my $db_entry (@{$hive_dbc->selectall_arrayref($sql_used_res)}) {
                my $worker_id = shift @$db_entry;
                $used_res{"$pipeline..$worker_id"} = $db_entry;
            }
            warn scalar(keys %used_res), " Worker info loaded from worker_resource_usage\n" if $verbose;
        }
    }

    # Get the info about the analysis
    my %default_resource_class;
    foreach my $pipeline (@pipelines) {
        $default_resource_class{"$pipeline..".$_->dbID} = $_->resource_class_id for $pipeline->collection_of('Analysis')->list;
    }
    warn "default_resource_class: ", Dumper \%default_resource_class if $verbose;
    my %key_name_mapping;
    foreach my $pipeline (@pipelines) {
        foreach my $key_object ($pipeline->collection_of($key eq 'analysis' ? 'Analysis' : 'ResourceClass')->list) {
            my $key_id = "$pipeline..".$key_object->dbID;
            my $key_name = $key_transform_file ? get_key_name($key_object) : $key_object->display_name;
            die "No key name for ".$key_object->toString unless $key_name;
            $key_name_mapping{$key_id} = $key_name;
        }
        # Used for rows that have neither an analysis nor a resource-class
        $key_name_mapping{"$pipeline..-1"} = 'UNSPECIALIZED';
    }
    my %unique_key_names = map {$_ => 1} values %key_name_mapping;
    my @key_names = keys %unique_key_names;
    warn scalar(keys %key_name_mapping), " keys: ", Dumper \%key_name_mapping if $verbose;

    # Get the events from the database
    my %events = ();
    my %layers = ();
    foreach my $pipeline (@pipelines) {
        my $hive_dbc = $pipeline->hive_dba->dbc;
        my $sql = $key eq 'analysis'
            ? 'SELECT when_submitted, when_started, when_finished, worker_id, resource_class_id, analysis_id FROM worker LEFT JOIN role USING (worker_id)'
            : 'SELECT when_submitted, when_born, when_died, worker_id, resource_class_id FROM worker';
        my @tmp_dates = @{$hive_dbc->selectall_arrayref($sql)};
        warn scalar(@tmp_dates), " rows in ", $hive_dbc->dbname, "\n" if $verbose;

        foreach my $db_entry (@tmp_dates) {
            my ($when_submitted, $when_born, $when_died, $worker_id, $resource_class_id, $analysis_id) = @$db_entry;

            # Workers that are submitted but not yet born
            next unless $when_born;

            # In case $resource_class_id is undef
            next unless $resource_class_id or $analysis_id;
            $resource_class_id //= $default_resource_class{"$pipeline..$analysis_id"};
            my $key_id = "$pipeline.." . (($key eq 'analysis' ? $analysis_id : $resource_class_id) // -1);
            my $key_name = $key_name_mapping{$key_id};
            # From here on, the identifiers are made unique across pipelines
            $resource_class_id = "$pipeline..$resource_class_id";
            $worker_id = "$pipeline..$worker_id";

            if ($mode eq 'workers') {
                add_event(\%events, $key_name, $when_born, $when_died, 1, $resolution);

            } elsif ($mode eq 'memory') {
                # %events holds the requested memory, %layers the memory actually used (both in Gb)
                my $offset = ($mem_resources{$resource_class_id} || $default_memory) / 1024.;
                add_event(\%events, $key_name, $when_born, $when_died, $offset, $resolution);
                $offset = ($used_res{$worker_id}->[0]) / 1024. if exists $used_res{$worker_id} and $used_res{$worker_id}->[0];
                add_event(\%layers, $key_name, $when_born, $when_died, $offset, $resolution);

            } elsif ($mode eq 'cores') {
                # %events holds the requested cores, %layers the cores actually used
                my $offset = ($cpu_resources{$resource_class_id} || $default_cores);
                add_event(\%events, $key_name, $when_born, $when_died, $offset, $resolution);
                $offset = $used_res{$worker_id}->[1] if exists $used_res{$worker_id} and $used_res{$worker_id}->[1];
                add_event(\%layers, $key_name, $when_born, $when_died, $offset, $resolution);
            } else {
                # pending_workers / pending_time: the interval is submission -> birth
                add_event(\%events, $key_name, $when_submitted, $when_born, 1, $resolution);
                add_event(\%layers, $key_name, $when_submitted, $when_born, 'length_by_60', $resolution);
            }
        }
        $hive_dbc->disconnect_if_idle;
    }
    warn "Events recorded: ", scalar(keys %events), " ", scalar(keys %layers), "\n" if $verbose;

    my $time_samples_data = cumulate_events(\%events, \@key_names, $start_date, $end_date, \%events, $verbose);
    my %tot_analysis = %{$time_samples_data->[0]};
    my @xdata = map {$_->[0]} @{$time_samples_data->[1]};
    my @data_timings = map {$_->[1]} @{$time_samples_data->[1]};
    my $max_workers = $time_samples_data->[2];

    # sum() returns undef on an empty list
    my $total_total = sum(values %tot_analysis) // 0;

    # Most active keys first; keys without any activity are dropped
    my @sorted_key_ids = sort {($tot_analysis{$b} <=> $tot_analysis{$a}) || (lc $a cmp lc $b)} (grep {$tot_analysis{$_}} keys %tot_analysis);
    warn "Sorted key_ids: ", Dumper \@sorted_key_ids if $verbose;

    # Text output: all the keys are reported, $top is not used
    if (not $gnuplot_terminal) {
        print join("\t", 'date', "OVERALL_$mode", @sorted_key_ids), "\n";
        print join("\t", 'total', $total_total, map {$tot_analysis{$_}} @sorted_key_ids), "\n";
        print join("\t", 'proportion', 'NA', map {$tot_analysis{$_}/$total_total} @sorted_key_ids), "\n";
        my $s = 0;
        print join("\t", 'cum_proportion', 'NA', map {$s+=$tot_analysis{$_}/$total_total} @sorted_key_ids), "\n";

        foreach my $row (@{$time_samples_data->[1]}) {
            print join("\t", $row->[0], sum(values %{$row->[1]}), map {$row->[1]->{$_}} @sorted_key_ids)."\n";
        }
        return;
    }

    # The layers are sampled on the same dates as the events, so that both series can be indexed together
    my $layer_samples_data = cumulate_events(\%layers, \@key_names, $start_date, $end_date, \%events, $verbose);
    my @layer_timings = map {$_->[1]} @{$layer_samples_data->[1]};

    if ($mode eq 'pending_time') {
        # Total pending time divided by the number of pending Workers
        foreach my $j (1..(scalar(@data_timings))) {
            foreach my $i (@sorted_key_ids) {
                next if $data_timings[$j-1]->{$i} == 0;
                $data_timings[$j-1]->{$i} = $layer_timings[$j-1]->{$i} / $data_timings[$j-1]->{$i};
            }
        }
    }

    my ($n_relevant_analysis, $need_other_analysis, $real_top) = count_number_relevant_sets(\@sorted_key_ids, \%tot_analysis, $total_total, $top, scalar(@palette), $verbose);

    my @datasets = ();

    # Slightly negative value that stands for "0" in the plotted series (it is also the lower bound of the y-axis)
    my $pseudo_zero_value = -$max_workers / 50;

    # The background plot: the sum of all the analysis
    if ($need_other_analysis) {
        add_dataset(\@datasets, \@data_timings, \@layer_timings, \@xdata,
            \@sorted_key_ids, 'OTHER', $palette[$n_relevant_analysis], $pseudo_zero_value, $additive_layer ? [@sorted_key_ids[$n_relevant_analysis..(scalar(@sorted_key_ids)-1)]] : undef);
    }

    # Each analysis is plotted as the sum of itself and the top ones
    foreach my $i (reverse 1..$n_relevant_analysis) {
        add_dataset(\@datasets, \@data_timings, \@layer_timings, \@xdata,
            [@sorted_key_ids[0..($i-1)]], $sorted_key_ids[$i-1], $palette[$i-1], $pseudo_zero_value, $additive_layer ? [$sorted_key_ids[$i-1]] : undef);
    }

    my $safe_database_location = scalar(@pipelines) > 1 ? scalar(@pipelines) . ' pipelines' : $pipelines[0]->display_name;
    my $plotted_analyses_desc = '';
    if ($n_relevant_analysis < scalar(@sorted_key_ids)) {
        if ($real_top) {
            if ($real_top < 1) {
                $plotted_analyses_desc = sprintf('the top %.1f%% of ', 100*$real_top);
            } else {
                $plotted_analyses_desc = "the top $real_top analyses of ";
            }
        } else {
            $plotted_analyses_desc = "the top $n_relevant_analysis analyses of ";
        }
    }
    my $title = "Timeline of ${plotted_analyses_desc}${safe_database_location}";
    $title .= " from $start_date" if $start_date;
    $title .= " to $end_date" if $end_date;

    unless (@xdata) {
        if ($start_date || $end_date) {
            die "No data to display in this time interval !";
        } else {
            die "No data to display !";
        }
    }

    # Only show the time of the day on the x-axis when the plot spans less than 6 days
    my $data_start = Time::Piece->strptime( $xdata[0] , '%Y-%m-%dT%H:%M:%S');
    my $data_end   = Time::Piece->strptime( $xdata[-1], '%Y-%m-%dT%H:%M:%S');
    my $xlabelfmt = $data_end-$data_start >= 6*24*3600 ? '%b %d' : '%b %d\n %H:%M';

    # The main Gnuplot object
    my $chart = Chart::Gnuplot->new(
        title => $title,
        timeaxis => 'x',
        legend => {
            position => 'outside right',
            align => 'left',
        },
        xtics => {
            labelfmt => $xlabelfmt,
            along => 'out nomirror',
        },
        bg => {
            color => 'white',
        },
        grid => 'on',
        imagesize => '1400, 800',
        output => $output,
        terminal => $terminal_mapping{$gnuplot_terminal},
        ylabel => $allowed_modes{$mode},
        yrange => [$pseudo_zero_value, undef],
        ($start_date && $end_date) ? (xrange => [$start_date, $end_date]) : (),
    );
    $chart->plot2d(@datasets);

}
377 
378 
#####
# Appends one Gnuplot curve (two when a layer is requested) to @$datasets.
#  - $key_ids_to_sum: the keys whose values are summed, sample by sample,
#    to get the height of the curve
#  - $analysis_ids_pattern (optional): keys for which a second curve is
#    drawn, using their value from $layer_timings instead of the one from
#    $data_timings. In that case the first curve gets the fill pattern 1
#    and the legend title goes to the second curve.
# Heights that are null or below the rounding-error threshold are replaced
# with $pseudo_zero_value
#####

sub add_dataset {
    my ($datasets, $data_timings, $layer_timings, $xdata, $key_ids_to_sum, $title, $color, $pseudo_zero_value, $analysis_ids_pattern) = @_;

    # All the curves share the same x-axis and the same look
    my $new_curve = sub {
        my ($yvalues) = @_;
        return Chart::Gnuplot::DataSet->new(
            xdata => $xdata,
            ydata => $yvalues,
            timefmt => '%Y-%m-%dT%H:%M:%S',
            style => 'filledcurves x1',
            linewidth => '0',
            color => $color,
        );
    };

    # Height of the stacked curve at each time sample
    my @ydata = map {
        my $sample = $_;
        my $height = sum(map {$sample->{$_} || 0} @$key_ids_to_sum) || $pseudo_zero_value;
        # Due to rounding errors, values are not always decreased to 0
        ($height < $rounding_error_threshold) ? $pseudo_zero_value : $height;
    } @$data_timings;

    # The curve that will eventually carry the legend title
    my $titled_curve = $new_curve->(\@ydata);
    push @$datasets, $titled_curve;

    if (defined $analysis_ids_pattern) {
        $titled_curve->{fill} = {pattern => 1};

        # Same heights, except that the requested keys contribute their layer value
        my @ydatal = @ydata;
        foreach my $idx (0..$#{$data_timings}) {
            my $height = $ydata[$idx];
            next if $height == $pseudo_zero_value;

            my $data_sample  = $data_timings->[$idx];
            my $layer_sample = $layer_timings->[$idx];
            $height += ($layer_sample->{$_} || 0) - ($data_sample->{$_} || 0) for @$analysis_ids_pattern;

            $ydatal[$idx] = ($height < $rounding_error_threshold) ? $pseudo_zero_value : $height;
        }

        $titled_curve = $new_curve->(\@ydatal);
        push @$datasets, $titled_curve;
    }
    $titled_curve->{title} = $title;
}
429 
430 
#####
# Function to add a new event to the hash.
# Events are defined with birth and death dates ('%Y-%m-%d %H:%M:%S'):
# $offset is added to $events->{birth}{$key} and subtracted from
# $events->{death}{$key}, the dates being stored as '%Y-%m-%dT%H:%M:%S'.
# A false $when_died is replaced with the start-up time of the script ($now).
# $offset can be a number, or the string 'length_by_N', which stands for
# the duration of the event (in seconds) divided by N.
# NB: The dates are truncated (rounded down) to a multiple of $resolution
#     minutes within the hour: seconds are not recorded
# NB: Does not add anything if birth and death are identical (after
#     truncation), or if $offset is a number that is not strictly positive
#####

sub add_event {
    my ($events, $key, $when_born, $when_died, $offset, $resolution) = @_;

    return if looks_like_number($offset) && ($offset <= 0);

    # Protects the truncation below against a division by zero
    $resolution ||= 1;

    # temporary Time::Piece values
    # $now is shared by all the calls: work on a copy, since the object is modified in place below
    my $death_datetime = $when_died ? Time::Piece->strptime( $when_died , '%Y-%m-%d %H:%M:%S') : bless([ @{$now} ], ref($now));
    my $birth_datetime = Time::Piece->strptime( $when_born , '%Y-%m-%d %H:%M:%S');

    # The duration is computed before the truncation, i.e. at the resolution of 1 second
    if ($offset =~ /length_by_(\d+)/) {
        $offset = ($death_datetime - $birth_datetime) / $1;
    }

    # We don't need to draw things at the resolution of 1 second; truncate to $resolution minutes
    # NB: this pokes into the Time::Piece objects, where [0] holds the seconds and [1] the minutes
    $death_datetime->[0] = 0;
    $birth_datetime->[0] = 0;
    $birth_datetime->[1] = $resolution*int($birth_datetime->[1] / $resolution);
    $death_datetime->[1] = $resolution*int($death_datetime->[1] / $resolution);

    # string values:
    my $birth_date = $birth_datetime->date . 'T' . $birth_datetime->hms;
    my $death_date = $death_datetime->date . 'T' . $death_datetime->hms;
    return if $birth_date eq $death_date;

    $events->{$birth_date}{$key} += $offset;
    $events->{$death_date}{$key} -= $offset;
}
466 
467 
#####
# Cumulate all the events between start_date and end_date.
#  - $events: { date => { key => increment } }, as filled in by add_event()
#  - $key_names: all the keys, which are initialised to 0
#  - $ref_events: the hash whose dates define the time samples. Passing the
#    same reference hash for the main events and for the layered information
#    yields two series that can be compared sample by sample
# Returns an array-ref with:
#  [0] a hash-ref with, for each key, the sum of its values over the samples
#  [1] an array-ref of [date, {key => value}] samples. Each date is present
#      twice (previous values, then new values) to get an histogram shape
#  [2] the highest total (all keys together) seen in the interval
# Dies if the per-key values and the running total diverge by more than the
# rounding-error threshold
#####

sub cumulate_events {
    my ($events, $key_names, $start_date, $end_date, $ref_events, $verbose) = @_;

    my @event_dates = sort {$a cmp $b} (keys %$ref_events);
    warn scalar(@event_dates), " dates\n" if $verbose;

    my $max_workers = 0;
    my @data_timings = ();
    my %tot_area = ();

    my $num_curr_workers = 0;
    my %hash_curr_workers = (map {$_ => 0 } @$key_names);

    foreach my $event_date (@event_dates) {

        last if $end_date and ($event_date gt $end_date);
        next unless exists $events->{$event_date};

        # First sample of the interval: record the state at $start_date itself
        if ((scalar(@data_timings) == 0) and $start_date and ($event_date gt $start_date)) {
            push @data_timings, [$start_date, { %hash_curr_workers }];
            %tot_area = %hash_curr_workers;
        }

        my $topup_hash = $events->{$event_date};
        foreach my $key_id (keys %$topup_hash) {
            $hash_curr_workers{$key_id} += $topup_hash->{$key_id};
            $num_curr_workers += $topup_hash->{$key_id};
        }
        # Due to rounding errors, the sums may be slightly different
        # (sum() returns undef when there is no key at all)
        my $sum_of_keys = sum(values %hash_curr_workers) // 0;
        die "$sum_of_keys!=$num_curr_workers" if abs($sum_of_keys - $num_curr_workers) > $rounding_error_threshold;

        # Events before $start_date only contribute to the initial state
        next if $start_date and ($event_date lt $start_date);

        #FIXME It should be normalised by the length of the time interval
        $tot_area{$_} += $hash_curr_workers{$_} for keys %hash_curr_workers;

        $max_workers = $num_curr_workers if ($num_curr_workers > $max_workers);

        # We need to repeat the previous value to have an histogram shape
        push @data_timings, [$event_date, { %{$data_timings[-1]->[1]} }] if @data_timings;
        push @data_timings, [$event_date, { %hash_curr_workers }];
    }
    # Extend the last sample up to $end_date
    push @data_timings, [$end_date, { %{$data_timings[-1]->[1]} }] if @data_timings and $end_date and ($data_timings[-1]->[0] lt $end_date);
    warn "Last timing: ", Dumper $data_timings[-1] if $verbose and @data_timings;
    warn "Highest y value: ", $max_workers, "\n" if $verbose;
    warn "Total area: ", Dumper \%tot_area if $verbose;

    return [\%tot_area, \@data_timings, $max_workers];
}
523 
524 
525 
#####
# Function to translate $top (which can be an integer or a float between 0
# and 1) to the number of keys that should be displayed in the legend.
# This is done in accordance to the numbers of available colours in the
# palette, and the relative importance of each category (the most present
# ones are selected first)
#  - $sorted_key_ids: the keys, the most present ones first
#  - $tot_analysis: key => total activity, $total_total being their sum
# Returns a list of three elements:
#  - the number of keys that get their own colour
#  - 1 if the remaining keys have to be merged into an extra category, or 0
#  - the "top" that is effectively applied: $top itself, unless the palette
#    is too small, in which case it is the number of keys actually kept
#####

sub count_number_relevant_sets {
    my ($sorted_key_ids, $tot_analysis, $total_total, $top, $n_colors_in_palette, $verbose) = @_;

    my $n_keys = scalar(@$sorted_key_ids);

    # Get the number of analysis we want to display
    my $n_relevant_analysis = $n_keys;
    my $real_top = $top;
    if ($top and ($top > 0)) {
        if ($top < 1) {
            # $top is a fraction: take keys as long as the share cumulated *before* them is below $top
            my $cumulated_share = 0;
            $n_relevant_analysis = 0;
            foreach my $key_id (@$sorted_key_ids) {
                $n_relevant_analysis++ if $cumulated_share < $top;
                $cumulated_share += $tot_analysis->{$key_id}/$total_total;
            }
        } elsif ($top < $n_keys) {
            # $top is a number of keys (it is parsed as a float: only keep the integer part)
            $n_relevant_analysis = int($top);
        }
    }
    # cap based on the length of the palette
    my $need_other_analysis = $n_relevant_analysis < $n_keys ? 1 : 0;
    if (($n_relevant_analysis+$need_other_analysis) > $n_colors_in_palette) {
        # One colour is reserved for the extra category
        $n_relevant_analysis = $n_colors_in_palette - 1;
        $need_other_analysis = 1;
        # The requested $top could not be honoured: report what is really kept
        $real_top = $n_relevant_analysis;
    }

    warn "$n_relevant_analysis relevant analysis\n" if $verbose;
    return ($n_relevant_analysis, $need_other_analysis, $real_top);
}
558 
559 __DATA__
560 
561 =pod
562 
563 =head1 NAME
564 
565 generate_timeline.pl
566 
567 =head1 SYNOPSIS
568 
569  generate_timeline.pl {-url <url> | [-reg_conf <reg_conf>] -reg_alias <reg_alias> [-reg_type <reg_type>] }
570  [-start_date <start_date>] [-end_date <end_date>]
571  [-top <float>]
572  [-mode [workers | memory | cores | pending_workers | pending_time]]
573  [-key [analysis | resource_class]]
574  [-n_core <int>] [-mem <int>]
575 
576 =head1 DESCRIPTION
577 
578 This script is used for offline examination of the allocation of Workers.
579 
580 Based on the command-line parameters "start_date" and "end_date", or on the start time of the first
581 Worker and end time of the last Worker (as recorded in pipeline database), it pulls the relevant data out
582 of the C<worker> table for accurate timing.
583 By default, the output is in CSV format (tab-separated, printed on stdout), to allow further analysis to be carried out.
584 
585 You can optionally ask the script to generate an image with Gnuplot.
586 
587 
588 =head1 USAGE EXAMPLES
589 
590  # Just run it the usual way: the activity of every Analysis is reported in CSV format
591  generate_timeline.pl -url mysql://username:secret@hostname:port/database > timeline.csv
592 
593  # The same, but getting the Analysis that fill 99.5% of the global activity in a PNG file
594  generate_timeline.pl -url mysql://username:secret@hostname:port/database -top .995 -output timeline_top995.png
595 
596  # Assuming you are only interested in a precise interval (in a PNG file)
597  generate_timeline.pl -url mysql://username:secret@hostname:port/database -start_date 2013-06-15T10:34 -end_date 2013-06-15T16:58 -output timeline_June15.png
598 
599  # Get the required memory instead of the number of Workers
600  generate_timeline.pl -url mysql://username:secret@hostname:port/database -mode memory -output timeline_memory.png
601 
602  # Draw the CPU-usage timeline across several databases
603  generate_timeline.pl -url mysql://username:secret@hostname:port/database -url mysql://username:secret@hostname:port/another_database -mode cores -output timeline_cpu.png
604 
605 
606 =head1 OPTIONS
607 
608 =head2 Connection options
609 
610 =over
611 
612 =item --help
613 
614 print this help
615 
616 =item --url <url string>
617 
618 URL defining where eHive database is located.
619 It can be repeated to draw a timeline across several databases
620 
621 =item --reg_conf
622 
623 path to a Registry configuration file
624 
625 =item --reg_type
626 
627 type of the registry entry ("hive", "core", "compara", etc - defaults to "hive")
628 
629 =item --reg_alias
630 
631 species/alias name for the eHive DBAdaptor
632 
633 =item --nosqlvc
634 
635 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
636 Be aware that generate_timeline.pl uses raw SQL queries that may break on different schema versions
637 
638 =item --verbose
639 
640 Print some info about the data loaded from the database
641 
642 =back
643 
644 =head2 Timeline configuration
645 
646 =over
647 
648 =item --start_date <date>
649 
650 minimal start date of a Worker (the format is ISO8601, e.g. "2012-01-25T13:46")
651 
652 =item --end_date <date>
653 
654 maximal end date of a Worker (the format is ISO8601, e.g. "2012-01-25T13:46")
655 
656 =item --top <float>
657 
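658 maximum number (>= 1) or fraction (< 1) of Analysis to plot individually, the remaining ones being merged into an "OTHER" category. This only applies to the image output: the CSV output always reports everything (default: as many as the colour palette allows, i.e. about 20)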
659 
660 =item --output <string>
661 
662 output file: its extension must match one of the Gnuplot terminals. Otherwise, the CSV output is produced on stdout
663 
664 =item --mode <string>
665 
666 what should be displayed on the y-axis. Allowed values are "workers" (default), "memory", "cores", "pending_workers", or "pending_time"
667 
668 =item --key <string>
669 
670 "analysis" (default) or "resource_class": how to bin the Workers
671 
672 =item --key_transform_file <string>
673 
674 the path to a Perl script that defines a function named "get_key_name". The function is used to provide custom key names for analyses and
675 resource classes instead of their own display names. The function must take the object (Analysis or ResourceClass) as a sole argument and
676 return a (non empty) string.
677 See scripts/dev/generate_timeline_example_key_transform_file.pl for an example.
678 
679 =item --resolution <integer>
680 
681 Timestamps are rounded down to multiples of this amount of minutes within the hour (default: 1).
682 Increase this value when displaying timelines of very large pipelines.
683 
684 =back
685 
686 =head2 Farm configuration
687 
688 =over
689 
690 =item --n_core <int>
691 
692 the default number of cores allocated to a Worker (default: 1)
693 
694 =item --mem <int>
695 
696 the default memory allocated to a Worker (default: 100Mb)
697 
698 =back
699 
700 =head1 EXTERNAL DEPENDENCIES
701 
702 =over
703 
704 =item Chart::Gnuplot
705 
706 =back
707 
708 =head1 LICENSE
709 
710 See the NOTICE file distributed with this work for additional information
711 regarding copyright ownership.
712 
713 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
714 You may obtain a copy of the License at
715 
716  http://www.apache.org/licenses/LICENSE-2.0
717 
718 Unless required by applicable law or agreed to in writing, software distributed under the License
719 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
720 See the License for the specific language governing permissions and limitations under the License.
721 
722 =head1 CONTACT
723 
724 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates
725 
726 =cut
727 
Bio::EnsEMBL::Hive::Utils::URL::hide_url_password
public Void hide_url_password()
Bio::EnsEMBL::Hive::Utils::URL
Definition: URL.pm:11
map
public map()
count_number_relevant_sets
public count_number_relevant_sets()
Bio::EnsEMBL::Hive::HivePipeline::new
public new()
main
public main()
get_key_name
public get_key_name()
cumulate_events
public cumulate_events()
Bio::EnsEMBL::Hive::HivePipeline
Definition: HivePipeline.pm:13
add_dataset
public add_dataset()
about
public about()
BEGIN
public BEGIN()
info
public info()
add_event
public add_event()