6 # Finding out own path in order to reference own components (including own modules):
10 $ENV{
'EHIVE_ROOT_DIR'} ||= File::Basename::dirname( File::Basename::dirname( Cwd::realpath($0) ) );
11 unshift @INC, $ENV{
'EHIVE_ROOT_DIR'}.
'/modules';
26 my ($main_pipeline, $start_analysis, $stop_analysis);
27 my %analysis_name_2_pipeline;
28 my %semaphore_url_hash = ();
36 # connection parameters
37 'url=s' => \$self->{
'url'},
38 'reg_conf|reg_file=s' => \$self->{
'reg_conf'},
39 'reg_type=s' => \$self->{
'reg_type'},
40 'reg_alias|reg_name=s' => \$self->{
'reg_alias'},
41 'nosqlvc' => \$self->{
'nosqlvc'}, #
using "nosqlvc" instead of
"sqlvc!" for consistency with scripts where it is a propagated option
43 'job_id=s@' => \$self->{
'job_ids'}, # Jobs to start from
44 'start_analysis_name=s' => \$self->{
'start_analysis_name'}, #
if given, first trace the graph up to the given analysis or the seed_jobs, and then start visualisation
45 'stop_analysis_name=s' => \$self->{
'stop_analysis_name'}, #
if given, the visualisation is aborted at that analysis and doesn
't go any further
47 'include!
' => \$self->{'include
'}, # if set, include other pipeline rectangles inside the main one
48 'suppress_funnel_parent_link!
' => \$self->{'suppress
'}, # if set, do not show the link to the parent of a funnel job (potentially less clutter)
50 'accu_keys|accus!
' => \$self->{'show_accu_keys
'}, # show accu keys, but not necessarily values
51 'accu_values|values!
' => \$self->{'show_accu_values
'}, # show accu keys & values (implies -accu_keys)
52 'accu_pointers|accu_ptrs!
' => \$self->{'show_accu_pointers
'}, # (attempt to) show which accu values come from which Jobs
54 'o|out|output=s
' => \$self->{'output
'}, # output file name
55 'f|format=s
' => \$self->{'format
'}, # output format (if not guessable from -output)
57 'h|help
' => \$self->{'help
'},
61 pod2usage({-exitvalue => 0, -verbose => 2});
64 $self->{'show_accu_keys
'} = 1 if($self->{'show_accu_values
'}); # -accu_values implies -accu_keys
66 if($self->{'url
'} or $self->{'reg_alias
'}) {
67 $main_pipeline = Bio::EnsEMBL::Hive::HivePipeline->new(
68 -url => $self->{'url
'},
69 -reg_conf => $self->{'reg_conf
'},
70 -reg_type => $self->{'reg_type
'},
71 -reg_alias => $self->{'reg_alias
'},
72 -no_sql_schema_version_check => $self->{'nosqlvc
'},
76 die "\nERROR : Connection parameters (url or reg_conf+reg_alias) need to be specified\n\n";
79 if($self->{'output
'}) {
81 if(!$self->{'format
'}) {
82 if($self->{'output
'}=~/\.(\w+)$/) {
83 $self->{'format
'} = $1;
85 die "Format was not set and could not guess from ".$self->{'output
'}.". Please use either way to select it.\n";
89 $self->{'graph
'} = Bio::EnsEMBL::Hive::Utils::GraphViz->new(
90 'name
' => 'JobDependencyGraph
',
93 'remincross
' => 'true',
96 # Defined on its own because it should not be in the dot output but
97 # Bio::EnsEMBL::Hive::Utils::GraphViz->new passes all its parameters to dot
98 $self->{'graph
'}->{'SORT
'} = 1;
100 $self->{'graph
'}->cluster_2_nodes( {} );
102 # preload all participating pipeline databases into TheApiary:
103 precache_participating_pipelines( $main_pipeline );
105 my $job_adaptor = $main_pipeline->hive_dba->get_AnalysisJobAdaptor;
106 my $anchor_jobs = $self->{'job_ids
'} && $job_adaptor->fetch_all( 'job_id IN (
'.join(',
', @{$self->{'job_ids
'}} ).')
' );
107 $start_analysis = $self->{'start_analysis_name
'} && $main_pipeline->find_by_query( {'object_type
' => 'Analysis
', 'logic_name
' => $self->{'start_analysis_name
'} } );
108 $stop_analysis = $self->{'stop_analysis_name
'} && $main_pipeline->find_by_query( {'object_type
' => 'Analysis
', 'logic_name
' => $self->{'stop_analysis_name
'} } );
110 my $start_jobs = $anchor_jobs
111 ? find_the_top( $anchor_jobs ) # scan from $anchor_jobs to start_analysis//top
113 ? $job_adaptor->fetch_all_by_analysis_id( $start_analysis->dbID ) # take all jobs of the top analysis
114 : find_the_top( $job_adaptor->fetch_all_by_prev_job_id( undef ) ); # scan from seed_jobs to start_analysis//top
116 foreach my $start_job ( @$start_jobs ) {
117 my $job_node_name = add_job_node( $start_job );
121 for (1..2) { # a hacky way to get relative independence on sorting order (we don't know the ideal sorting order)
122 foreach my $semaphore_url ( keys %semaphore_url_hash ) {
125 foreach my $start_job ( @{
find_the_top( $remote_semaphore->fetch_my_local_controlling_jobs ) } ) {
133 foreach my $analysis_name (keys %analysis_name_2_pipeline) {
134 my $this_pipeline = $analysis_name_2_pipeline{$analysis_name};
135 push @{ $self->{
'graph'}->cluster_2_nodes->{ $this_pipeline->hive_pipeline_name } }, $analysis_name;
138 my $mcluster_name = $main_pipeline->hive_pipeline_name;
139 $self->{
'graph'}->cluster_2_attributes->{ $mcluster_name }{
'cluster_label' } = $mcluster_name;
140 $self->{
'graph'}->cluster_2_attributes->{ $mcluster_name }{
'style' } =
'bold,filled';
141 $self->{
'graph'}->cluster_2_attributes->{ $mcluster_name }{
'fill_colour_pair' } = [
'pastel19', 3];
142 my @other_pipeline_colour_pairs = ( [
'pastel19', 8], [
'pastel19', 5], [
'pastel19', 6], [
'pastel19', 1] );
144 # now rotate through the list of the non-reference pipelines:
147 my $ocluster_name = $other_pipeline->hive_pipeline_name;
149 my $colour_pair = shift @other_pipeline_colour_pairs;
150 $self->{
'graph'}->cluster_2_attributes->{ $ocluster_name }{
'cluster_label' } = $ocluster_name;
151 $self->{
'graph'}->cluster_2_attributes->{ $ocluster_name }{
'style' } =
'bold,filled';
152 $self->{
'graph'}->cluster_2_attributes->{ $ocluster_name }{
'fill_colour_pair' } = $colour_pair;
153 push @other_pipeline_colour_pairs, $colour_pair;
155 if($self->{
'include'}) {
156 push @{ $self->{
'graph'}->cluster_2_nodes->{ $mcluster_name } }, $ocluster_name;
160 if( $self->{
'format'} eq
'dot' ) { # If you need to take a look at the intermediate dot file
161 $self->{
'graph'}->dot_input_filename( $self->{
'output'} );
162 $self->{
'graph'}->as_canon(
'/dev/null' );
165 my $call =
'as_'.$self->{
'format'};
166 $self->{
'graph'}->$call($self->{
'output'});
170 die
"\nERROR : -output filename has to be defined\n\n";
175 ##################### tracing: ##############################################################################
177 # preload all participating pipeline databases into TheApiary:
179 my @pipelines_to_check = @_;
181 my %scanned_pipeline_urls = ();
183 while( my $current_pipeline = shift @pipelines_to_check ) {
184 my $current_pipeline_url = $current_pipeline->hive_dba->dbc->url;
185 unless( $scanned_pipeline_urls{ $current_pipeline_url }++ ) {
186 foreach my $df_target ( $current_pipeline->collection_of(
'DataflowTarget')->list ) {
187 # touching it for the side-effect of loading it to TheApiary:
188 my $target_object_pipeline = $df_target->to_analysis->hive_pipeline;
189 my $target_pipeline_url = $target_object_pipeline->hive_dba->dbc->url;
190 unless(exists $scanned_pipeline_urls{$target_pipeline_url}) {
191 push @pipelines_to_check, $target_object_pipeline;
201 my ($anchor_jobs) = @_;
205 # first try to find the start_analysis on the way up:
206 foreach my $anchor_job ( @$anchor_jobs ) {
210 my $children = $anchor_job->adaptor->fetch_all_by_prev_job_id( $anchor_job->dbID );
211 foreach my $child ( @$children ) {
212 if( my $semaphore = $child->fetch_local_blocking_semaphore ) {
227 if(my $local_blocking_semaphore = $job->fetch_local_blocking_semaphore) {
231 if(my $local_parent_job = $job->prev_job) {
242 my $semaphore = shift @_;
246 foreach my $local_controlling_job ( @{ $semaphore->fetch_my_local_controlling_jobs } ) {
258 ##################### drawing: ##############################################################################
261 my ($job, $job_node_name) = @_;
263 my $job_shape =
'box3d';
264 my $job_status = $job->status;
265 my $job_status_colour = {
'DONE' =>
'DeepSkyBlue',
'READY' =>
'green',
'SEMAPHORED' =>
'grey',
'FAILED' =>
'red'}->{$job_status}
266 my $analysis_status_colour = {
269 "LOADING" =>
"green",
270 "ALL_CLAIMED" =>
"grey",
271 "SYNCHING" =>
"green",
273 "WORKING" =>
"yellow",
274 "DONE" =>
"DeepSkyBlue",
278 my $job_id = $job->dbID;
279 my $job_params = destringify($job->input_id);
281 my $job_label = qq{<<table border=
"0" cellborder=
"0" cellspacing=
"0" cellpadding=
"1">}
282 .qq{<tr><td><u><i>job_id:</i></u></td><td><i>$job_id</i></td></tr>};
284 if(my $param_id_stack = $job->param_id_stack) {
285 $job_label .= qq{<tr><td><u><i>params from:</i></u></td><td><i>$param_id_stack</i></td></tr>};
288 foreach my $param_key (sort keys %$job_params) {
289 my $param_value = $job_params->{$param_key};
290 $job_label .=
"<tr><td>$param_key:</td><td> $param_value</td></tr>";
293 $job_label .=
"</table>>";
296 $self->{
'graph'}->add_node( $job_node_name,
299 fillcolor => $job_status_colour,
303 # adding the job to the corresponding analysis' cluster:
304 my $analysis_name = $job->analysis->relative_display_name($main_pipeline);
307 if($analysis_name =~ m{^(\w+)/(\w+)$}) {
308 $analysis_name = $1 .
'___' . $2;
311 $cluster_label = $analysis_name;
314 my $analysis_status = $job->analysis->status;
315 push @{$self->{
'graph'}->cluster_2_nodes->{ $analysis_name }}, $job_node_name;
316 $self->{
'graph'}->cluster_2_attributes->{ $analysis_name }{
'cluster_label' } = $cluster_label;
317 $self->{
'graph'}->cluster_2_attributes->{ $analysis_name }{
'style' } =
'rounded,filled';
318 $self->{
'graph'}->cluster_2_attributes->{ $analysis_name }{
'fill_colour_pair' } = [ $analysis_status_colour->{$analysis_status} ];
319 $analysis_name_2_pipeline{ $analysis_name } = $job->hive_pipeline;
323 my %job_node_hash = ();
328 my $job_id = $job->dbID;
329 my $job_pipeline_name = $job->hive_pipeline->hive_pipeline_name;
330 my $job_node_name =
'job_'.$job_id.
'__'.$job_pipeline_name;
332 unless($job_node_hash{$job_node_name}++) {
336 # recursion via child jobs:
337 if( !$stop_analysis or ($job->analysis != $stop_analysis) ) {
340 my $children = $job->adaptor->fetch_all_by_prev_job_id( $job_id );
341 foreach my $child_job ( @$children ) {
344 my $child_can_be_controlled = $child_job->fetch_local_blocking_semaphore;
346 unless( $self->{
'suppress'} and $child_can_be_controlled ) {
347 $self->{
'graph'}->add_edge( $job_node_name => $child_node_name,
353 # a local semaphore potentially blocked by this job:
354 if(my $controlled_semaphore = $job->controlled_semaphore) {
357 my $job_status = $job->status;
358 my $parent_is_blocking = ($job_status eq
'DONE' or $job_status eq
'PASSED_ON') ? 0 : 1;
359 my $parent_controlling_colour = $parent_is_blocking ?
'red' :
'darkgreen';
360 my $blocking_arrow = $parent_is_blocking ?
'tee' :
'none';
362 $self->{
'graph'}->add_edge( $job_node_name => $semaphore_node_name,
363 color => $parent_controlling_colour,
365 arrowhead => $blocking_arrow,
371 return $job_node_name;
376 my ($semaphore, $dependent_node_name) = @_;
378 my $semaphore_id = $semaphore->dbID;
379 my $semaphore_pipeline_name = $semaphore->hive_pipeline->hive_pipeline_name;
380 my $semaphore_node_name =
'semaphore_'.$semaphore_id.
'__'.$semaphore_pipeline_name;
382 my $semaphore_blockers = $semaphore->local_jobs_counter + $semaphore->remote_jobs_counter;
383 my $semaphore_is_blocked = $semaphore_blockers > 0;
384 my $meta_shape = $self->{
'show_accu_keys'}
385 ? [
'house',
'invhouse' ] # house shape hints that accu data will be shown
if present
386 : [
'triangle',
'invtriangle']; # triangle shape hints that no accu data will be shown even
if present
387 my $columns_in_table = $self->{
'show_accu_values'} ? 3 : 2;
389 my ($semaphore_shape, $semaphore_bgcolour, $semaphore_fgcolour, $dependent_blocking_arrow_colour, $dependent_blocking_arrow_shape ) = $semaphore_is_blocked
390 ? ($meta_shape->[0],
'grey',
'brown',
'red',
'tee')
391 : ($meta_shape->[1],
'darkgreen',
'white',
'darkgreen',
'none');
393 my @semaphore_label_parts = ();
394 if($semaphore_is_blocked) {
395 if(my $local=$semaphore->local_jobs_counter) { push @semaphore_label_parts,
"local: $local" }
396 if(my $remote=$semaphore->remote_jobs_counter) { push @semaphore_label_parts,
"remote: $remote" }
398 push @semaphore_label_parts,
"open";
400 my $semaphore_label = join(
', ', @semaphore_label_parts);
402 my $accusem_label = qq{<<table border=
"0" cellborder=
"0" cellspacing=
"0" cellpadding=
"1">};
403 $accusem_label .= qq{<tr><td colspan=
"$columns_in_table"><font color=
"$semaphore_fgcolour"><b><i>$semaphore_label</i></b></font></td></tr>};
407 if($self->{
'show_accu_keys'}) {
408 my $raw_accu_data = $semaphore->fetch_my_raw_accu_data;
410 if(@$raw_accu_data) {
411 $accusem_label .= qq{<tr><td colspan=
"$columns_in_table"> </td></tr>}; # skip one table row between semaphore attributes and accu data
413 my %struct_name_2_key_signature_and_value = ();
414 foreach my $accu_rowhash (@$raw_accu_data) {
415 push @{ $struct_name_2_key_signature_and_value{ $accu_rowhash->{
'struct_name'} } },
416 [ $accu_rowhash->{
'key_signature'}, $accu_rowhash->{
'value'}, $accu_rowhash->{
'sending_job_id'} ];
419 my $sending_job_pipeline_name = $semaphore->hive_pipeline->hive_pipeline_name; # assuming cross-database links are currently not stored
421 foreach my $struct_name (sort keys %struct_name_2_key_signature_and_value) {
422 $accusem_label .= $self->{
'show_accu_values'}
423 ? qq{<tr><td></td><td><b><u>$struct_name</u></b></td><td></td></tr>}
424 : qq{<tr> <td><b><u>$struct_name</u></b></td><td></td></tr>};
426 my @sorted_values = sort {(($a->[2]
427 foreach my $accu_vector ( @sorted_values ) {
428 my ($key_signature, $value, $sending_job_id) = @$accu_vector;
431 my $protected_value = $self->{
'graph'}->protect_string_for_display($value);
432 my $port_label =
"${semaphore_node_name}_${struct_name}_${sending_job_id}";
433 my $port_attribute = $sending_job_id ? qq{port=
"$port_label"} :
'';
435 if(my $sending_job_node_name =
'job_'.$sending_job_id.
'__'.$sending_job_pipeline_name) {
436 push @{ $accu_ptrs{$sending_job_node_name} }, $port_label;
439 $accusem_label .= $self->{
'show_accu_values'}
440 ? qq{<tr><td $port_attribute>$key_signature</td><td> <b>--></b> </td><td>$protected_value</td></tr>}
441 : qq{<tr><td $port_attribute></td><td>$key_signature</td></tr>};
447 $accusem_label .=
"</table>>";
449 $self->{
'graph'}->add_node( $semaphore_node_name,
450 shape => $semaphore_shape, #
'note',
453 fillcolor => $semaphore_bgcolour,
454 label => $accusem_label,
457 if($dependent_node_name) {
458 $self->{
'graph'}->add_edge( $semaphore_node_name => $dependent_node_name,
459 color => $dependent_blocking_arrow_colour,
461 arrowhead => $dependent_blocking_arrow_shape,
467 if($self->{
'show_accu_pointers'}) {
468 foreach my $sending_job_node_name (keys %accu_ptrs) {
469 foreach my $receiving_port (@{ $accu_ptrs{$sending_job_node_name} }) {
471 $self->{
'graph'}->add_edge( $sending_job_node_name => $semaphore_node_name,
472 headport => $receiving_port.
':w',
480 return $semaphore_node_name;
485 my $semaphore = shift @_;
487 my $semaphore_url = $semaphore->relative_url( 0 ); # request
for an absolute URL
488 my $semaphore_id = $semaphore->dbID;
489 my $semaphore_pipeline_name = $semaphore->hive_pipeline->hive_pipeline_name;
490 my $semaphore_node_name =
'semaphore_'.$semaphore_id.
'__'.$semaphore_pipeline_name;
492 unless($semaphore_url_hash{$semaphore_url}++) {
494 my ($accu_node_name, $target_cluster_name);
496 if(my $dependent_job = $semaphore->dependent_job) {
497 my $dependent_job_node_name =
add_job_node( $dependent_job );
501 $target_cluster_name = $dependent_job->analysis->relative_display_name($main_pipeline);
503 if($dependent_job->analysis->hive_pipeline->hive_pipeline_name eq $main_pipeline->hive_pipeline_name) {
504 $target_cluster_name =~s{^.*/}{}; # workaround since we may have added $main_pipeline into TheApiary
506 $target_cluster_name =~s{/}{___};
509 } elsif(my $dependent_semaphore = $semaphore->dependent_semaphore) {
515 $target_cluster_name = $semaphore->hive_pipeline->hive_pipeline_name;
517 # can we trace the local blocking jobs up to their roots?
518 foreach my $start_job ( @{
find_the_top( $dependent_semaphore->fetch_my_local_controlling_jobs ) } ) {
522 }
else { # The semaphore is not blocking anything, possibly the end of execution.
526 $target_cluster_name = $semaphore_pipeline_name;
529 # adding the semaphore node to the cluster of the dependent job's analysis:
530 push @{$self->{
'graph'}->cluster_2_nodes->{ $target_cluster_name }}, $semaphore_node_name;
533 return $semaphore_node_name;
547 visualize_jobs.pl -help
549 visualize_jobs.pl [ -url mysql:
553 This program generates a visualisation of a subset of interrelated Jobs, Semaphores and Accumulators from a given pipeline database.
555 Jobs are represented by 3D-rectangles which contain parameters and are colour-coded (reflecting the Job's status).
556 Semaphores are represented by triangles (red upward-pointing = closed, green downward-pointing = open) which contain the counter.
557 Accumulators are represented by rectangles with key-paths and may contain data (configurable).
559 Blue solid arrows show Jobs' parent-child relationships (parents point at their children).
560 Dashed red lines show Jobs blocking downstream Semaphores.
561 Dashed green lines show Jobs no longer blocking downstream Semaphores (when the Jobs have finished successfully).
562 Dashed red/green lines (with colour matching the Semaphore's) also link the Semaphores to their Accumulators and further to the controlled Job.
570 URL defining where eHive database is located
572 =item --reg_conf <path>
574 path to a Registry configuration file
576 =item --reg_alias <name>
578 species/alias name for the eHive DBAdaptor
582 "No SQL Version Check" - set if you want to force working with a database created by a potentially schema-incompatible API
586 Start with the given job(s) and reach as far as possible using parent-child relationships.
588 =item --start_analysis_name <logic_name>
590 Trace up to this Analysis and start displaying from this Analysis.
592 =item --stop_analysis_name <logic_name>
594 Make this Analysis the last one to be displayed.
595 As a result, the graph may not contain the initial job_id(s).
599 If set, in multi-pipeline contexts include other pipeline rectangles inside the "main" one.
602 =item --suppress_funnel_parent_link
604 If set, do not show the link to the parent of a funnel Job (potentially less clutter).
609 If set, show accu keys in Semaphore nodes.
614 If set, show accu keys & values in Semaphore nodes.
617 =item --accu_pointers
619 If set, show an extra link between an item in the accu and the local Job that generated it.
622 =item --output <path>
624 Location of the file to write to.
625 The file extension (.png, .jpeg, .dot, .gif, .ps) will define the output format.
629 Print this help message
633 =head1 EXTERNAL DEPENDENCIES
643 See the NOTICE file distributed with this work for additional information
644 regarding copyright ownership.
646 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
647 You may obtain a copy of the License at
649 http://www.apache.org/licenses/LICENSE-2.0
651 Unless required by applicable law or agreed to in writing, software distributed under the License
652 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
653 See the License for the specific language governing permissions and limitations under the License.
657 Please subscribe to the eHive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss eHive-related questions or to be notified of our updates