ensembl-hive  2.6
DataflowRule.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
6 
7 =head1 DESCRIPTION
8 
9  A data container object (methods are intelligent getters/setters) that corresponds to a row stored in 'dataflow_rule' table
10 
11  A dataflow rule is activated when a Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id is called at any moment during a RunnableDB's execution.
12  The current RunnableDB's analysis ($from_analysis) and the requested $branch_code (1 by default) define the entry conditions,
13  and whatever rules match these conditions will generate new jobs with input_ids specified in the dataflow_output_id() call.
14  If input_id_template happens to contain a non-NULL value, it will be used to generate the corresponding input_id instead.
15 
16  Jessica's remark on the structure of to_analysis_url:
17  Extended from design of SimpleRule concept to allow the 'to' analysis to be specified with a network-savvy URL like
18  mysql://ensadmin:<pass>@ecs2:3361/compara_hive_test/analysis?logic_name='blast_NCBI34'
19 
20 =head1 LICENSE
21 
22  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
23  Copyright [2016-2024] EMBL-European Bioinformatics Institute
24 
25  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
26  You may obtain a copy of the License at
27 
28  http://www.apache.org/licenses/LICENSE-2.0
29 
30  Unless required by applicable law or agreed to in writing, software distributed under the License
31  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
32  See the License for the specific language governing permissions and limitations under the License.
33 
34 =head1 CONTACT
35 
36  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
37 
38 =cut
39 
40 
41 package Bio::EnsEMBL::Hive::DataflowRule;
42 
43 use strict;
44 use warnings;
45 
46 use Bio::EnsEMBL::Hive::TheApiary;
47 use Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor;
48 
49 use base ( 'Bio::EnsEMBL::Hive::Storable' );
50 
51 
# Names of the fields that, taken together, identify a dataflow rule.
# A fresh array reference is handed out on every call.
sub unikey {
    my @key_fields = qw( from_analysis branch_code funnel_dataflow_rule unitargets );

    return \@key_fields;
}
55 
56 
57 =head1 AUTOLOADED
58 
59  from_analysis_id / from_analysis
60 
61  funnel_dataflow_rule_id / funnel_dataflow_rule
62 
63 =cut
64 
65 
=head2 branch_code

 Arg [1]    : (optional) branch name or branch code to store
 Function   : getter/setter method for the branch_code of the dataflow rule;
              a true value is passed through DataflowRuleAdaptor::branch_name_2_code() before being stored,
              a false value (undef, 0, '') is stored as given
 Returntype : the currently stored branch code

=cut

sub branch_code {
    my ($self, @args) = @_;

    if (@args) {
        my $requested = $args[0];

        # Only true values are run through the name-to-code mapping; false ones are kept verbatim.
        $self->{'_branch_code'} = $requested
            ? Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor::branch_name_2_code( $requested )
            : $requested;
    }

    return $self->{'_branch_code'};
}
81 
82 
# Looks up, in the pipeline's 'DataflowTarget' collection, every entry whose
# 'source_dataflow_rule' is this rule, and returns whatever find_all_by() yields.
sub get_my_targets {
    my ($self) = @_;

    my $target_collection = $self->hive_pipeline->collection_of( 'DataflowTarget' );

    return $target_collection->find_all_by( 'source_dataflow_rule', $self );
}
88 
89 
# Groups dataflow targets by their on_condition value.
#
# Arg [1]    : (optional) arrayref of targets; when not defined, $self->get_my_targets is used
# Returntype : arrayref of [ $condition, \@targets_with_that_condition ] pairs,
#              ordered by condition in descending string order (an undefined condition compares as '')
sub get_my_targets_grouped_by_condition {
    my ($self, $df_targets) = @_;

    $df_targets = $self->get_my_targets unless defined $df_targets;

    my %group_for;
    for my $target (@$df_targets) {
        # Targets without a condition share the '' bucket.
        my $bucket_key = $target->on_condition // '';

        unless ($group_for{$bucket_key}) {
            $group_for{$bucket_key} = [ $target->on_condition, [] ];
        }
        push @{ $group_for{$bucket_key}[1] }, $target;
    }

    my @groups = sort { ($b->[0] // '') cmp ($a->[0] // '') } values %group_for;

    return \@groups;
}
104 
105 
# Builds the "unitargets" signature string for a list of dataflow targets.
#
# Arg [1]    : arrayref of target objects answering on_condition(), input_id_template() and to_analysis_url()
# Returntype : string made of one "condition:template:url" entry per target, entries joined with ';'
#              (an undefined condition or template is rendered as an empty string)
#
# Entries are ordered by condition, then template, then URL. The URL tie-breaker matters:
# without it, targets that share a condition and a template would keep the caller's order,
# so the same set of targets could produce different signatures.
sub _compute_unitargets {
    my $targets = shift;

    # Read each field once per target instead of on every comparison.
    my @entries = map { [ ($_->on_condition // ''), ($_->input_id_template // ''), scalar($_->to_analysis_url) ] }
                      @$targets;

    my @ordered = sort {
               $a->[0] cmp $b->[0]
            or $a->[1] cmp $b->[1]
            or ($a->[2] // '') cmp ($b->[2] // '')
    } @entries;

    return join( ';', map { $_->[0] . ':' . $_->[1] . ':' . $_->[2] } @ordered );
}
113 
# Getter/setter for the cached "unitargets" signature of this rule.
#
# Arg [1]    : (optional) value to store as the cached signature
# Returntype : the cached signature; when the cached value is false (missing, undef or ''),
#              it is first recomputed from $self->get_my_targets via _compute_unitargets()
#
# NOTE: Caching this value assumes that the list of targets does *not* change
# once the object is loaded. That holds at the moment; if it ever stops being
# true, whatever changes the targets will also have to invalidate the cache.
sub unitargets {
    my $self = shift;

    $self->{'_cached_unitargets'} = shift if @_;

    # Truthiness (not definedness) decides whether to recompute, so an empty signature is never reused.
    $self->{'_cached_unitargets'} ||= do {
        my $current_targets = $self->get_my_targets;
        _compute_unitargets( $current_targets );
    };

    return $self->{'_cached_unitargets'};
}
133 
134 
=head2 toString

 Arg [1]     : (optional) $short - when true, the 'DataflowRule[dbID]: from_analysis' prefix is left out;
               the flag is also handed to each target's toString()
 Example     : print $df_rule->toString()."\n";
 Description : returns a stringified representation of the rule: the branch code, the targets
               (ordered by on_condition, descending) and, if there is one, the funnel rule in its short form
 Returntype  : string

=cut

sub toString {
    my ($self, $short) = @_;

    my @pieces;

    unless ($short) {
        push @pieces, 'DataflowRule[', ($self->dbID // ''), ']: ', $self->from_analysis->logic_name;
    }

    push @pieces, ' --#', $self->branch_code, '--> [ ';

    # Conditional targets come before the unconditional ones (undefined condition compares as '').
    my @ordered_targets = sort { ($b->on_condition // '') cmp ($a->on_condition // '') }
                              @{ $self->get_my_targets() };
    push @pieces, join(', ', map { $_->toString($short) } @ordered_targets), ' ]';

    # The funnel rule is always rendered in its short form.
    if ($self->funnel_dataflow_rule) {
        push @pieces, ' ---|| (' . $self->funnel_dataflow_rule->toString(1) . ' )';
    }

    return join('', @pieces);
}
163 
164 1;
165 
Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor::branch_name_2_code
public branch_name_2_code()
EnsEMBL
Definition: Filter.pm:1
map
public map()
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::AnalysisJob::dataflow_output_id
public dataflow_output_id()
Bio::EnsEMBL::Hive::DataflowRule
Definition: DataflowRule.pm:20
Bio::EnsEMBL::Hive
Definition: Hive.pm:38
Bio::EnsEMBL::Hive::TheApiary
Definition: TheApiary.pm:16
Bio::EnsEMBL::Hive::Storable
Definition: Storable.pm:20
Bio
Definition: AltAlleleGroup.pm:4