9 # initialize the database and build the graph in it (it will also print the value of EHIVE_URL) :
12 # optionally also seed it with your specific values:
13 seed_pipeline.pl -url $EHIVE_URL -logic_name take_b_apart -input_id
'{ "a_multiplier" => "12345678", "b_multiplier" => "3359559666" }'
16 beekeeper.pl -url $EHIVE_URL -loop
20 This is the PipeConfig file for the long multiplication pipeline example.
21 The main point of this pipeline is to provide an example of how to write Hive Runnables and link them together into a pipeline.
25 The setting. Let's assume we are given two loooooong numbers to multiply. Reeeeally long.
26 Soooo long that they do not fit into registers of the CPU and should be multiplied digit-by-digit.
27 For the purposes of this example we also assume this task is very computationally intensive and has to be done in parallel.
29 The long multiplication pipeline consists of three "analyses" (types of tasks):
30 'take_b_apart', 'part_multiply' and 'add_together' that we use to exemplify various features of the Hive.
32 * A 'take_b_apart' job takes in two string parameters, 'a_multiplier' and 'b_multiplier',
33 takes the second one apart into digits, finds what _different_ digits are there,
34 creates several jobs of the 'part_multiply' analysis and one job of 'add_together' analysis.
36 * A 'part_multiply' job takes in 'a_multiplier' and 'digit', multiplies them and accumulates the result in the 'partial_product' accumulator.
38 * An 'add_together' job waits for the first two analyses to complete,
39 takes in 'a_multiplier', 'b_multiplier' and the 'partial_product' hash and produces the final result in the 'final_result' table.
41 Please see the implementation details in Runnable modules themselves.
45 See the NOTICE file distributed with this work for additional information
46 regarding copyright ownership.
48 Licensed under the Apache License,
Version 2.0 (the "License"); you may not use this file except in compliance with the License.
49 You may obtain a copy of the License at
53 Unless required by applicable law or agreed to in writing, software distributed under the License
54 is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
55 See the License for the specific language governing permissions and limitations under the License.
59 Please subscribe to the
Hive mailing list: http:
69 use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); # All
Hive databases configuration files should inherit from HiveGeneric, directly or indirectly
73 =head2 pipeline_create_commands
76 In addition to the standard creation of the database and populating it with
Hive tables and procedures it also creates two pipeline-specific tables used by Runnables to communicate.
80 sub pipeline_create_commands {
83 @{$self->SUPER::pipeline_create_commands}, # inheriting database and hive tables
' creation
85 # additional tables needed for long multiplication pipeline's operation:
86 $self->db_cmd(
'CREATE TABLE final_result (a_multiplier varchar(255) NOT NULL, b_multiplier varchar(255) NOT NULL, result varchar(255) NOT NULL, PRIMARY KEY (a_multiplier, b_multiplier))'),
91 =head2 pipeline_wide_parameters
93 Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs.
94 The value doesn't have to be a scalar; it can be any Perl structure now (will be stringified and de-stringified automagically).
95 Please see existing PipeConfig modules for examples.
99 sub pipeline_wide_parameters {
102 %{$self->SUPER::pipeline_wide_parameters}, # here we inherit anything from the base class
109 =head2 pipeline_analyses
111 Description : Implements pipeline_analyses() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that defines the structure of the pipeline: analyses, jobs, rules, etc.
112 Here it defines three analyses:
113 * 'take_b_apart' that is auto-seeded with a pair of jobs (to check the commutativity of multiplication).
114 Each job will dataflow (create more jobs) via branch #2 into 'part_multiply' and via branch #1 into 'add_together'.
116 * 'part_multiply' with jobs fed from take_b_apart#2.
117 It multiplies input parameters 'a_multiplier' and 'digit' and dataflows the 'partial_product' parameter into branch #1.
119 * 'add_together' with jobs fed from take_b_apart#1.
120 It adds together results of partial multiplication computed by 'part_multiply'.
121 These results are accumulated in the 'partial_product' hash.
122 Until the hash is complete the corresponding 'add_together' job is blocked by a semaphore.
126 sub pipeline_analyses {
129 { -logic_name => 'take_b_apart
',
130 -comment => "A factory that takes in #b_multiplier# and dataflows all its unique non-trivial digits into #2",
132 -meadow_type => 'LOCAL
', # do not bother the farm with such a simple task (and get it done faster)
133 -analysis_capacity => 2, # use per-analysis limiter
135 { 'a_multiplier
' => '9650156169
', 'b_multiplier
' => '327358788
' },
136 { 'a_multiplier
' => '327358788
', 'b_multiplier
' => '9650156169
' },
139 # creating a semaphored fan of jobs; filtering by WHEN; using INPUT_PLUS or templates to top-up the hashes.
141 # A WHEN block is not a hash, so multiple occurrences of each condition (including ELSE) are permitted.
143 '#digit#>1
' => { 'part_multiply
' => INPUT_PLUS() }, # make parent job's parameters available to the kids
144 # ELSE { 'part_multiply' => { 'a_multiplier' => '#a_multiplier#', 'digit' => '#digit#' } },
146 # creating a semaphored funnel job to wait for the fan to complete and add the results:
147 'A->1' => [
'add_together' ],
151 { -logic_name =>
'part_multiply',
152 -comment =>
"Multiplies #a_multiplier# by #digit# and dataflows #product# into #1",
153 -module =>
'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::PartMultiply',
154 -analysis_capacity => 4, # use per-analysis limiter
156 1 => [
'?accu_name=partial_product&accu_address={digit}&accu_input_variable=product' ],
160 { -logic_name =>
'add_together',
161 -comment =>
"Takes in #a_multiplier#, #b_multiplier# and the #partial_product# hash and dataflows the product of #a_multiplier# and #b_multiplier# into #1",
162 -module =>
'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::AddTogether',
164 1 => [
'?table_name=final_result' ],