9 # Initialize the pipeline
12 # Run all the jobs except the final one
13 runWorker.pl -url $EHIVE_URL -can_respecialize -analyses_pattern
'%-report'
15 # Run it in debug mode to see its parameters and find the values that have been folded
16 runWorker.pl -url $EHIVE_URL -analyses_pattern 'report' -debug
18 # The job input_ids only contain the various states of "input_id_list"
19 db_cmd.pl -url $EHIVE_URL -sql
'SELECT job_id, prev_job_id , analysis_id, input_id FROM job'
21 # whilst the computed data is in the "accu" table
22 db_cmd.pl -url $EHIVE_URL -sql
'SELECT * FROM accu'
26 eHive implementation of tail-recursion of
"fold-left".
27 In general terms, we have a list $l (each element is of type
'a), a function $f from ('b,
'a) to 'b, and an initial value $ini of type
'b.
28 Left-folding is a recursion on $l where we first compute $f($ini, $l[1]) and use this value as $ini on the sub-list that starts from the
29 second element. Overall, the formula is $f($f( ... $f($f($ini, $l[1]), $l[2]) ... , $l[n-1]), $l[n]) [1-based arrays]
30 This recursion mode is also called "tail-recursion" because once we've consumed the first element, we can loop with a
new $ini and a
new $l.
32 In this example, each element of the list (each input_id) has two parameters: an integer "val" and a string "str". We have a "fold" analysis
33 that folds the list in three ways: integer addition, string concatenation, and total string length (to show that we can accumulate a different type of data than the elements being folded).
38 See the NOTICE file distributed with
this work
for additional information
39 regarding copyright ownership.
41 Licensed under the Apache License, Version 2.0 (the
"License"); you may not use
this file except in compliance with the License.
42 You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
46 Unless required by applicable law or agreed to in writing, software distributed under the License
47 is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
48 See the License
for the specific language governing permissions and limitations under the License.
52 Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users
53 to discuss Hive-related questions or to be notified of our updates
64 use base (
'Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly
67 sub pipeline_analyses {
71 # The default parameter of GrabN is to take 1 hash at a time from the left
72 { -logic_name =>
'consume_list',
73 -module =>
'Bio::EnsEMBL::Hive::Examples::Factories::RunnableDB::GrabN',
75 'input_id_list' => [
map { {val => $_, str =>
"x$_"} } (1, 6, 3, 8)],
78 # To "fold", the fan requires access to its parent's parameters, via either INPUT_PLUS or the parameter stack
79 '2->A' => {
'fold' => INPUT_PLUS },
80 'A->1' => WHEN(
'#_list_exhausted#' => [
'report' ], ELSE [
'consume_list' ] ),
84 { -logic_name =>
'fold',
85 -module =>
'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
87 # default value to initialise the recursion
91 # Note that the expressions refer to the parameters defined above. The first job will
92 # use those default values stored at the analysis level, but the next jobs will use the
93 # value accumulated by the accu below, and propagated with INPUT_PLUS
94 'new_sum' =>
'#expr( #sum# + #val# )expr#',
95 'new_concat' =>
'#expr( #concat# . #str# )expr#',
96 'new_length' =>
'#expr( #length# + length(#str#) )expr#',
100 # accu_name must be the name of a parameter that has a default value above
101 # accu_input_variable must be generated by the runnable
102 '?accu_name=sum&accu_input_variable=new_sum',
103 '?accu_name=concat&accu_input_variable=new_concat',
104 '?accu_name=length&accu_input_variable=new_length',
109 # This analysis will have 1 job with sum=118, concat="START:x1x6x3x8" and length=8
110 { -logic_name =>
'report',
111 -module =>
'Bio::EnsEMBL::Hive::RunnableDB::Dummy',