ensembl-hive  2.7.0
FoldLeft_conf.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Examples::Factories::PipeConfig::FoldLeft_conf
6 
7 =head1 SYNOPSIS
8 
9  # Initialize the pipeline
10  init_pipeline.pl Bio::EnsEMBL::Hive::Examples::Factories::PipeConfig::FoldLeft_conf -pipeline_url $EHIVE_URL
11 
12  # Run all the jobs except the final one
13  runWorker.pl -url $EHIVE_URL -can_respecialize -analyses_pattern '%-report'
14 
15  # Run it in debug mode to see its parameters and find the values that have been folded
16  runWorker.pl -url $EHIVE_URL -analyses_pattern 'report' -debug
17 
18  # The job input_ids only contain the various states of "input_id_list"
19  db_cmd.pl -url $EHIVE_URL -sql 'SELECT job_id, prev_job_id , analysis_id, input_id FROM job'
20 
21  # whilst the computed data is in the "accu" table
22  db_cmd.pl -url $EHIVE_URL -sql 'SELECT * FROM accu'
23 
24 =head1 DESCRIPTION
25 
26  eHive implementation of tail-recursion of "fold-left".
27  In general terms, we have a list $l (each element is of type 'a), a function $f from ('b, 'a) to 'b, and an initial value $ini of type 'b.
28  Left-folding is a recursion on $l where we first compute $f($ini, $l[1]) and use this value as $ini on the sub-list that starts from the
29  second element. Overall, the formula is $f($f( ... $f($f($ini, $l[1]), $l[2]) ... , $l[n-1]), $l[n]) [1-based arrays]
30  This recursion mode is also called "tail-recursion" because once we've consumed the first element, we can loop with a new $ini and a new $l.
31 
32  In this example, each element of the list (each input_id) has two parameters: an integer "val" and a string "str". We have a "fold" analysis
33  that folds the list in three ways: integer addition, string concatenation, and total string length (to show we can accumulate a different type
34  from the elements).
35 
36 =head1 LICENSE
37 
38  See the NOTICE file distributed with this work for additional information
39  regarding copyright ownership.
40 
41  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
42  You may obtain a copy of the License at
43 
44  http://www.apache.org/licenses/LICENSE-2.0
45 
46  Unless required by applicable law or agreed to in writing, software distributed under the License
47  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
48  See the License for the specific language governing permissions and limitations under the License.
49 
50 =head1 CONTACT
51 
52  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users
53  to discuss Hive-related questions or to be notified of our updates
54 
55 =cut
56 
57 
59 
60 use strict;
61 use warnings;
62 
64 use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly
65 
66 
# Describe the analyses of the fold-left example pipeline.
#
# Returns an arrayref holding three analysis definitions (hashrefs), in order:
#   consume_list - pops elements off 'input_id_list' and fans them out to 'fold'
#   fold         - computes the next accumulated values and stores them in accus
#   report       - final job, run once the list has been exhausted
#
# NOTE(review): WHEN, ELSE and INPUT_PLUS are not defined in the visible part of
# this file; presumably they are provided by HiveGeneric_conf - confirm.
sub pipeline_analyses {
    my ($self) = @_;

    # The list to fold: one { val, str } hash per element.
    my @elements_to_fold = map { +{ val => $_, str => "x$_" } } (1, 6, 3, 8);

    # The default parameter of GrabN is to take 1 hash at a time from the left
    my $consume_list_analysis = {
        -logic_name => 'consume_list',
        -module     => 'Bio::EnsEMBL::Hive::Examples::Factories::RunnableDB::GrabN',
        -input_ids  => [
            { 'input_id_list' => [ @elements_to_fold ] },
        ],
        -flow_into  => {
            # To "fold", the fan requires access to its parent's parameters,
            # via either INPUT_PLUS or the parameter stack
            '2->A' => { 'fold' => INPUT_PLUS() },
            # Once the fan is done: loop on the rest of the list, or report if nothing is left
            'A->1' => WHEN(
                '#_list_exhausted#' => [ 'report' ],
                ELSE( [ 'consume_list' ] ),
            ),
        },
    };

    my $fold_analysis = {
        -logic_name => 'fold',
        -module     => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
        -parameters => {
            # Default values that initialise the recursion
            'sum'        => 100,
            'concat'     => 'START:',
            'length'     => 0,
            # These expressions refer to the parameters defined just above. The first job
            # uses the analysis-level defaults; subsequent jobs use the value accumulated
            # by the accus below and propagated with INPUT_PLUS
            'new_sum'    => '#expr( #sum# + #val# )expr#',
            'new_concat' => '#expr( #concat# . #str# )expr#',
            'new_length' => '#expr( #length# + length(#str#) )expr#',
        },
        -flow_into  => {
            1 => [
                # accu_name must be the name of a parameter that has a default value above
                # accu_input_variable must be generated by the runnable
                '?accu_name=sum&accu_input_variable=new_sum',
                '?accu_name=concat&accu_input_variable=new_concat',
                '?accu_name=length&accu_input_variable=new_length',
            ],
        },
    };

    # This analysis will have 1 job with sum=118, concat="START:x1x6x3x8" and length=8
    my $report_analysis = {
        -logic_name => 'report',
        -module     => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
    };

    return [
        $consume_list_analysis,
        $fold_analysis,
        $report_analysis,
    ];
}
115 
116 1;
117 
map
public map()
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
debug
public debug()
Bio::EnsEMBL::Hive::Examples::Factories::PipeConfig::FoldLeft_conf
Definition: FoldLeft_conf.pm:40
Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf
Definition: HiveGeneric_conf.pm:54
Bio::EnsEMBL::Hive
Definition: Hive.pm:38