ensembl-hive  2.8.1
DigitFactory.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::DigitFactory
6 
7 =head1 SYNOPSIS
8 
9  Please refer to Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMult_conf pipeline configuration file
10  to understand how this particular example pipeline is configured and run.
11 
12 =head1 DESCRIPTION
13 
14  'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::DigitFactory' is the first step of the LongMult example pipeline that multiplies two long numbers.
15 
16  It takes apart the second multiplier and creates several 'LongMult::PartMultiply' jobs
17  that correspond to the different digits of the second multiplier.
18 
19  It also "flows into" one 'LongMult::AddTogether' job that will wait until 'LongMult::PartMultiply' jobs
20  complete and will arrive at the final result.
21 
22 =head1 LICENSE
23 
24  See the NOTICE file distributed with this work for additional information
25  regarding copyright ownership.
26 
27  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
28  You may obtain a copy of the License at
29 
30  http://www.apache.org/licenses/LICENSE-2.0
31 
32  Unless required by applicable law or agreed to in writing, software distributed under the License
33  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34  See the License for the specific language governing permissions and limitations under the License.
35 
36 =head1 CONTACT
37 
38  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
39 
40 =cut
41 
42 
43 package Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::DigitFactory;
44 
45 use strict;
46 use warnings;
47 
48 use base ('Bio::EnsEMBL::Hive::Process');
49 
50 
51 =head2 param_defaults
52 
53  Description : Implements param_defaults() interface method of Bio::EnsEMBL::Hive::Process that defines module defaults for parameters.
54 
55 =cut
56 
sub param_defaults {
    # Module-level fallbacks for parameters the job does not supply itself.
    my %defaults = (
        'take_time' => 0,    # number of seconds run() spends sleeping
    );

    return \%defaults;
}
63 
64 
65 =head2 fetch_input
66 
67  Description : Implements fetch_input() interface method of Bio::EnsEMBL::Hive::Process that is used to read in parameters and load data.
68  Here the task of fetch_input() is to read in the second multiplier, split it into its distinct digits and create a set of input_ids (one per distinct digit) that will be used later.
69 
70  param('b_multiplier'): The second long number (a string of digits - doesn't have to fit a register)
71 
72  param('take_time'): How much time to spend sleeping (seconds).
73 
74 =cut
75 
sub fetch_input {
    my ($self) = @_;

    # The second multiplier is mandatory; it is handled as a string of characters.
    my $second_number = $self->param_required('b_multiplier');

    # Tally each character so that repeated digits collapse into a single key.
    my %seen_count;
    $seen_count{$_}++ for split(//, $second_number);

    # One parameter hash per distinct digit, in ascending numeric order.
    my @distinct_digits = sort { $a <=> $b } keys %seen_count;
    my @sub_tasks       = map { +{ 'digit' => $_ } } @distinct_digits;

    # Stash the list under 'sub_tasks' so that write_output() can pick it up.
    $self->param('sub_tasks', \@sub_tasks);
}
93 
94 
95 =head2 run
96 
97  Description : Implements run() interface method of Bio::EnsEMBL::Hive::Process that is used to perform the main bulk of the job (minus input and output).
98  Here we don't have any real work to do, just input and output, so run() just spends some time waiting.
99 
100 =cut
101 
sub run {
    my ($self) = @_;

    # No real computation happens here: simply pause for the configured number of seconds.
    my $seconds = $self->param('take_time');

    sleep($seconds);
}
107 
108 
109 =head2 write_output
110 
111  Description : Implements write_output() interface method of Bio::EnsEMBL::Hive::Process that is used to deal with job's output after the execution.
112  Here we dataflow all the partial multiplication jobs whose input_ids were generated in fetch_input() into the branch-2 ("fan out"),
113  and also dataflow the original task down branch-1 (create the "funnel job").
114 
115 =cut
116 
sub write_output {
    my ($self) = @_;

    # Nothing is written out here; this step only performs dataflow.
    # The list of per-digit parameter hashes was prepared by fetch_input().
    my $fan_jobs = $self->param('sub_tasks');

    # Fan out on branch #2 first; branch #1 will be created if it is wired in the pipeline.
    $self->dataflow_output_id($fan_jobs, 2);

    # To extend the funnel's stack with extra information, one could also flow explicitly into branch #1:
    #   $self->dataflow_output_id( { 'different_digits' => scalar(@$fan_jobs) } , 1);

    # Warning messages get recorded into the 'log_message' table.
    my $job_count = scalar(@$fan_jobs);
    my $message   = 'Attempted to create ' . $job_count . " multiplication jobs";

    $self->warning($message);
}
130 
131 1;
132 
Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::DigitFactory
Definition: DigitFactory.pm:25
Bio::EnsEMBL::Hive::Process::param
public param()
Bio::EnsEMBL::Hive::Process
Definition: Process.pm:77
run
public run()
Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMult_conf
Definition: LongMult_conf.pm:47
Bio
Definition: AltAlleleGroup.pm:4