ensembl-hive  2.8.1
AddTogether.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
6 
7 =head1 SYNOPSIS
8 
9  Please refer to Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMult_conf pipeline configuration file
10  to understand how this particular example pipeline is configured and run.
11 
12 =head1 DESCRIPTION
13 
14  'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::AddTogether' is the final step of the pipeline that, naturally,
15  adds the products together and dataflows the result (which is normally stored in the 'final_result' table).
16 
17 =head1 LICENSE
18 
19  See the NOTICE file distributed with this work for additional information
20  regarding copyright ownership.
21 
22  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
23  You may obtain a copy of the License at
24 
25  http://www.apache.org/licenses/LICENSE-2.0
26 
27  Unless required by applicable law or agreed to in writing, software distributed under the License
28  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29  See the License for the specific language governing permissions and limitations under the License.
30 
31 =head1 CONTACT
32 
33  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
34 
35 =cut
36 
37 
38 package Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::AddTogether;
39 
40 use strict;
41 use warnings;
42 
44 
45 use base ('Bio::EnsEMBL::Hive::Process');
46 
47 
=head2 param_defaults

 Description : Implements param_defaults() interface method of Bio::EnsEMBL::Hive::Process.
               Builds and returns a fresh hashref holding this module's default parameter values.

 Returntype  : Hashref (parameter name => default value).

=cut

sub param_defaults {
    my %defaults = (
        # if defined, take data from there rather than from accu
        'intermediate_table_url' => undef,

        # used by post_healthcheck() to fetch the final result from
        'final_table_url'        => '?table_name=final_result',

        # to be used when b_multiplier only contains digits '0' and '1'
        'partial_product'        => { },

        # how much time run() method will spend in sleeping state
        'take_time'              => 0,
    );

    return \%defaults;
}
63 
64 
=head2 fetch_input

 Description : Implements fetch_input() interface method of Bio::EnsEMBL::Hive::Process that is used to read in parameters and load data.
               Obtains the hash of partial products (either from the 'partial_product' parameter or, when
               'intermediate_table_url' is set, from the table found at that URL) and seeds it with the
               two products that never need computing: a_multiplier x 1 and a_multiplier x 0.

    param('a_multiplier'): The first long number (a string of digits - doesn't have to fit a register). Required.

    param('intermediate_table_url'): Optional location of a table to read the partial products from.

    param('partial_product'): Hash of partial products keyed by digit; overwritten with the fetched data when the URL is set.

=cut

sub fetch_input {
    my ($self) = @_;

    my $a_multiplier = $self->param_required('a_multiplier');
    my $source_url   = $self->param('intermediate_table_url');

    my $products;
    if(not $source_url) {
            # regular mode: use whatever is already held in the 'partial_product' parameter
        $products = $self->param('partial_product');
    } else {
            # special compatibility mode, where data is fetched from a given table
        my $source_table = Bio::EnsEMBL::Hive::TheApiary->find_by_url( $source_url, $self->input_job->hive_pipeline );
        $products = $self->param(
            'partial_product',
            $source_table->adaptor->fetch_by_a_multiplier_HASHED_FROM_digit_TO_partial_product( $a_multiplier ),
        );
    }

        # the trivial products are filled in locally
    $products->{1} = $a_multiplier;
    $products->{0} = 0;
}
95 
=head2 run

 Description : Implements run() interface method of Bio::EnsEMBL::Hive::Process that is used to perform the main bulk of the job (minus input and output).
               Passes both multipliers and the partial products to _add_together(), keeps the returned sum
               in the 'result' parameter and then sleeps for param('take_time') seconds.

=cut

sub run {
    my ($self) = @_;

    my $first_number  = $self->param_required('a_multiplier');
    my $second_number = $self->param_required('b_multiplier');
    my $products      = $self->param('partial_product');

        # the actual computation is delegated to the private helper
    my $total = _add_together($first_number, $second_number, $products);
    $self->param('result', $total);

    sleep( $self->param('take_time') );
}
114 
115 
=head2 write_output

 Description : Implements write_output() interface method of Bio::EnsEMBL::Hive::Process that is used to deal with job's output after the execution.
               Dataflows a single output_id holding the value of the 'result' parameter down branch 1.

=cut

sub write_output {
    my ($self) = @_;

    my %output_id = (
        'result' => $self->param('result'),
    );

    return $self->dataflow_output_id( \%output_id, 1 );
}
130 
131 
=head2 post_healthcheck

 Description : Implements post_healthcheck() interface method of Bio::EnsEMBL::Hive::Process that is used to healthcheck the result of the job's execution.
               If the 'final_table_url' parameter is set (it can be a partial URL), the result is fetched back from that table;
               otherwise the value of the 'result' parameter is checked directly.
               The expected product is computed with Math::BigInt, so the check stays exact for multipliers
               that are too long for native arithmetic. A result that is undefined or not a number is reported as INCORRECT.
               The verdict is emitted through $self->warning().

    param('a_multiplier'): The first long number (a string of digits). Required.

    param('b_multiplier'): The second long number (a string of digits). Required.

    param('final_table_url'): Optional location of the table the final result is read back from.

=cut

sub post_healthcheck {
    my $self = shift @_;

    my $a_multiplier    = $self->param_required('a_multiplier');
    my $b_multiplier    = $self->param_required('b_multiplier');
    my $final_result;
    my $location_desc;

    if( my $final_table_url = $self->param('final_table_url') ) {

        my $final_table = Bio::EnsEMBL::Hive::TheApiary->find_by_url( $final_table_url, $self->input_job->hive_pipeline );
        $final_result   = $final_table->adaptor->fetch_by_a_multiplier_AND_b_multiplier_TO_result( $a_multiplier, $b_multiplier);
        $location_desc  = "stored in '$final_table_url' table";
    } else {
        $final_result   = $self->param('result');
        $location_desc  = "not stored";
    }

        # Loaded here so that this method does not depend on the module's import section.
    require Math::BigInt;

        # Native '*' and '==' silently switch to floating point for long numbers, which
        # lets wrong results pass as equal; arbitrary-precision integers keep the check exact.
    my $expected    = Math::BigInt->new($a_multiplier)->bmul( Math::BigInt->new($b_multiplier) );

        # bcmp() returns undef when either side is not a number, which is treated as a mismatch
    my $comparison  = defined($final_result)
                    ? $expected->bcmp( Math::BigInt->new($final_result) )
                    : undef;

    my $correct_or_not = (defined($comparison) && $comparison == 0) ? 'CORRECT' : 'INCORRECT';

        # avoid interpolating an undefined value into the message
    my $shown_result = defined($final_result) ? $final_result : 'undef';

    $self->warning("The result ($location_desc) for ${a_multiplier} x ${b_multiplier} is $shown_result, this result is $correct_or_not");
}
162 
163 
=head2 _add_together

 Description: this is a private function (not a method) that adds all the products with a shift

    Arg[1] : $a_multiplier - the first number; only used in the error message.
    Arg[2] : $b_multiplier - the second number, a string that is split into single characters (digits).
    Arg[3] : $partial_product - hashref mapping each digit of $b_multiplier to the product of $a_multiplier and that digit.

 Returntype : String of decimal digits without redundant leading zeros ('0' for a zero sum, '' if $b_multiplier is empty).

 Exceptions : dies if $partial_product has no defined entry for one of the digits of $b_multiplier.

=cut

sub _add_together {
    my ($a_multiplier, $b_multiplier, $partial_product) = @_;

        # digits of the sum, least significant first; may temporarily hold values above 9
    my @accu = ();

    my @b_digits = reverse split(//, $b_multiplier);
    foreach my $b_index (0..$#b_digits) {
        my $b_digit = $b_digits[$b_index];
        my $product = $partial_product->{$b_digit};

        die "The partial product of $a_multiplier x $b_digit has not arrived at AddTogether stage - please check your wiring!" unless(defined($product));

            # add the product column by column, shifted left by the position of the digit in b
        my @p_digits = reverse split(//, $product);
        foreach my $p_index (0..$#p_digits) {
            $accu[$b_index+$p_index] += $p_digits[$p_index];
        }
    }

        # propagate the carries from the least significant position upwards
    my $carry = 0;
    foreach my $column (@accu) {        # $column aliases the array element, so it is updated in place
        my $total = $column + $carry;
        $carry  = int($total/10);
        $column = $total % 10;
    }
        # whatever is left of the carry becomes new most significant digit(s)
    while($carry) {
        push @accu, $carry % 10;
        $carry = int($carry/10);
    }

        # get rid of all leading zeros, but keep one digit so that a zero sum is returned as '0'
    pop @accu while(@accu > 1 and !$accu[-1]);

    return join('', reverse @accu);
}
202 
203 1;
204 
Bio::EnsEMBL::Hive::Process
Definition: Process.pm:77
run
public run()
Bio::EnsEMBL::Hive::TheApiary::find_by_url
public find_by_url()
Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMult_conf
Definition: LongMult_conf.pm:47
Bio::EnsEMBL::Hive::TheApiary
Definition: TheApiary.pm:16
Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::AddTogether
Definition: AddTogether.pm:21
Bio
Definition: AltAlleleGroup.pm:4