ensembl-hive  2.7.0
LongMultClient_conf.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMultClient_conf
6 
7 =head1 SYNOPSIS
8 
9  # initialize the "server" database first and note its URL - you will need it to initialize the "client" later:
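10  init_pipeline.pl Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMultServer_conf -password <mypass>
11 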
12  # initialize the "client" database by plugging the server's URL:
13  init_pipeline.pl Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMultClient_conf -password <mypass> -server_url $SERVER_HIVE_URL
14 
15  # optionally also seed it with your specific values:
16  seed_pipeline.pl -url $CLIENT_HIVE_URL -logic_name take_b_apart -input_id '{ "a_multiplier" => "12345678", "b_multiplier" => "3359559666" }'
17 
18  # run the first analysis of the "Client" in order to seed the first jobs into the "Server" pipeline
19  runWorker.pl -url $CLIENT_HIVE_URL
20 
21  # run the "Server" (it will exit when all its jobs are done)
22  beekeeper.pl -url $SERVER_HIVE_URL -loop_until NO_WORK
23 
24  # run the "Client" (it will exit when all its jobs are done)
25  beekeeper.pl -url $CLIENT_HIVE_URL -loop_until NO_WORK
26 
27 =head1 DESCRIPTION
28 
29  This is the "Client" PipeConfig file of a special two-part version of the long multiplication example pipeline.
30  Please make sure you FULLY understand how the LongMult_conf works before trying this one.
31 
32  We have split the original LongMult_conf into two parts, the "Client" and the "Server" that can be used to initialize
33  two separate Hive pipeline databases.
34 
35  The "Client" kept all the original analyses and the 'final_result' table, but prefers to delegate some of the jobs to the "Server" side.
36  So the "Server" has its own 'part_multiply' to do some of the multiplication work, and its own 'add_together' and the 'final_result'
37  table to do some of the final additions.
38 
39  The link between the pipelines is established via the -server_url command line flag that is passed to the Client database.
40  Thanks to the support of cross-database semaphores we no longer need to depend on static tables, and use cross-database accumulators
41  for returning the data, whether from a local or a remote fan (in this example we have a mix).
42 
43 =head1 LICENSE
44 
45  See the NOTICE file distributed with this work for additional information
46  regarding copyright ownership.
47 
48  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
49  You may obtain a copy of the License at
50 
51  http://www.apache.org/licenses/LICENSE-2.0
52 
53  Unless required by applicable law or agreed to in writing, software distributed under the License
54  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
55  See the License for the specific language governing permissions and limitations under the License.
56 
57 =head1 CONTACT
58 
59  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
60 
61 =cut
62 
63 
64 package Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMultClient_conf;
65 
66 use strict;
67 use warnings;
68 
69 use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly
70 use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; # Allow this particular config to use conditional dataflow and INPUT_PLUS
71 
72 
73 
=head2 pipeline_create_commands

    Description : Implements the pipeline_create_commands() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf,
                  which lists the commands that create and set up the Hive database.
                  On top of the commands inherited from the parent class (database and hive tables' creation)
                  it adds one command that creates the pipeline-specific 'final_result' table.

    Returns     : arrayref of commands, the inherited ones first.

=cut

sub pipeline_create_commands {
    my ($self) = @_;

    # start from whatever the parent class wants to run (database and hive tables' creation)
    my @create_commands = @{ $self->SUPER::pipeline_create_commands };

    # the only additional table needed for the long multiplication pipeline's operation:
    # one row per (a_multiplier, b_multiplier) pair together with its result
    push @create_commands,
        $self->db_cmd('CREATE TABLE final_result (a_multiplier varchar(40) NOT NULL, b_multiplier varchar(40) NOT NULL, result varchar(80) NOT NULL, PRIMARY KEY (a_multiplier, b_multiplier))');

    return \@create_commands;
}
90 
91 
=head2 pipeline_wide_parameters

    Description : Interface method that returns a hashref of pipeline_wide_parameter_name => pipeline_wide_parameter_value pairs.
                  A value does not have to be a scalar; any Perl structure is acceptable (it will be stringified and de-stringified automagically).
                  This config keeps everything defined by the parent class and sets 'take_time' to 1.
                  Please see existing PipeConfig modules for more examples.

=cut

sub pipeline_wide_parameters {
    my ($self) = @_;

    # copy of the parameters inherited from the base class
    my %wide_params = %{ $self->SUPER::pipeline_wide_parameters };

    # our own addition (takes precedence over an inherited value of the same name)
    $wide_params{'take_time'} = 1;

    return \%wide_params;
}
108 
109 
=head2 pipeline_analyses

    Description : Interface method that returns the list of analysis definitions of the "Client" pipeline:
                  the factory ('take_b_apart'), the Client-side fan ('part_multiply') and the Client-side funnel ('add_together').
                  Part of the fan and part of the funnels are dataflown into the pipeline found at -server_url instead of being run locally.

    Returns     : arrayref of hashrefs, one per analysis.

=cut

sub pipeline_analyses {
    my ($self) = @_;
    return [
        # the factory:
        {   -logic_name => 'take_b_apart',
            -module     => 'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::DigitFactory',
            -meadow_type=> 'LOCAL',     # do not bother the farm with such a simple task (and get it done faster)
            -analysis_capacity  =>  2,  # use per-analysis limiter
            -input_ids => [
                { 'a_multiplier' => '9650156169', 'b_multiplier' => '327358788' },
                { 'a_multiplier' => '327358788', 'b_multiplier' => '9650156169' },
            ],
            -flow_into => {
                # fan: the destination depends on the digit; digits not greater than 1 match neither condition
                '2->A' => WHEN(
                    # some fan jobs are outsourced to the "Server":
                    '#digit#>5' => { $self->o('server_url').'?logic_name=part_multiply'
                                        => INPUT_PLUS( { 'digit' => '#digit#', 'take_time' => '#take_time#' } ),
                    },

                    # some are run locally:
                    '#digit#>1 && #digit#<=5' => { 'part_multiply' => INPUT_PLUS() },
                ),

                # funnel: the destination depends on how the two multipliers compare
                'A->1' => WHEN (
                    # some funnels are outsourced to the "Server":
                    '#a_multiplier#>=#b_multiplier#' => $self->o('server_url').'?logic_name=add_together',

                    # and some are run locally:
                    ELSE 'add_together',
                ),
            },
        },

        # the "Client"-side fan analysis (performs multiplication by lower digits)
        {   -logic_name => 'part_multiply',
            -module     => 'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::PartMultiply',
            -analysis_capacity  =>  4,  # use per-analysis limiter
            -flow_into => {
                # branch 1 feeds the 'partial_product' accumulator, addressed by digit, from the 'product' variable
                1 => '?accu_name=partial_product&accu_address={digit}&accu_input_variable=product',
            },
        },

        # the "Client"-side funnel:
        {   -logic_name => 'add_together',
            -module     => 'Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::AddTogether',
            -flow_into => {
                # branch 1 is written into the local 'final_result' table
                1 => '?table_name=final_result',
            },
        },
    ];
}
161 
162 1;
163 
Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::DigitFactory
Definition: DigitFactory.pm:25
Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMultServer_conf
Definition: LongMultServer_conf.pm:47
Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::PartMultiply
Definition: PartMultiply.pm:20
Bio::EnsEMBL::Hive::Examples::LongMult::PipeConfig::LongMultClient_conf
Definition: LongMultClient_conf.pm:47
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf
Definition: HiveGeneric_conf.pm:54
Bio::EnsEMBL::Hive
Definition: Hive.pm:38
Bio::EnsEMBL::Hive::Examples::LongMult::RunnableDB::AddTogether
Definition: AddTogether.pm:21
Bio
Definition: AltAlleleGroup.pm:4