ensembl-hive  2.7.0
TableDumperZipper_conf.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Examples::DbCmd::PipeConfig::TableDumperZipper_conf
6 
7 =head1 SYNOPSIS
8 
9  init_pipeline.pl Bio::EnsEMBL::Hive::Examples::DbCmd::PipeConfig::TableDumperZipper_conf -password $ENSADMIN_PSW -db_conn "mysql://ensadmin:${ENSADMIN_PSW}@localhost/lg4_long_mult"
10 
11  seed_pipeline.pl -url "mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables" -logic_name find_tables -input_id "{'only_tables' => '%_result'}"
12 
13  runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
14  runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
15  runWorker.pl -url mysql://ensadmin:${ENSADMIN_PSW}@localhost:3306/lg4_zip_tables
16 
17 =head1 DESCRIPTION
18 
19  This is an example pipeline put together from three analyses (with pre-existing Runnables) :
20 
21  Analysis_1: JobFactory.pm is used to turn the list of tables of the given database into jobs
22 
23  these jobs are sent down the branch #2 into the second analysis
24 
25  Analysis_2: DbCmd.pm is used to dump individual tables; each flows via branch #1 into Analysis_3
26 
27  Analysis_3: SystemCmd.pm is used to compress an individual table dump file
28 
29 =head1 LICENSE
30 
31  See the NOTICE file distributed with this work for additional information
32  regarding copyright ownership.
33 
34  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
35  You may obtain a copy of the License at
36 
37  http://www.apache.org/licenses/LICENSE-2.0
38 
39  Unless required by applicable law or agreed to in writing, software distributed under the License
40  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
41  See the License for the specific language governing permissions and limitations under the License.
42 
43 =head1 CONTACT
44 
45  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
46 
47 =cut
48 
49 
50 package Bio::EnsEMBL::Hive::Examples::DbCmd::PipeConfig::TableDumperZipper_conf;
51 
52 use strict;
53 use warnings;
54 
55 use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); # All Hive databases configuration files should inherit from HiveGeneric, directly or indirectly
56 
57 
58 =head2 pipeline_wide_parameters
59 
60  Description : Interface method that should return a hash of pipeline_wide_parameter_name->pipeline_wide_parameter_value pairs.
61  The value doesn't have to be a scalar, can be any Perl structure (will be stringified and de-stringified automagically).
62 
63 =cut
64 
# Returns a hash reference of pipeline-wide parameters: everything supplied by
# the parent class, overlaid with the settings specific to this pipeline.
sub pipeline_wide_parameters {
    my $self = shift;

    # Fetch the parent's parameters first; our own keys below win on collision.
    my $inherited = $self->SUPER::pipeline_wide_parameters;

    my %table_dump_params = (
        'db_conn'      => $self->o('db_conn'),
        'dumping_flag' => '-t',     # '-t' dumps without the table definition; undef dumps with it
        'directory'    => '.',      # where both the source and the target files live
        'matching_op'  => 'LIKE',   # either 'LIKE' or 'NOT LIKE'
        'only_tables'  => '%',      # any wildcard pattern understood by MySQL
    );

    return { %{$inherited}, %table_dump_params };
}
77 
78 
79 =head2 pipeline_analyses
80 
81  Description : Implements pipeline_analyses() interface method of Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf that defines the structure of the pipeline: analyses, jobs, rules, etc.
82  Here it defines three analyses:
83 
84  * 'find_tables' generates a list of tables whose names match the pattern #only_tables#
85  Each job of this analysis will dataflow (create jobs) via branch #2 into 'table_dumper' analysis.
86 
87  * 'table_dumper' dumps table contents (possibly with table definition) and flows via branch #1 into 'file_compressor' analysis.
88 
89  * 'file_compressor' compresses the dump file
90 
91 =cut
92 
# Returns an array reference describing the three analyses of the pipeline.
# Every analysis names the Runnable that executes it via '-module':
#
#   find_tables     - lists the tables whose names match #only_tables# and
#                     creates one 'table_dumper' job per table on branch #2,
#                     passing the table's Name column as 'table_name'.
#   table_dumper    - runs mysqldump for one table into
#                     #directory#/#table_name#.sql, then flows on branch #1
#                     into 'file_compressor'.
#   file_compressor - gzips the dump file produced by 'table_dumper'.
#
# $self is accepted for interface compatibility but is not read here.
sub pipeline_analyses {
    my ($self) = @_;
    return [
        {   -logic_name => 'find_tables',
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::JobFactory',
            -parameters => {
                'inputquery' => 'SHOW TABLE STATUS WHERE Name #matching_op# "#only_tables#"',
            },
            -flow_into  => {
                # Each result row becomes a job; only the table name is forwarded.
                2 => { 'table_dumper' => { 'table_name' => '#Name#' }, },
            },
        },

        {   -logic_name        => 'table_dumper',
            -module            => 'Bio::EnsEMBL::Hive::RunnableDB::DbCmd',
            -parameters        => {
                'output_file' => '#directory#/#table_name#.sql',
                'executable'  => 'mysqldump',
                'append'      => ['#dumping_flag#', '#table_name#'],
            },
            -analysis_capacity => 2,
            -flow_into         => {
                1 => [ 'file_compressor' ],
            },
        },

        {   -logic_name        => 'file_compressor',
            -module            => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd',
            -parameters        => {
                # Must name the same file that 'table_dumper' wrote.
                'filename' => '#directory#/#table_name#.sql',
                'cmd'      => 'gzip #filename#',
            },
            -analysis_capacity => 8,
        },
    ];
}
129 
130 1;
131 
Bio::EnsEMBL::Hive::Examples::DbCmd::PipeConfig::TableDumperZipper_conf
Definition: TableDumperZipper_conf.pm:19
Bio::EnsEMBL::Hive::RunnableDB::SystemCmd
Definition: SystemCmd.pm:31
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive
Definition: Hive.pm:38
Bio::EnsEMBL::Hive::RunnableDB::DbCmd
Definition: DbCmd.pm:19
Bio::EnsEMBL::Hive::RunnableDB::JobFactory
Definition: JobFactory.pm:35