ensembl-hive  2.8.1
PCL.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
5  Bio::EnsEMBL::Hive::Utils::PCL
6 
7 =head1 DESCRIPTION
8 
9  This module deals with parsing pipeline configuration files written in Perl-based "PipeConfig Language"
10 
11 =head1 LICENSE
12 
13  See the NOTICE file distributed with this work for additional information
14  regarding copyright ownership.
15 
16  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
17  You may obtain a copy of the License at
18 
19  http://www.apache.org/licenses/LICENSE-2.0
20 
21  Unless required by applicable law or agreed to in writing, software distributed under the License
22  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23  See the License for the specific language governing permissions and limitations under the License.
24 
25 =head1 CONTACT
26 
27  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
28 
29 =cut
30 
31 
32 package Bio::EnsEMBL::Hive::Utils::PCL;
33 
34 use strict;
35 use warnings;
36 
37 use Exporter 'import';
38 our @EXPORT = qw(WHEN ELSE INPUT_PLUS);
39 
40 use Bio::EnsEMBL::Hive::Utils ('stringify');
42 
43 
# Sentinel stored as the first element of every condition group; parse_flow_into()
# compares against it to recognise (and validate) a WHEN-group.
our $cond_group_marker = 'CONDitionGRoup';

# Wrap the given arguments into a condition group.
# Returns a fresh arrayref holding the marker followed by the arguments, in their original order.
sub WHEN {
    my @group_members = @_;

    return [ $cond_group_marker, @group_members ];
}
49 
50 
# Build the fallback clause of a condition group: the given targets paired with an undefined condition.
# Returns the two-element list (undef, $targets).
# NB: the ($) prototype makes ELSE parse as a named unary operator taking one scalar argument;
#     do not remove it without checking every call site.
sub ELSE ($) {
    my $fallback_targets = shift;

    return (undef, $fallback_targets);
}
56 
57 
# Prefix an input_id template with '+'.
# A reference is serialised with stringify() first; a missing or undefined template yields just '+'.
# parse_flow_into() in this file strips that prefix again and records it as 'extend_param_stack'.
sub INPUT_PLUS {
    my ($template) = @_;

    $template = '' unless defined $template;

    my $template_text = ref($template) ? stringify($template) : $template;

    return '+'.$template_text;
}
63 
64 
# Create one AnalysisCtrlRule per "wait for" condition of an analysis.
#
#   $pipeline        - pipeline object; collection_of() and add_new_or_update() are called on it
#   $ctrled_analysis - the analysis being controlled; logic_name() is called on it for messages
#   $wait_for        - one analysis name/URL, an arrayref of them, or a false value (no rules are created)
#   $verbose         - passed through to add_new_or_update()
#
# A plain name (word characters only) that is not found among the pipeline's analyses only triggers a warning.
# Anything else must be a URL that parses and whose 'object_type' query parameter is 'Analysis';
# otherwise the sub dies.
sub parse_wait_for {
    my ($pipeline, $ctrled_analysis, $wait_for, $verbose) = @_;

    $wait_for ||= [];
    $wait_for = [ $wait_for ] unless(ref($wait_for) eq 'ARRAY'); # force scalar into an arrayref

        # create control rules:
    foreach my $condition_url (@$wait_for) {
        if($condition_url =~ m{^\w+$}) {
                # Just a warning because analyses can be added later
            $pipeline->collection_of('Analysis')->find_one_by('logic_name', $condition_url)
                or warn "WARNING: Could not find a local analysis '$condition_url' to create a control rule (in '".($ctrled_analysis->logic_name)."')\n";
        } else {
                # URLs that can't be parsed won't magically become parsable, so this is an error
            my $url_hash = Bio::EnsEMBL::Hive::Utils::URL::parse($condition_url)
                or die "ERROR: Could not parse the URL '$condition_url' to create a control rule (in '".($ctrled_analysis->logic_name)."')\n";

                # Now check that the target is actually an analysis.
                # The object type is fetched defensively: comparing an undefined value with 'ne'
                # would emit an "uninitialized value" warning right before the intended error.
            my $query_params = $url_hash->{'query_params'};
            my $object_type  = (ref($query_params) eq 'HASH') ? $query_params->{'object_type'} : undef;

            unless(defined($object_type) and ($object_type eq 'Analysis')) {
                die "ERROR: The URL '$condition_url' does not refer to an Analysis (to create a control rule in '".($ctrled_analysis->logic_name)."')\n";
            }
        }

            # NB: add_new_or_update returns a list, so it is still called in list context; the rule itself is not needed here
        my ($c_rule) = $pipeline->add_new_or_update( 'AnalysisCtrlRule', $verbose,
            'condition_analysis_url'    => $condition_url,
            'ctrled_analysis'           => $ctrled_analysis,
        );
    }

    return;
}
92 
93 
# Translate the "flow into" definition of an analysis into DataflowTarget and DataflowRule objects.
#
#   $pipeline      - pipeline object; collection_of() and add_new_or_update() are called on it
#   $from_analysis - the analysis the data flows from; logic_name() is called on it for messages
#   $flow_into     - hashref { branch_tag => targets }; anything that is not a hashref is taken as the targets of branch 1
#   $verbose       - passed through to add_new_or_update()
#
# Branch tags:  'X'  (plain branch), 'A->X' (funnel of group A) or 'X->A' (fan of group A),
#               where X is a branch name or code (-?\w+) and A is a single capital letter.
# Targets:      a scalar, a hashref { target => template(s) }, a WHEN(...) group, or an arrayref of any of those.
#
# Dies on a branch tag that cannot be parsed, on a fan whose group has no funnel, on a second funnel
# for the same group, on a malformed condition group and on a target URL that cannot be parsed.
# The structures passed in via $flow_into are not modified.
sub parse_flow_into {
    my ($pipeline, $from_analysis, $flow_into, $verbose) = @_;

    $flow_into = { 1 => $flow_into } unless(ref($flow_into) eq 'HASH'); # force non-hash into a hash

    my %group_tag_to_funnel_dataflow_rule = ();

    my $semaphore_sign  = '->';
    my $semaphore_re    = quotemeta($semaphore_sign);   # always interpolated as ${semaphore_re}, so a following [A-Z] can never be taken for an array subscript

        # Funnels have to be processed before the fans that refer to them; everything else comes last.
        # "Everything else" deliberately includes tags that contain the semaphore sign but are malformed,
        # so that they reach the parsing error below instead of being silently skipped.
    my @all_branch_tags     = sort keys %$flow_into;
    my @funnel_branch_tags  = grep { /^[A-Z]${semaphore_re}/ } @all_branch_tags;
    my @fan_branch_tags     = grep { /${semaphore_re}[A-Z]$/ } @all_branch_tags;
    my %is_semaphored_tag   = map { ($_ => 1) } (@funnel_branch_tags, @fan_branch_tags);
    my @other_branch_tags   = grep { !$is_semaphored_tag{$_} } @all_branch_tags;

    foreach my $branch_tag (@funnel_branch_tags, @fan_branch_tags, @other_branch_tags) {

        my ($branch_name_or_code, $group_role, $group_tag);

        if($branch_tag=~/^([A-Z])${semaphore_re}(-?\w+)$/) {
            ($branch_name_or_code, $group_role, $group_tag) = ($2, 'funnel', $1);
        } elsif($branch_tag=~/^(-?\w+)${semaphore_re}([A-Z])$/) {
            ($branch_name_or_code, $group_role, $group_tag) = ($1, 'fan', $2);
        } elsif($branch_tag=~/^(-?\w+)$/) {
            ($branch_name_or_code, $group_role, $group_tag) = ($1, '');
        } elsif($branch_tag=~/:/) {
            die "Please use newer '2${semaphore_sign}A' and 'A${semaphore_sign}1' notation instead of '2:1' and '1'\n";
        } else {
            die "Error parsing the group tag '$branch_tag'\n";
        }

        my $funnel_dataflow_rule = undef;    # NULL by default

        if($group_role eq 'fan') {
            $funnel_dataflow_rule = $group_tag_to_funnel_dataflow_rule{$group_tag}
                or die "No funnel dataflow_rule defined for group '$group_tag'\n";
        }

        my $pre_cond_groups = $flow_into->{$branch_tag};

            # [first pass] force pre_cond_groups into a list:
        if( !ref($pre_cond_groups)                  # a scalar (a single target)
         or (ref($pre_cond_groups) eq 'HASH')       # a hash (a combination of targets with templates)
         or ((ref($pre_cond_groups) eq 'ARRAY') and @$pre_cond_groups and !ref($pre_cond_groups->[0]) and ($pre_cond_groups->[0] eq $cond_group_marker))    # a single WHEN group
        ) {
            $pre_cond_groups = [ $pre_cond_groups ];
        }

        my @uniform_cond_groups = ();

            # [second pass] rework them into a true list of WHEN-groups:
        foreach my $pre_group (@$pre_cond_groups) {
            if( !ref($pre_group) ) {                # wrap the scalar:
                push @uniform_cond_groups, WHEN( ELSE( $pre_group ));
            } elsif( ref($pre_group) eq 'HASH') {   # break up the hash and wrap the parts:
                    # sorted keys give a reproducible order (the heirs below are processed in sorted order as well)
                foreach my $target (sort keys %$pre_group) {
                    my $templates = $pre_group->{$target};
                    $templates = [$templates] unless(ref($templates) eq 'ARRAY');
                    push @uniform_cond_groups, map { WHEN( ELSE( { $target => $_ } )) } @$templates;
                }
            } else {                                # keep the WHEN groups unchanged
                push @uniform_cond_groups, $pre_group;
            }
        }

        foreach my $cond_group (@uniform_cond_groups) {

            unless(ref($cond_group) eq 'ARRAY') {
                use Data::Dumper;
                die "Expecting ARRAYref, but got ".Dumper($cond_group)." instead.";
            }

                # Split the marker from the (condition, heirs) pairs using a copy:
                # the group may belong to the caller's configuration and must stay intact,
                # otherwise parsing the same group a second time would fail the marker check.
            my ($this_cond_group_marker, @condition_heirs_pairs) = @$cond_group;
            die "Expecting $cond_group_marker, got $this_cond_group_marker" unless($this_cond_group_marker eq $cond_group_marker);

            my $suspended_targets = [];
            while(@condition_heirs_pairs) {
                my ($on_condition, $heirs) = splice(@condition_heirs_pairs, 0, 2);

                    # force anything else to the common denominator format:
                $heirs = [ $heirs ] unless(ref($heirs));
                $heirs = { map { ($_ => undef) } @$heirs } if(ref($heirs) eq 'ARRAY');

                foreach my $heir_url (sort keys %$heirs) {
                    my $input_id_template_list = $heirs->{$heir_url};

                    if($heir_url =~ m{^\w+$}) {
                            # Just a warning because analyses can be added later
                        $pipeline->collection_of('Analysis')->find_one_by('logic_name', $heir_url)
                            or warn "WARNING: Could not find a local analysis named '$heir_url' (dataflow from analysis '".($from_analysis->logic_name)."')\n";
                    } else {
                            # URLs that can't be parsed won't magically become parsable, so this is an error
                        Bio::EnsEMBL::Hive::Utils::URL::parse($heir_url)
                            or die "ERROR: Could not parse the URL '$heir_url' (dataflow from analysis '".($from_analysis->logic_name)."')\n";
                    }

                    $input_id_template_list = [ $input_id_template_list ] unless(ref($input_id_template_list) eq 'ARRAY');  # allow for more than one template per analysis

                    foreach my $input_id_template (@$input_id_template_list) {

                        my $template_string = (ref($input_id_template) ? stringify($input_id_template) : $input_id_template);

                            # A leading '+' (see INPUT_PLUS) is stripped and remembered as a flag.
                            # Only the prefix is matched, so that templates spanning several lines are recognised too.
                        my $extend_param_stack = ($template_string && $template_string=~s/^\+//) ? 1 : 0;

                        my ($df_target) = $pipeline->add_new_or_update( 'DataflowTarget', $verbose,    # NB: add_new_or_update returns a list
                            'source_dataflow_rule'      => undef,           # NB: had to create the "suspended targets" to break the dependence circle
                            'on_condition'              => $on_condition,
                            'input_id_template'         => $template_string,
                            'extend_param_stack'        => $extend_param_stack,
                            'to_analysis_url'           => $heir_url,
                        );
                        push @$suspended_targets, $df_target;

                    } # /for all templates
                } # /for all heirs
            } # /for each condition and heir

            my ($df_rule, $df_rule_is_new) = $pipeline->add_new_or_update( 'DataflowRule', $verbose,   # NB: add_new_or_update returns a list
                'from_analysis'             => $from_analysis,
                'branch_code'               => $branch_name_or_code,
                'funnel_dataflow_rule'      => $funnel_dataflow_rule,
                'unitargets'                => Bio::EnsEMBL::Hive::DataflowRule::_compute_unitargets($suspended_targets),
            );

            if( $df_rule_is_new ) {
                    # attach the suspended targets to the rule that has just been created
                foreach my $suspended_target (@$suspended_targets) {
                    $suspended_target->source_dataflow_rule( $df_rule );
                }
            } else {
                    # the rule was not new, so the freshly made targets are dropped from the collection again
                foreach my $suspended_target (@$suspended_targets) {
                    $pipeline->collection_of('DataflowTarget')->forget( $suspended_target );
                }
            }

            if($group_role eq 'funnel') {
                if($group_tag_to_funnel_dataflow_rule{$group_tag}) {
                    die "More than one funnel dataflow_rule defined for group '$group_tag'\n";
                } else {
                    $group_tag_to_funnel_dataflow_rule{$group_tag} = $df_rule;
                }
            }
        } # /foreach $cond_group

    } # /for all branch_tags

    return;
}
235 
236 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
Bio::EnsEMBL::Hive::Utils::URL
Definition: URL.pm:11
Bio::EnsEMBL::Hive::Utils::URL::parse
public parse()
map
public map()
Bio::EnsEMBL::Hive::DataflowRule::_compute_unitargets
protected _compute_unitargets()
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::Utils::PCL
Definition: PCL.pm:12
Bio::EnsEMBL::Hive
Definition: Hive.pm:38