ensembl-hive  2.6
Valley.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
6 
7 =head1 DESCRIPTION
8 
9  A Valley represents a collection of available Meadows.
10 
11  Certain methods fit better with the concept of Valley -
12  such as identifying all dead workers, or killing a particular one given worker_id.
13 
14 =head1 LICENSE
15 
16  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
17  Copyright [2016-2024] EMBL-European Bioinformatics Institute
18 
19  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
20  You may obtain a copy of the License at
21 
22  http://www.apache.org/licenses/LICENSE-2.0
23 
24  Unless required by applicable law or agreed to in writing, software distributed under the License
25  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  See the License for the specific language governing permissions and limitations under the License.
27 
28 =head1 CONTACT
29 
30  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
31 
32 =cut
33 
34 
35 package Bio::EnsEMBL::Hive::Valley;
36 
37 use strict;
38 use warnings;
39 use List::Util ('sum');
40 use Sys::Hostname ('hostname');
41 use Bio::EnsEMBL::Hive::Utils ('find_submodules', 'whoami');
43 
44 use base ('Bio::EnsEMBL::Hive::Configurable');
45 
46 
# Returns the package namespace under which the Meadow driver classes live.
# Reads no arguments, so it works equally as a plain function or as a method.
sub meadow_class_path {
    my $driver_namespace = 'Bio::EnsEMBL::Hive::Meadow';

    return $driver_namespace;
}
51 
52 
# Package-wide cache: arrayref of Meadow driver class names, filled by the
# first successful call to loaded_meadow_drivers().
our $_loaded_meadow_drivers;

# Returns an arrayref with the class names of all Meadow drivers found under
# meadow_class_path(), after making sure every one of them has been require'd.
#
# Dies (re-throwing the compilation error) if any driver fails to load.
# The cache is only populated once ALL drivers have loaded, so that a caller
# who traps the exception and calls again gets the same error again rather
# than a list containing classes that were never actually loaded.
sub loaded_meadow_drivers {

    unless( $_loaded_meadow_drivers ) {
        my $driver_classes = Bio::EnsEMBL::Hive::Utils::find_submodules( meadow_class_path() );

        foreach my $meadow_class (@$driver_classes) {
            eval "require $meadow_class";
            die $@ if($@);  # Even if the Meadow is unavailable, we still expect all the drivers that are in the path to compile correctly.
        }

        $_loaded_meadow_drivers = $driver_classes;  # remember the list only after everything has compiled
    }
    return $_loaded_meadow_drivers;
}
65 
66 
# Constructor.
#   $config              - configuration object, stored via config() and handed to every Meadow
#   $default_meadow_type - optional type of the Meadow to make the default one
#   $pipeline_name       - optional; when true it is set on every instantiated Meadow
# Instantiates one Meadow object per loaded driver that passes the version check
# and reports a name, keyed by the driver's type in available_meadow_hash().
sub new {
    my ($class, $config, $default_meadow_type, $pipeline_name) = @_;

    my $self = bless {}, $class;

    $self->config( $config );
    $self->context( [ 'Valley' ] );

    my $meadows_by_type = $self->available_meadow_hash( {} );

        # make sure modules are loaded and available ones are checked prior to setting the current one:
    for my $driver (@{ $self->loaded_meadow_drivers }) {

        next unless $driver->check_version_compatibility;

        my $detected_name = $driver->name;
        next unless $detected_name;     # the assumption is if we can get a name, it is available

        my $meadow = $driver->new( $config, $detected_name );
        $meadow->pipeline_name( $pipeline_name ) if $pipeline_name;

        $meadows_by_type->{ $driver->type } = $meadow;
    }

        # run this method even if $default_meadow_type was not specified
    $self->set_default_meadow_type( $default_meadow_type );

    return $self;
}
95 
96 
# Combined getter/setter for the hashref of available Meadow objects.
# When called with an argument (even an undefined one) that value is stored first;
# the currently stored value is always returned.
sub available_meadow_hash {
    my ($self, @new_value) = @_;

    $self->{_available_meadow_hash} = $new_value[0] if @new_value;

    return $self->{_available_meadow_hash};
}
105 
106 
# Returns a fresh arrayref of the available Meadow objects in which an object
# of the '<meadow_class_path>::LOCAL' class is pushed to the bottom of the list.
# Objects of any other class compare as equal to each other.
sub get_available_meadow_list {
    my ($self) = @_;

    my $local_class = $self->meadow_class_path . '::LOCAL';

    my @ordered = sort {
          ref($a) eq $local_class ?  1      # LOCAL on the left  => it goes after
        : ref($b) eq $local_class ? -1      # LOCAL on the right => the other one goes first
        :                            0      # neither is LOCAL   => no preference
    } values %{ $self->available_meadow_hash };

    return \@ordered;
}
114 
115 
# Chooses and stores the default Meadow.
#   $default_meadow_type - optional; when true, the available Meadow registered
#                          under that type becomes the default, and the method
#                          dies if no such Meadow is available.
# Without a type, the first element of get_available_meadow_list() is used
# (which may be undef if that list is empty).
sub set_default_meadow_type {
    my ($self, $default_meadow_type) = @_;

    my $chosen_meadow;

    if( !$default_meadow_type ) {
        $chosen_meadow = $self->get_available_meadow_list->[0];    # take the first from preference list
    } else {
        $chosen_meadow = $self->available_meadow_hash->{$default_meadow_type}
            or die "Meadow '$default_meadow_type' does not seem to be available on this machine, please investigate";
    }

    return $self->{_default_meadow} = $chosen_meadow;
}
129 
130 
# Returns whatever set_default_meadow_type() last stored as the default Meadow.
sub get_default_meadow {
    my ($self) = @_;

    return $self->{_default_meadow};
}
136 
137 
# Returns the available Meadow whose type matches $worker->meadow_type and whose
# cached_name equals $worker->meadow_name; returns undef when there is none.
sub find_available_meadow_responsible_for_worker {
    my ($self, $worker) = @_;

    my $candidate = $self->available_meadow_hash->{ $worker->meadow_type };

    my $is_responsible = $candidate && ( $candidate->cached_name eq $worker->meadow_name );

        # an explicit undef (rather than a bare return) so that list-context callers still get one element
    return $is_responsible ? $candidate : undef;
}
148 
149 
# Works out which available Meadow the current process is running in.
# Returns the list ($meadow, $pid, $meadow_host, $meadow_user) for the first
# Meadow (in get_available_meadow_list() order) that can report both the current
# worker process id and the current hostname without throwing.
# Dies if every available Meadow throws.
sub whereami {
    my ($self) = @_;

    my $meadow_user = Bio::EnsEMBL::Hive::Utils::whoami();

    for my $candidate (@{ $self->get_available_meadow_list }) {
        my ($pid, $meadow_host);

        eval {
                # get_current_worker_process_id() is expected to die if the pid
                # cannot be determined, in which case this Meadow is skipped
                # and the next one is tried.
            $pid         = $candidate->get_current_worker_process_id();
            $meadow_host = $candidate->get_current_hostname();
        };
        next if $@;

        return ($candidate, $pid, $meadow_host, $meadow_user);
    }

    die "Could not determine the Meadow, please investigate";
}
171 
172 
# Builds one capacity Limiter per available Meadow.
#   $reconciled_worker_statuses - hashref {meadow_signature}{status} => [process_ids]
# Returns a two-element list:
#   - the number of 'RUN' workers summed over all available Meadows,
#   - a hashref {meadow_type} => Limiter, whose limit is the Meadow's
#     'TotalRunningWorkersMax' setting minus its 'RUN' count, or undef
#     when that setting is not defined.
sub generate_limiters {
    my ($self, $reconciled_worker_statuses) = @_;

    my $running_in_valley = 0;
    my %limiter_for_type;

    for my $meadow (@{ $self->get_available_meadow_list }) {
        my $running_pids    = $reconciled_worker_statuses->{ $meadow->signature }{ 'RUN' } || [];
        my $running_in_here = scalar @$running_pids;

        $running_in_valley += $running_in_here;

        my $free_slots;     # stays undef when the Meadow has no 'TotalRunningWorkersMax'
        if( defined $meadow->config_get('TotalRunningWorkersMax') ) {
            $free_slots = $meadow->config_get('TotalRunningWorkersMax') - $running_in_here;
        }

            # so the hash will contain limiters for every meadow_type, but not all of them active:
        my $description = "Number of workers in '" . $meadow->signature . "' meadow";
        $limiter_for_type{ $meadow->type } = Bio::EnsEMBL::Hive::Limiter->new( $description, $free_slots );
    }

    return ($running_in_valley, \%limiter_for_type);
}
194 
195 
=head2 query_worker_statuses

    Arg[1]      : Hashref {meadow_type}{meadow_name}{meadow_user}{process_id} => $db_status
    Output      : Hashref {meadow_signature}{meadow_status} => [process_ids]

    Description : Queries the available meadows to get the (meadow) status of the given workers.
                  Meadows for which the input holds no users are skipped and get no entry in the output.
                  For every registered process_id the status reported by the meadow wins;
                  if the meadow does not report that process_id, a DB status of SUBMITTED or DEAD
                  is kept as it is and any other DB status becomes LOST.

=cut

sub query_worker_statuses {
    my ($self, $db_registered_workers_from_all_meadows_deemed_alive) = @_;

    my %reconciled_worker_statuses = ();

    foreach my $meadow (@{ $self->get_available_meadow_list }) {    # only go through the available meadows
        my $db_registered_workers_this_meadow = $db_registered_workers_from_all_meadows_deemed_alive->{$meadow->type}{$meadow->cached_name} || {};
        my $involved_users = [keys %$db_registered_workers_this_meadow];

        next unless @$involved_users;

            # of each row returned by the meadow, element [0] is used as the process_id and element [2] as the status
        my %meadow_seen_worker_status = map { ( $_->[0] => $_->[2] ) } @{ $meadow->status_of_all_our_workers( $involved_users ) };

        my $worker_statuses_of_this_meadow = $reconciled_worker_statuses{ $meadow->signature } = {};    # manually vivify every Meadow's subhash

            # Start the reconciliation from the DB view and check it against the Meadow view.
            # The hashes are walked via keys() rather than each(), because each() continues from wherever
            # the hash's internal iterator was last left and could silently skip workers.
        foreach my $meadow_user (@$involved_users) {
            my $db_user_subhash = $db_registered_workers_this_meadow->{$meadow_user} || {};

            foreach my $worker_pid (keys %$db_user_subhash) {
                my $db_worker_status = $db_user_subhash->{$worker_pid};

                my $combined_status = $meadow_seen_worker_status{$worker_pid}
                                    // ( ($db_worker_status=~/^(?:SUBMITTED|DEAD)$/) ? $db_worker_status : 'LOST' );

                push @{ $worker_statuses_of_this_meadow->{ $combined_status } }, $worker_pid;
            }
        }
    }
    return \%reconciled_worker_statuses;
}
231 
232 
# Inverts the reconciled status structure into a per-worker lookup.
#   $reconciled_worker_statuses - hashref {meadow_signature}{status} => [process_ids]
# Returns a hashref {meadow_signature}{process_id} => status that has an entry
# (possibly an empty hash) for every available Meadow.
sub status_of_all_our_workers_by_meadow_signature {
    my ($self, $reconciled_worker_statuses) = @_;

    my %signature_and_pid_to_worker_status = ();
    foreach my $meadow (@{ $self->get_available_meadow_list }) {
        my $meadow_signature = $meadow->signature;
        my $pid_to_status    = $signature_and_pid_to_worker_status{ $meadow_signature } = {};

        my $status_2_pid_list = $reconciled_worker_statuses->{ $meadow_signature } || {};

            # keys() rather than each(): each() continues from wherever the hash's internal
            # iterator was last left, which could silently skip some of the statuses
        foreach my $status (keys %$status_2_pid_list) {
            $pid_to_status->{$_} = $status for @{ $status_2_pid_list->{$status} };
        }
    }
    return \%signature_and_pid_to_worker_status;
}
248 
249 
250 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
Bio::EnsEMBL::Hive::Limiter::new
public new()
map
public map()
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::Meadow
Definition: Meadow.pm:12
Bio::EnsEMBL::Hive::Configurable::config
public config()
Bio::EnsEMBL::Hive::Limiter
Definition: Limiter.pm:10
Bio::EnsEMBL::Hive::Utils::whoami
public whoami()
Bio::EnsEMBL::Hive::Utils::find_submodules
public find_submodules()
run
public run()
Bio::EnsEMBL::Hive
Definition: Hive.pm:38
Bio::EnsEMBL::Hive::Valley
Definition: Valley.pm:16