ensembl-hive  2.6
DockerSwarm.pm
Go to the documentation of this file.
1 =pod
2 
3 =head1 NAME
4 
6 
7 =head1 DESCRIPTION
8 
9  This is the implementation of Meadow for a Swarm of Docker Engines
10 
11 =head1 LICENSE
12 
13  Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
14  Copyright [2016-2023] EMBL-European Bioinformatics Institute
15 
16  Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
17  You may obtain a copy of the License at
18 
19  http://www.apache.org/licenses/LICENSE-2.0
20 
21  Unless required by applicable law or agreed to in writing, software distributed under the License
22  is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23  See the License for the specific language governing permissions and limitations under the License.
24 
25 =head1 CONTACT
26 
27  Please subscribe to the Hive mailing list: http://listserver.ebi.ac.uk/mailman/listinfo/ehive-users to discuss Hive-related questions or to be notified of our updates
28 
29 =cut
30 
31 
32 package Bio::EnsEMBL::Hive::Meadow::DockerSwarm;
33 
34 use strict;
35 use warnings;
36 use Cwd ('cwd');
37 use Bio::EnsEMBL::Hive::Utils ('destringify', 'split_for_bash', 'stringify');
38 
39 use base ('Bio::EnsEMBL::Hive::Meadow', 'Bio::EnsEMBL::Hive::Utils::RESTclient');
40 
41 
42 our $VERSION = '5.2'; # Semantic version of the Meadow interface:
43  # change the Major version whenever an incompatible change is introduced,
44  # change the Minor version whenever the interface is extended, but compatibility is retained.
45 
# Derives the base URL of the Docker REST API (pinned to v1.30) from the
# DOCKER_MASTER_ADDR environment variable.
# When the variable holds a false value (unset, empty, "0") that value is
# returned unchanged, so the result doubles as an "is it configured ?" flag.
sub construct_base_url {
    my $master_addr = $ENV{'DOCKER_MASTER_ADDR'};

    return $master_addr unless $master_addr;
    return "http://$master_addr/v1.30";
}
50 
51 
# Constructor: builds the object through the parent constructor, then sets up
# the REST client part and remembers where the manager node lives.
sub new {
    my $class = shift;

    # All remaining arguments go untouched to the parent constructor
    my $self = $class->SUPER::new( @_ );

    # The REST client needs a base URL; use '' when none can be constructed
    my $rest_base_url = $class->construct_base_url;
    $self->base_url( $rest_base_url // '' );

    # Kept on the object so that it can be handed over to the submitted workers
    $self->{_DOCKER_MASTER_ADDR} = $ENV{'DOCKER_MASTER_ADDR'};

    return $self;
}
61 
62 
# Returns the ID of the swarm, as reported by the '/swarm' endpoint.
# Also called to check for availability: a class-level call gives up with
# undef when no base URL can be constructed, and the 'ID' lookup yields undef
# when the GET brings nothing back.
sub name {
    my ($invocant) = @_;

    # Object instances have defined the base URL in the parent class, so only
    # class-level calls need to spell it out in front of the endpoint
    my $url_prefix = '';
    if (!ref($invocant)) {
        $url_prefix = $invocant->construct_base_url;
        return undef unless $url_prefix;
    }

    my $swarm_attribs = $invocant->GET( $url_prefix . '/swarm' ) || {};

    return $swarm_attribs->{'ID'};
}
78 
79 
# Finds the swarm task this very process is running in and returns its
# attributes (one entry of the '/tasks' listing). The result is cached on the
# object in $self->{_task_attribs}.
# Returns nothing (undef in scalar context) when no Docker container ID can be
# read from /proc/self/cgroup, and undef when no task matches that ID.
sub _get_our_task_attribs {
    my ($self) = @_;

    return $self->{_task_attribs} if $self->{_task_attribs};

    # Get the container ID. Although in simple cases, the hostname is the same as
    # the container ID, it is not always true. So we need to dig into cgroup stuff

#   # cat /proc/self/cgroup
#13:name=systemd:/docker/c8ecf8b2f3f2a26543971b57fd37205164a19908871d7bd43405914fcd054bfd
#12:pids:/docker/c8ecf8b2f3f2a26543971b57fd37205164a19908871d7bd43405914fcd054bfd
#   ...
#1:cpuset:/docker/c8ecf8b2f3f2a26543971b57fd37205164a19908871d7bd43405914fcd054bfd

    # Without a readable cgroup file there is no container ID to be found
    open(my $cgroup_fh, '<', '/proc/self/cgroup') or return;

    my $container_prefix;
    # A lexical line variable keeps the caller's $_ intact
    while (my $cgroup_line = <$cgroup_fh>) {
        if ($cgroup_line =~ m{:/docker/(.*)$}) {
            $container_prefix = $1;
            last;
        }
    }
    close($cgroup_fh);

    # Not running in a container
    return unless $container_prefix;

    my $tasks_list = $self->GET( '/tasks' );

    # Plain prefix comparison: nothing in the ID can be mistaken for a regex metacharacter
    my ($our_task_attribs) = grep {
        index( ($_->{'Status'}{'ContainerStatus'}{'ContainerID'} || ''), $container_prefix ) == 0
    } @$tasks_list;
    $self->{_task_attribs} = $our_task_attribs;

    return $self->{_task_attribs};
}
134 
135 
# Returns the address ('Status' -> 'Addr' in the '/nodes' listing) of the swarm
# node that runs the current task, or undef when the task has no NodeID or the
# node is not listed.
# Dies when the current task cannot be identified. This used to surface as an
# obscure "undefined value as a HASH reference" error after a needless
# '/nodes' request; the failure is now explicit and happens first.
sub get_current_hostname {
    my ($self) = @_;

    my $our_task_attribs = $self->_get_our_task_attribs()
        or die "Could not find the DockerSwarm task this process is running in";

    my $our_node_id = $our_task_attribs->{'NodeID'};
    return undef unless defined $our_node_id;

    my $nodes_list   = $self->GET( '/nodes' );
    my %node_id_2_ip = map { ($_->{'ID'} => $_->{'Status'}{'Addr'}) } @$nodes_list;

    return $node_id_2_ip{ $our_node_id };
}
145 
146 
# Returns the ID of the swarm task the current process is running in.
# Dies when that task cannot be identified. The failure used to be an obscure
# "undefined value as a HASH reference" error and is now an explicit message.
sub get_current_worker_process_id {
    my ($self) = @_;

    my $our_task_attribs = $self->_get_our_task_attribs()
        or die "Could not establish the process_id: this process does not run in a known DockerSwarm task";

    return $our_task_attribs->{'ID'};
}
154 
155 
# Removes DOCKER_MASTER_ADDR from the environment of the current process, so
# that the LOCAL child processes don't think they belong to the DockerSwarm
# meadow. Hands back whatever value the variable held.
sub deregister_local_process {
    my ($self) = @_;

    return delete $ENV{'DOCKER_MASTER_ADDR'};
}
161 
162 
# Lists every task known to the swarm together with its status.
# Returns an arrayref of [ task_id, 'docker_user', status ] triples, where the
# status is the Docker task state translated to PEND / RUN / DONE / EXIT.
# A state missing from the translation table is passed through in lower case.
sub status_of_all_our_workers { # returns an arrayref
    my ($self) = @_;

    # Some statuses are explained at https://docs.docker.com/datacenter/ucp/2.2/guides/admin/monitor-and-troubleshoot/troubleshoot-task-state/
    # The table is built once per call rather than once per task.
    my %state_2_status = (
        'new'       => 'PEND',
        'pending'   => 'PEND',
        'assigned'  => 'PEND',
        'accepted'  => 'PEND',
        'preparing' => 'RUN',
        'starting'  => 'RUN',
        'running'   => 'RUN',
        'complete'  => 'DONE',
        'shutdown'  => 'DONE',
        'failed'    => 'EXIT',
        'rejected'  => 'EXIT',
        'orphaned'  => 'EXIT',
    );

    # All the tasks are requested: no filtering on a service name is applied here
    my $service_tasks_struct = $self->GET( '/tasks' );

    my @status_list = ();
    foreach my $task_entry (@$service_tasks_struct) {
        # A task without a state yields '' instead of an "uninitialized" warning
        my $prestatus = lc( $task_entry->{'Status'}{'State'} // '' );
        my $status    = $state_2_status{$prestatus} || $prestatus;

        push @status_list, [ $task_entry->{'ID'}, 'docker_user', $status ];
    }

    return \@status_list;
}
196 
197 
198 #sub check_worker_is_alive_and_mine {
199 # my ($self, $worker) = @_;
200 #
201 # my $wpid = $worker->process_id();
202 # my $is_alive_and_mine = kill 0, $wpid;
203 #
204 # return $is_alive_and_mine;
205 #}
206 #
207 #
208 #sub kill_worker {
209 # my ($self, $worker, $fast) = @_;
210 #
211 # system('kill', '-9', $worker->process_id());
212 #}
213 
# Forces the resource figures to be numbers, in place.
#
# In Perl, large numbers may be held as strings, and would then be written out
# as strings by stringify and by the JSON encoding. Adding 0 makes them numeric:
#
#    'Resources' => {
#        'Reservations' => {
#            'NanoCPUs'    => 1000000000,
#            'MemoryBytes' => '34359738368'
#        },
#        'Limits' => {
#            'NanoCPUs'    => 1000000000,
#            'MemoryBytes' => '34359738368'
#        }
#    }
#
# Takes the hashref to fix; only the 'NanoCPUs' and 'MemoryBytes' entries of
# its 'Reservations' and 'Limits' sections are touched. Anything that is not a
# plain hashref (the argument itself or one of the sections) is left alone
# rather than being autovivified or dereferenced. Returns nothing.
sub type_resources_as_numeric {
    my $resources = shift;

    return unless ref($resources) eq 'HASH';

    foreach my $section_name ('Reservations', 'Limits') {
        my $section = $resources->{$section_name};
        next unless ref($section) eq 'HASH';

        foreach my $figure_name ('NanoCPUs', 'MemoryBytes') {
            $section->{$figure_name} += 0 if exists $section->{$figure_name};
        }
    }

    return;
}
242 
243 
# Submits $required_worker_count workers as one replicated Docker service and
# returns an arrayref with the IDs of the tasks of that service.
#
#   $worker_cmd                        command line of a worker; split into the container's 'Args'
#   $required_worker_count             number of replicas to request
#   $iteration, $rc_name               used to build the common name of the job array
#   $rc_specific_submission_cmd_args   stringified 'Resources' structure; 1 core is reserved when it is false
#   $submit_log_subdir                 not used here (see the note about 'LogDriver' below)
#
# Dies when no image name is configured, when the service cannot be created,
# or when the number of tasks found afterwards differs from the request.
sub submit_workers_return_meadow_pids {
    my ($self, $worker_cmd, $required_worker_count, $iteration, $rc_name, $rc_specific_submission_cmd_args, $submit_log_subdir) = @_;

    # Checked first, so that a missing configuration is reported before any name lookup on the swarm
    my $image_name = $self->config_get('ImageName');
    die "The image name for the ".$self->name." DockerSwarm meadow is not configured. Cannot submit jobs !" unless $image_name;

    my $worker_cmd_components = [ split_for_bash($worker_cmd) ];
    my $expected_task_count   = int($required_worker_count);

    my $job_array_common_name = $self->job_array_common_name($rc_name, $iteration);

    # Name collision detection: add a numeric suffix until no task carries the name
    my $extra_suffix = 0;
    my $service_name = $job_array_common_name;
    while (scalar(@{ $self->GET( '/tasks?filters={"name":["' . $service_name . '"]}' ) })) {
        $extra_suffix++;
        $service_name = "$job_array_common_name-$extra_suffix";
    }
    if ($extra_suffix) {
        warn "'$job_array_common_name' already used to name a service. Using '$service_name' instead.\n";
        $job_array_common_name = $service_name;
    }

    # If the resource description is missing, use 1 core
    my $default_resources = {
        'Reservations' => {
            'NanoCPUs' => 1_000_000_000,
        },
    };
    my $resources = destringify($rc_specific_submission_cmd_args);

    # Undefined values are passed on as empty strings (same content as before, minus the warning)
    my $docker_master_addr = $self->{'_DOCKER_MASTER_ADDR'} // '';
    my $hidden_pass        = $ENV{'_EHIVE_HIDDEN_PASS'} // '';

    my $service_create_data = {
        'Name'          => $job_array_common_name,      # NB: service names in DockerSwarm have to be unique!
        'TaskTemplate'  => {
            'ContainerSpec' => {
                'Image'     => $image_name,
                'Args'      => $worker_cmd_components,
                'Mounts'    => $self->config_get('Mounts'),
                'Env'       => [
                    # Propagate these to the workers
                    "DOCKER_MASTER_ADDR=$docker_master_addr",
                    "_EHIVE_HIDDEN_PASS=$hidden_pass",
                ],
            },
            # NOTE: By default, docker always keeps logs. Should we disable them here
            #       if $submit_log_subdir has been set ? There are no options to redirect
            #       the logs, so the option's value would be ignored.
            #'LogDriver' => {
                #'Name'  => 'none',
            #},
            'Resources'     => $resources || $default_resources,
            'RestartPolicy' => {
                'Condition' => 'none',
            },
        },
        'Mode'          => {
            'Replicated'    => {
                'Replicas'  => $expected_task_count,
            },
        },
    };
    type_resources_as_numeric($service_create_data->{'TaskTemplate'}->{'Resources'});

    my $service_created_struct = $self->POST( '/services/create', $service_create_data );
    unless (exists $service_created_struct->{'ID'}) {
        die "Submission unsuccessful: " . ($service_created_struct->{'message'} // stringify($service_created_struct)) . "\n";
    }

    # Give some time to the Docker daemon to process the request
    sleep(5);

    my $service_id         = $service_created_struct->{'ID'};
    my $service_tasks_list = $self->GET( qq{/tasks?filters={"service":["$service_id"]}} );
    if (scalar(@$service_tasks_list) != $expected_task_count) {
        die "Submission unsuccessful: found " . scalar(@$service_tasks_list) . " tasks instead of " . $expected_task_count . "\n";
    }

    my @children_task_ids = map { $_->{'ID'} } @$service_tasks_list;

    return \@children_task_ids;
}
323 
324 
# Overrides Meadow::run_on_host. Running a command on a given host is not
# supported yet: this placeholder only blocks the base class' functionality.
# Whatever the host, user and command are, the answer is undef.
sub run_on_host {
    my ($self, $meadow_host, $meadow_user, $command) = @_;

    return undef;   # NB: a single undef, also when called in list context
}
330 
331 1;
Bio::EnsEMBL::Hive::Utils
Definition: Collection.pm:4
map
public map()
Bio::EnsEMBL::Hive::Version
Definition: Version.pm:19
Bio::EnsEMBL::Hive::Meadow
Definition: Meadow.pm:12
Bio::EnsEMBL::Hive
Definition: Hive.pm:38
Bio::EnsEMBL::Hive::Meadow::DockerSwarm
Definition: DockerSwarm.pm:12