3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package EnsCloud::Cmd::Command::SpeciesToVolumes;
23 use Data::Dump qw(dump);
26 use Moose::Util::TypeConstraints;
31 use namespace::autoclean;
32 use IPC::Cmd qw[can_run
run];
34 # use List::Utils qw(first);
35 use Log::Log4perl qw(:easy);
36 with
'MooseX::Log::Log4perl';
37 extends qw(MooseX::App::Cmd::Command);
40 Log::Log4perl->easy_init();
43 # ABSTRACT: build ensembl MySQL instances by species
46 return "build ensembl MySQL instances by species";
53 # * This expects a volume containing the Ensembl MYDs attached to the instance (Public Snapshot of 65 dbs = snap-56c9ab32)
54 # - default path is /vols/ensembl_mysql_data
55 # Run like this: ( N.B --base_snapshot is the base AMI with the OS)
57 # ecloud speciestovolumes --base_snapshot snap-e36fde86 --species saccharomyces_cerevisiae (omit --species to do all)
59 # ecloud speciestovolumes --base_snapshot snap-e36fde86 --species homo_sapiens --dbtype variation
61 # * Let it run until completion, then wait until all PENDING snapshots are complete.
62 # Once all PENDING snapshots are complete you will have one snapshot per species
63 # (Do not start the process again while there are still PENDING snapshots)
64 # Now run it again with the same parameters - this will loop over the completed snapshots to combine each with the base_image OS
69 # traits => ['Getopt'],
71 # # cmd_aliases => "h",
72 # documentation => "instances volumes or images",
75 # # default => sub { die "bucket name required" },
78 has
'region_alias' => (
84 documentation =>
"asia, useast, uswest or eu",
89 has
'volume_path' => (
96 documentation =>
"path to the species DBs",
98 default =>
'/vols/ensembl_mysql_data',
102 enum 'CompositeGroup' => qw(core coreplusvariation corepluscompara variation compara all);
106 isa =>
'CompositeGroup',
107 traits => [
'Getopt'],
109 # cmd_aliases => "h",
110 documentation =>
"database types to copy: default=core",
116 has db_type_lookup => (
122 core => [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega )],
124 [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega variation)],
126 [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega compara )],
128 [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega compara variation )],
129 variation => [qw(variation)],
130 compara => [qw(compara)],
139 traits => [
'Getopt'],
141 # cmd_aliases => "h",
142 documentation =>
"the species to copy. default=all",
148 has
'start_from' => (
151 traits => [
'Getopt'],
153 # cmd_aliases => "h",
154 documentation =>
"the species to start dumping from",
161 traits => [
'Getopt'],
163 # cmd_aliases => "h",
164 documentation =>
"the species to stop dumping at",
167 has
'volume_dump_queue' => (
170 all_queued_volumes =>
'elements',
171 volume_add_to_queue =>
'push',
172 next_volume_from_queue =>
'shift',
173 sort_queue_by_size_desc => [ sort_in_place => ( sub { $_[1]->total_size <=> $_[0]->total_size } ) ],
175 queue_length =>
'count',
176 filter_queue =>
'grep'
178 isa =>
'ArrayRef[EnsCloud::Image::VolumeBundle::Volume]',
181 has db_species_details => (
185 isa =>
'HashRef[EnsCloud:::DatabaseDetails]',
186 default => sub { {} }
189 has this_instance_id => (
192 default => sub {
return `curl -s 169.254.169.254/latest/meta-data/instance-
id` },
197 has myd_destination_folder => (
200 default =>
'/vols/MYDCOPY',
207 default =>
'/dev/sdi',
211 has base_snapshot => (
220 default => sub {
return `curl -s 169.254.169.254/latest/meta-data/placement/availability-zone/` },
224 with
'EnsCloud::Describer';
228 $self->build_queue();
230 $self->log->fatal(
"No databases in the queue");
233 $self->sort_queue_by_size_desc;
235 foreach my $volume ( $self->all_queued_volumes ) {
236 $self->log->info(
"Doing: " . $volume->species );
238 if ( $volume->snapshot_id && $volume->status eq
'completed' ) {
240 # $self->log->info($volume->snapshot_id): $self->log->warn("No snapshot for: " . $volume->tag);
241 my $image = $self->make_image($volume);
242 push @image_list, $image;
245 $self->make_snapshot($volume);
250 my $image_count = @image_list > 0 ? scalar @image_list : 0;
251 $self->log->info(
"Finished, made ", $self->queue_length,
" Volumes" );
252 $self->log->info(
"Finished, made $image_count Images");
253 my @no_snapshots = $self->filter_queue( sub { $_->status =~ /no snapshot/ } );
254 $self->log->info( scalar @no_snapshots .
" Snapshots still waiting" );
260 my $ec2_snapshots = $self->ec2->describe_snapshots( Owner =>
'self' );
261 foreach my $ec2_snap (@$ec2_snapshots) {
262 my $description = $ec2_snap->{description} || next;
263 $description =~ s/\s+$
264 if ( my $has_snapshot = $self->find_tag( sub { $_->tag eq $description } ) ) {
265 $has_snapshot->snapshot_id( $ec2_snap->{snapshot_id} );
266 $has_snapshot->status( $ec2_snap->{status} );
274 my ( $self, $snapshot ) = @_;
276 $self->log->debug(
"MAKE IMAGE");
280 . $snapshot->ensembl_release .
" "
281 . $snapshot->species .
" ["
282 . ( join
" ",
map { $_->type } $snapshot->all_dbs ) .
"]";
283 my $name =
"Ensembl" . $snapshot->ensembl_release .
" " . $snapshot->species .
" MySQL AMI";
285 # has it already been created
286 my $existing_images = $self->ec2->describe_images( Owner =>
'self' );
287 foreach my $i (@$existing_images) {
288 if ( $i->{description} eq $tag ) {
289 $self->log->warn(
"Image " . $i->{image_id} .
" already exists for " . $tag );
293 my $create_image = $self->ec2->register_image(
296 Architecture =>
'x86_64',
297 KernelId =>
'aki-427d952b',
298 RootDeviceName =>
'/dev/sda1',
299 BlockDeviceMapping => [
301 deviceName =>
'/dev/sda1',
303 snapshotId => $self->base_snapshot,
304 deleteOnTermination =>
'true'
308 deviceName =>
'/dev/sdh',
309 ebs => { snapshotId => $snapshot->{snapshot_id}, deleteOnTermination =>
'true' }
313 if ( $create_image->can(
'errors') ) {
314 $self->log->error(
"[Error Creating Image] " . $self->pp_ec2_errors( $create_image->errors ) );
317 $self->log->info(
"Created Image $create_image for $tag");
318 return $create_image;
322 my ( $self, $bag_of_dbs ) = @_;
323 my $size_as_float = $bag_of_dbs->total_size;
324 $self->log->debug(
"Making SNAPSHOT " . $bag_of_dbs->tag .
" Current status=" . $bag_of_dbs->status );
326 # round the volume size up to the nearest Gb, then add 1
327 # (rounding to the nearest Gb alone doesn't seem to be enough)
328 my $round_up_size = int( $size_as_float + 2 );
331 my $volume = $self->ec2->create_volume( Size => $round_up_size, AvailabilityZone => $self->this_zone );
333 if ( $volume->can(
'errors') ) {
334 $self->log->error(
"[Error Creating Volume] " . $self->pp_ec2_errors( $volume->errors ) );
338 $self->log->info(
"Created ", $volume->volume_id,
" ", $volume->size,
" Gb" );
340 my $do_attach = $self->ec2->attach_volume(
341 VolumeId => $volume->volume_id,
342 InstanceId => $self->this_instance_id,
343 Device => $self->device
345 $self->log->info(
"Attaching " . $volume->volume_id .
" Device " . $self->device );
346 if ( $do_attach->can(
'errors') ) {
347 $self->log->error(
"[Error attaching Volume] " . $self->pp_ec2_errors( $do_attach->errors ) );
348 $self->log->info(
"Deleting " . $volume->volume_id );
349 $self->ec2->delete_volume( VolumeId => $volume->volume_id );
354 my $attach_status =
'';
355 while ( $attach_status ne
'attached' && $wait_time < 30 ) {
357 $self->log->info(
"Waiting 10 seconds for volume to become available");
361 $self->ec2->describe_volumes( VolumeId => $volume->volume_id )->[0]->attachments->[0]->{status};
362 $self->log->info(
"Volume $attach_status");
364 unless ( $attach_status eq
'attached' ) {
365 $self->log->error(
"ERROR ATTACHING VOLUME AFTER 30 seconds... DETACHING AND DELETING");
366 $self->clean_up_by_volume($volume);
368 # todo, check for errors in the delete call
369 $self->log->fatal(
"COULD NOT ATTACH VOLUME. THIS IS BAD. EXITING") && die;
374 # Make filesystem on the new volume
375 $self->log->info(
"Making filesystem on " . $self->device );
376 my $mkfs_path = can_run(
'mkfs.xfs') or $self->log->warn('mkfs.xfs not installed!');
377 my $mkfs_cmd = [ 'sudo', $mkfs_path, $self->device ];
378 unless ( $self->run_command($mkfs_cmd) ) {
379 $self->log->info(
"Cleaning up after failed mkfs");
380 $self->clean_up_by_volume($volume);
386 my $mkdir_cmd = [
'sudo',
'mkdir',
'-p', $self->myd_destination_folder ];
387 unless ( $self->run_command($mkdir_cmd) ) {
388 $self->log->info(
"Cleaning up after failed mkdir");
389 $self->clean_up_by_volume($volume);
394 # Mount new volume to it
395 $self->log->info(
"Mounting " . $self->device );
396 my $mount_path = can_run(
'mount') or $self->log->warn('mount not installed!');
397 my $mount_cmd = [ 'sudo', $mount_path, $self->device, $self->myd_destination_folder ];
398 unless ( $self->run_command($mount_cmd) ) {
399 $self->log->info(
"Cleaning up after failed mount");
400 $self->clean_up_by_volume($volume);
404 # Copy each MYD dir
405 foreach my $myd_dir ( $bag_of_dbs->all_dbs ) {
406 my $copy_cmd = [
'sudo',
'cp',
'-r', $myd_dir->myd_path, $self->myd_destination_folder ];
407 $self->log->debug( join
" ", @$copy_cmd );
408 unless ( $self->run_command($copy_cmd) ) {
410 $self->log->info(
"Cleaning up after failed copy");
411 my $umount_path = can_run(
'umount') or $self->log->warn('umount not installed!');
412 my $umount_cmd = [ 'sudo', $umount_path, $self->device ];
413 unless ( $self->run_command($umount_cmd) ) {
414 $self->log->fatal(
"CANNOT UMOUNT " . $self->device .
"EXITING" ) && die;
416 $self->clean_up_by_volume($volume);
420 $myd_dir->is_copied(1);
422 # $snapshot_description .= $myd_dir->name . " ";
425 $self->log->info(
"umounting " . $self->device );
426 my $umount_path = can_run(
'umount') or $self->log->warn('umount not installed!');
427 my $umount_cmd = [ 'sudo', $umount_path, $self->device ];
428 return unless $self->run_command($umount_cmd);
431 $self->ec2->detach_volume( VolumeId => $volume->volume_id );
432 while ( defined eval { $self->ec2->describe_volumes( VolumeId => $volume->volume_id )->[0]->attachments }
435 $self->log->info(
"Waiting 10 seconds for volume to detach");
440 my $snapshot_description = $bag_of_dbs->{tag};
441 $self->log->info(
"Creating Snapshot");
442 my $snapshot = $self->ec2->create_snapshot( VolumeId => $volume->volume_id, Description => $snapshot_description );
444 if ( $snapshot->can(
'errors') ) {
445 $self->log->error(
"[Snapshot Creation Error] " . $self->pp_ec2_errors( $snapshot->errors ) );
449 $self->log->info(
"Created Snapshot ", $snapshot->snapshot_id,
" ", $volume->size,
" Gb" );
450 $self->log->info(
"Deleting " . $volume->volume_id );
451 $self->ec2->delete_volume( VolumeId => $volume->volume_id );
452 $self->log->info(
"Tagging the Snapshot");
453 my $tag_path = can_run(
'ec2-create-tags') or $self->log->warn('ec2-describe-tags not found');
454 my $tag_cmd = [ $tag_path, $snapshot->snapshot_id, '-t', "Name=$snapshot_description" ];
455 return unless $self->run_command($tag_cmd);
457 # $self->log->info("Detaching Temp Volume");
458 # $self->ec2->detach_volume(VolumeId => $volume->volume_id);
459 # $self->log->info("Deleting Temp Volume");
460 # $self->ec2->delete_volume(VolumeId => $volume->volume_id);
466 sub clean_up_by_volume {
467 my ( $self, $volume ) = @_;
470 $self->log->info(
"CLEANUP: Detaching " . $volume->volume_id );
472 my $do_detach = $self->ec2->detach_volume( VolumeId => $volume->volume_id );
474 if ( $do_detach->can(
'errors') ) {
475 $self->log->error(
"[Error detaching Volume] " . $self->pp_ec2_errors( $do_detach->errors ) );
477 $self->log->fatal(
"CANNOT DETACH VOLUME DURING CLEANUP. THIS IS BAD. EXITING :" . $volume->volume_id )
482 defined eval { $self->ec2->describe_volumes( VolumeId => $volume->volume_id )->[0]->attachments }
488 # $self->log->info("Volume $attach_status");
490 $self->log->info(
"Waiting 10 seconds for volume to detach");
496 $self->log->info(
"CLEANUP: Deleting " . $volume->volume_id );
497 $self->ec2->delete_volume( VolumeId => $volume->volume_id );
503 my ( $self, $error_obj ) = @_;
505 foreach my $error (@$error_obj) {
506 $full_message .= $error->message .
"\n";
509 return $full_message;
513 my ( $self, $cmd ) = @_;
515 ### in list context ###
516 my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
run( command => $cmd, verbose => 0 );
520 $self->log->info( join
" ", @$cmd,
" Successful" );
522 # return $stdout_buf;
523 # print " is what the command printed:\n";
524 my $stdout_str = join
"", @$stdout_buf;
525 return length $stdout_str > 0 ? $stdout_str : $success;
529 $self->log->error(
"Failed!!: $error_message\n" . ( join
"", @$stderr_buf ) );
539 my @du = (
"du", $self->volume_path, );
541 # Get a list of files
542 $self->log->info(
"Getting Directory Sizes");
543 my $du_path = can_run(
'du') or $self->log->warn('du not installed!');
544 my $du_command = [ 'du', $self->volume_path, '|', 'sort', '-n', '-k', '1' ];
545 my $du_output = $self->run_command($du_command);
547 # my @dir_size = qx{@du};
549 my $species_filter = $self->species eq
'all' ?
'.*' : $self->species;
550 my $start_from_filter = $self->start_from ? $self->start_from :
'.*';
551 my $dbgroup_to_dbnames = {
552 core => [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega )],
554 [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega variation)],
555 corepluscompara => [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega compara )],
556 all => [qw(core coreexpressionatlas coreexpressionest funcgen otherfeatures rnaseq vega compara variation )],
557 variation => [qw(variation)],
558 compara => [qw(compara)],
561 my $wanted_dbs = $dbgroup_to_dbnames->{ $self->dbtypes };
564 my ( $start, $end ) = ( $self->start_from, $self->end_at );
565 map { push @lines, $_
if ( /$start/ .. /$end/ ) } split
"\n", $du_output;
566 $self->log->fatal(
"Nothing found at " . $self->volume_path ) && die
if @lines == 0;
570 foreach my $du_line (@lines) {
571 my ( $db_size, $db_path ) = split /\s+/, $du_line;
572 my ( $volume, $directories, $db_name ) = File::Spec->splitpath($db_path);
573 my ( $db_species, $db_release, $db_type );
574 if ( ( $db_species, $db_type, $db_release ) = $db_name =~ /^([a-z]+_[a-z]+)_([a-z]+)_(\d+)_\w+$/ ) {
575 next unless $db_species =~ /$species_filter/;
576 next unless grep {$_ eq $db_type} @$wanted_dbs;
577 push @{ $db_hash->{$db_species} }, [ $db_size, $db_path, $db_type, $db_release, $db_name ];
579 elsif ( $db_name =~ /ensembl_compara_(\d+)/ &&
'compara' ~~ @$wanted_dbs ) {
580 push @{ $db_hash->{$db_name} }, [ $db_size, $db_path,
'compara', $1, $db_name ];
583 foreach my $species ( keys %$db_hash ) {
585 my $dbs = $db_hash->{$species};
587 foreach my $db (@$dbs) {
589 myd_path => $db->[1],
592 size => $db->[0] / 1024 / 1024
595 $volume->add_db($db_detail);
596 $volume->add_size( $db_detail->size );
597 my $tag =
"e$db->[3] MYD $species [" . ( join
" ",
map { $_->type } $volume->all_dbs ) .
"]";
599 # my $tag = join " ", map { $_->name } $volume->all_dbs;
601 $volume->ensembl_release( $db->[3] );
603 $volume->sort_in_place_curried;
604 $self->volume_add_to_queue($volume);
609 __PACKAGE__->meta->make_immutable;