Skip to content

Commit e98d259

Browse files
authored
Merge pull request #33 from metacpan/mickey/snapshot
Added snapshot script
2 parents b52930b + 1476f81 commit e98d259

File tree

2 files changed

+301
-0
lines changed

2 files changed

+301
-0
lines changed

bin/snapshot.pl

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Cpanel::JSON::XS qw< decode_json encode_json >;
6+
use DateTime ();
7+
use DateTime::Format::ISO8601 ();
8+
use HTTP::Tiny ();
9+
use Getopt::Long;
10+
use Sys::Hostname qw< hostname >;
11+
12+
use MetaCPAN::Logger qw< :log :dlog >;
13+
14+
use MetaCPAN::ES;
15+
use MetaCPAN::Ingest qw< are_you_sure >;
16+
17+
# setup
18+
# Derive the environment from the machine's hostname: any host whose name
# contains "dev" is treated as a testing box, everything else as production.
my $hostname = hostname();
my $mode     = 'production';
$mode = 'testing' if $hostname =~ /dev/;

# The S3 bucket is per-environment, so we don't break production.
my $bucket = join '-', 'mc', $mode, 'backups';

# Name of the Elasticsearch snapshot repository every request is made against.
my $repository_name = 'our_backups';
22+
23+
#my $es = MetaCPAN::ES->new( type => "distribution" );
24+
#my $bulk = $es->bulk();
25+
26+
# args
27+
# Command-line options. Each one stays undef unless given on the command line.
my (
    $date_format, $indices, $list, $purge_old, $restore,
    $setup, $snap, $snap_name, $snap_stub
);

# Default Elasticsearch endpoint comes from the server config; --host
# overrides it.
# NOTE(review): MetaCPAN::Server::Config is not loaded explicitly by this
# script - confirm one of the modules imported above pulls it in.
my $host = MetaCPAN::Server::Config::config()->{elasticsearch_servers};

# Multi-word options accept both the underscore spelling and the dashed
# spelling that the POD and the error messages use (--snap-stub,
# --date-format, --snap-name, --purge-old).
GetOptions(
    "list"                      => \$list,
    "date_format|date-format=s" => \$date_format,
    "host=s"                    => \$host,
    "indices=s"                 => \$indices,
    "purge_old|purge-old"       => \$purge_old,
    "restore"                   => \$restore,
    "setup"                     => \$setup,
    "snap"                      => \$snap,
    "snap_name|snap-name=s"     => \$snap_name,
    "snap_stub|snap-stub=s"     => \$snap_stub,
) or die "Invalid command line options\n";

# Note: can take wild cards https://www.elastic.co/guide/en/elasticsearch/reference/2.4/multi-index.html
$indices //= '*';
47+
48+
## TODO ( use MetaCPAN::Server::Config (); ??? )
# Until real configuration is wired in this is an empty hash, so both AWS
# credentials below come out undefined.
my $config = {};

my ( $aws_key, $aws_secret )
    = @{$config}{qw< es_aws_s3_access_key es_aws_s3_secret >};

# Shared HTTP client used for every Elasticsearch request in this script.
my $http_client = HTTP::Tiny->new(
    timeout         => 120,    # list can be slow
    default_headers => { 'Accept' => 'application/json' },
);
57+
58+
# run
59+
# Refuse to do anything without the S3 credentials.
die "es_aws_s3_access_key not in config" unless $aws_key;
die "es_aws_s3_secret not in config"     unless $aws_secret;

# Each entry pairs an option flag with the action it triggers. Requested
# actions run in this fixed order.
my @actions = (
    [ $list,      \&run_list_snaps ],
    [ $setup,     \&run_setup ],
    [ $snap,      \&run_snapshot ],
    [ $purge_old, \&run_purge_old ],
    [ $restore,   \&run_restore ],
);

my $actions_run = 0;
for my $action (@actions) {
    my ( $requested, $handler ) = @{$action};
    next unless $requested;
    $handler->();
    $actions_run++;
}

# Only complain when no action was requested at all. This die used to be
# unconditional, so the script ended with an error even after a requested
# action had completed.
die "setup, restore, purge-old or snap argument required"
    unless $actions_run;

1;
71+
72+
###
73+
74+
# Create a snapshot named "<snap_stub>_<formatted date>" in the repository,
# covering the indices selected by $indices.
#
# Dies when the snap stub or the date format option was not supplied.
# Returns whatever _request() returns for the PUT request.
sub run_snapshot () {
    $snap_stub   or die 'Missing snap-stub';
    $date_format or die 'Missing date-format (e.g. %Y-%m-%d)';

    my $date      = DateTime->now->strftime($date_format);
    my $snap_name = $snap_stub . '_' . $date;

    # \0 and \1 are serialised as JSON false/true, so the two flags are
    # sent as real booleans instead of the numbers 0 and 1.
    my $data = {
        "ignore_unavailable"   => \0,
        "include_global_state" => \1,
        "indices"              => $indices,
    };

    log_debug { 'snapping: ' . $snap_name };
    log_debug { 'with indices: ' . $indices };

    my $path = "${repository_name}/${snap_name}";

    return _request( 'put', $path, $data );
}
95+
96+
# Fetch all snapshots in the repository and log them: the snapshot name at
# info level and the full snapshot record at debug level.
#
# Returns the response from _request() (a false value when the request
# failed).
sub run_list_snaps () {
    my $path     = "${repository_name}/_all";
    my $response = _request( 'get', $path, {} );

    # _request() hands back a false value on failure; there is nothing to
    # list in that case.
    return $response unless $response;

    my $data = eval { decode_json $response->{content} };
    if ( ref $data ne 'HASH' ) {
        my $error = $@ || 'response is not a JSON object';
        log_error { 'Could not decode snapshot list: ' . $error };
        return $response;
    }

    foreach my $snapshot ( @{ $data->{snapshots} || [] } ) {

        # Two separate statements: without the terminating semicolon the
        # Dlog_debug call was parsed as an argument to log_info.
        log_info { $snapshot->{snapshot} };
        Dlog_debug {$_} $snapshot;
    }

    return $response;
}
109+
110+
# Delete old snapshots from the repository.
#
# A snapshot is kept when it was started on the 1st day of a month, or when
# it was started after the cut-off (now minus 30 days). Everything else is
# deleted, in name order.
sub run_purge_old () {
    my $keep_all_after = DateTime->now->subtract( days => 30 );

    # fetch the current list
    my $path     = "${repository_name}/_all";
    my $response = _request( 'get', $path, {} );

    # _request() hands back a false value on failure; never purge based on
    # a list we could not fetch.
    return unless $response;

    my $data = eval { decode_json $response->{content} };
    if ( ref $data ne 'HASH' ) {
        my $error = $@ || 'response is not a JSON object';
        log_error { 'Could not decode snapshot list: ' . $error };
        return;
    }

    my %to_delete;
    foreach my $snapshot ( @{ $data->{snapshots} || [] } ) {

        # Without a start time the age cannot be determined; keep the
        # snapshot instead of dying inside the date parser.
        next unless defined $snapshot->{start_time};

        my $snap_date = DateTime::Format::ISO8601->parse_datetime(
            $snapshot->{start_time} );

        # keep 1st of each month
        next if $snap_date->day == 1;

        # keep anything that is recent (as per $keep_all_after);
        # compare() yields 1 when the first date is the later one
        next if DateTime->compare( $snap_date, $keep_all_after ) == 1;

        # we want to delete it then
        $to_delete{ $snapshot->{snapshot} } = 1;
    }

    foreach my $snap ( sort keys %to_delete ) {
        my $delete_path = "${repository_name}/${snap}";
        log_info {"Deleting ${delete_path}"};
        _request( 'delete', $delete_path, {} );
    }

    return;
}
141+
142+
# Restore the snapshot named by the snap name option into the cluster.
#
# Dies when no snapshot name was supplied. Asks for confirmation first and
# returns 0 without contacting Elasticsearch when are_you_sure() does not
# confirm. Otherwise returns whatever _request() returns for the POST.
sub run_restore () {
    $snap_name or die 'Missing snap-name';

    # Honour the answer: previously the return value was ignored, so the
    # restore went ahead whenever are_you_sure() returned at all.
    are_you_sure('Restoring... will NOT rename indices as ES::Model breaks')
        or return 0;

    # IF we were not using ES::Model!..
    # This is a safety feature, we can always
    # create aliases to point to them if required
    # just make sure there is enough disk space
    my $data = {

        # "rename_pattern" => '(.+)',
        # "rename_replacement" => 'restored_$1',
    };

    # NOTE(review): an earlier comment here claimed "We wait until it's
    # actually done", but nothing in the path or the body asks
    # Elasticsearch to wait; progress is followed via /_cat/recovery.
    my $path     = "${repository_name}/${snap_name}/_restore";
    my $response = _request( 'post', $path, $data );

    if ($response) {
        log_info {
            'restoring: ' . $snap_name . ' - see /_cat/recovery for progress'
        };
    }

    return $response;
}
166+
167+
# Register the S3-backed snapshot repository with Elasticsearch, using the
# AWS credentials and the per-environment bucket configured at the top of
# the script.
#
# Returns whatever _request() returns for the PUT request.
sub run_setup () {
    log_debug { 'setup: ' . $repository_name };

    my $data = {
        "type"     => "s3",
        "settings" => {
            "access_key"                 => $aws_key,
            "bucket"                     => $bucket,
            "canned_acl"                 => "private",
            "max_restore_bytes_per_sec"  => '500mb',
            "max_snapshot_bytes_per_sec" => '500mb',
            "protocol"                   => "https",
            "region"                     => "us-east",
            "secret_key"                 => $aws_secret,

            # \1 is serialised as JSON true, so the flag is sent as a real
            # boolean instead of the number 1.
            "server_side_encryption" => \1,
            "storage_class"          => "standard",
        }
    };

    # The repository itself is addressed by its bare name.
    return _request( 'put', $repository_name, $data );
}
191+
192+
# Send one request to the Elasticsearch snapshot API.
#
#   $method - name of the HTTP::Tiny method to call (this script uses
#             'get', 'put', 'post' and 'delete')
#   $path   - path below "<host>/_snapshot/"
#   $data   - hashref that is JSON-encoded and sent as the request body
#
# Returns the HTTP::Tiny response hashref on success. On any unsuccessful
# response the problem is logged and 0 is returned.
sub _request ( $method, $path, $data ) {
    my $url  = $host . '/_snapshot/' . $path;
    my $json = encode_json($data);

    # Declare the body as JSON; the client's default headers only set
    # Accept.
    my $response = $http_client->$method(
        $url,
        {
            headers => { 'Content-Type' => 'application/json' },
            content => $json,
        }
    );

    return $response if $response->{success};

    # Every unsuccessful response is a failure, including one with an empty
    # body, which used to be handed back as if it had succeeded.
    log_error { 'Problem requesting ' . $url };

    my $body = $response->{content};
    if ( defined $body && length $body ) {

        # Plain eval instead of try/catch: no try implementation is loaded
        # in this script, and the old block lacked the trailing semicolon
        # that Try::Tiny needs.
        my $resp_json = eval { decode_json($body) };
        if ( defined $resp_json ) {
            Dlog_error {"Error response: $_"} $resp_json;
        }
        else {
            log_error { 'Error msg: ' . $body };
        }
    }
    else {
        my $status = join ' ', grep {defined} @{$response}{qw< status reason >};
        log_error { 'Error status: ' . $status };
    }

    return 0;
}
214+
215+
__END__
216+
217+
=head1 NAME
218+
219+
MetaCPAN::Script::Snapshot - Snapshot (and restore) Elasticsearch indices
220+
221+
=head1 SYNOPSIS
222+
223+
# Setup
224+
$ bin/metacpan snapshot --setup (only needed once)
225+
226+
# Snapshot all indexes daily
227+
$ bin/metacpan snapshot --snap --snap-stub full --date-format %Y-%m-%d
228+
229+
# List what has been snapshotted
230+
$ bin/metacpan snapshot --list
231+
232+
# restore (indices are restored under their original names; they are NOT renamed)
233+
$ bin/metacpan snapshot --restore --snap-name full_2016-12-01
234+
235+
# purge anything older than 30 days and not created on the 1st of a month
236+
$ bin/metacpan snapshot --purge-old
237+
238+
Another example..
239+
240+
# Snapshot just user* indexes hourly and restore
241+
$ bin/metacpan snapshot --snap --indices 'user*' --snap-stub user --date-format '%Y-%m-%d-%H'
242+
$ bin/metacpan snapshot --restore --snap-name user_2016-12-01-12
243+
244+
Also useful:
245+
246+
See status of snapshot...
247+
248+
curl localhost:9200/_snapshot/our_backups/SNAP-NAME/_status
249+
250+
curl localhost:9200/_cat/recovery
251+
252+
Add an alias to the restored index
253+
254+
curl -X POST 'localhost:9200/_aliases' -d '
255+
{
256+
"actions" : [
257+
{ "add" : { "index" : "restored_user", "alias" : "user" } }
258+
]
259+
}'
260+
261+
=head1 DESCRIPTION
262+
263+
Tell Elasticsearch to set up (only needed once), snap or
264+
restore from backups stored in AWS S3.
265+
266+
You will need to run --setup on any box you wish to restore to
267+
268+
You will need es_aws_s3_access_key and es_aws_s3_secret set up
269+
in your local metacpan_server_local.conf
270+
271+
272+
=cut

lib/MetaCPAN/Ingest.pm

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use MetaCPAN::Logger qw< :log :dlog >;
1818

1919
use Sub::Exporter -setup => {
2020
exports => [ qw<
21+
are_you_sure
2122
config
2223
author_dir
2324
cpan_dir
@@ -54,6 +55,34 @@ $config->init_logger;
5455

5556
sub config () {$config}
5657

58+
# Ask the user to confirm a dangerous operation.
#
#   $msg - warning text describing what is about to happen
#
# When STDOUT is a terminal the user is prompted and must type exactly
# "YES"; any other answer is logged, exit_code(125) is called and 0 is
# returned. When STDOUT is not a terminal the warning is only logged and
# the operation counts as confirmed.
#
# Returns 1 when confirmed, 0 otherwise.
sub are_you_sure ( $msg ) {
    my $iconfirmed = 0;

    if ( -t *STDOUT ) {
        my $answer = prompt(
            colored( ['bold red'], "*** Warning ***: $msg" ) . "\n"
                . 'Are you sure you want to do this (type "YES" to confirm) ? '
        );

        # Treat "no answer at all" like a wrong answer instead of warning
        # about an undefined value in the comparison below.
        $answer //= '';

        if ( $answer ne 'YES' ) {
            log_error {"Confirmation incorrect: '$answer'"};
            print "Operation will be interrupted!\n";

            # System Error: 125 - ECANCELED - Operation canceled
            # NOTE(review): exit_code() is not defined in the visible part
            # of this file - confirm whether it terminates the process;
            # callers should check the return value either way.
            exit_code(125);
        }
        else {
            log_info {'Operation confirmed.'};
            print "alright then...\n";
            $iconfirmed = 1;
        }
    }
    else {
        log_info {"*** Warning ***: $msg"};
        $iconfirmed = 1;
    }

    return $iconfirmed;
}
85+
5786
sub author_dir ($pauseid) {
5887
my $dir = 'id/'
5988
. sprintf( "%s/%s/%s",

0 commit comments

Comments
 (0)