|
| 1 | +use strict; |
| 2 | +use warnings; |
| 3 | +use v5.36; |
| 4 | + |
| 5 | +use Cpanel::JSON::XS qw< decode_json encode_json >; |
| 6 | +use DateTime (); |
| 7 | +use DateTime::Format::ISO8601 (); |
| 8 | +use HTTP::Tiny (); |
| 9 | +use Getopt::Long; |
| 10 | +use Sys::Hostname qw< hostname >; |
| 11 | + |
| 12 | +use MetaCPAN::Logger qw< :log :dlog >; |
| 13 | + |
| 14 | +use MetaCPAN::ES; |
| 15 | +use MetaCPAN::Ingest qw< are_you_sure >; |
| 16 | + |
# setup
my $hostname = hostname();

# Hosts whose name contains "dev" are pointed at the testing bucket.
my $mode            = $hostname =~ /dev/ ? 'testing' : 'production';
my $bucket          = "mc-${mode}-backups";    # So we don't break production
my $repository_name = 'our_backups';

#my $es = MetaCPAN::ES->new( type => "distribution" );
#my $bulk = $es->bulk();

# args
my (
    $date_format, $indices, $list,      $purge_old, $restore,
    $setup,       $snap,    $snap_name, $snap_stub
);

# NOTE(review): no `use MetaCPAN::Server::Config` is visible in this file;
# confirm that one of the modules loaded above pulls it in.
my $host = MetaCPAN::Server::Config::config()->{elasticsearch_servers};

# Multi-word options accept both the underscore spelling and the dashed
# spelling shown in the POD (--snap-stub, --date-format, ...). Getopt::Long
# does not treat "_" and "-" as interchangeable, so each gets an alias.
GetOptions(
    "list"                      => \$list,
    "date_format|date-format=s" => \$date_format,
    "host=s"                    => \$host,
    "indices=s"                 => \$indices,
    "purge_old|purge-old"       => \$purge_old,
    "restore"                   => \$restore,
    "setup"                     => \$setup,
    "snap"                      => \$snap,
    "snap_name|snap-name=s"     => \$snap_name,
    "snap_stub|snap-stub=s"     => \$snap_stub,
) or die "Invalid command line options\n";

# Note: can take wild cards https://www.elastic.co/guide/en/elasticsearch/reference/2.4/multi-index.html
$indices //= '*';

# NOTE(review): $config is still an empty placeholder, so both credential
# checks below always die until the real configuration is wired in.
my $config = {};    ## TODO ( use MetaCPAN::Server::Config (); ??? )

my $aws_key    = $config->{es_aws_s3_access_key};
my $aws_secret = $config->{es_aws_s3_secret};

my $http_client = HTTP::Tiny->new(
    default_headers => { 'Accept' => 'application/json' },
    timeout         => 120,    # list can be slow
);

# run
die "es_aws_s3_access_key not in config" unless $aws_key;
die "es_aws_s3_secret not in config"     unless $aws_secret;

# Requested actions run in this fixed order. The "argument required" error
# is raised only when no action flag was given at all; previously it was
# raised unconditionally, even after an action had completed.
my @actions = (
    [ $list,      \&run_list_snaps ],
    [ $setup,     \&run_setup ],
    [ $snap,      \&run_snapshot ],
    [ $purge_old, \&run_purge_old ],
    [ $restore,   \&run_restore ],
);

my $ran_action = 0;
for my $action (@actions) {
    my ( $requested, $handler ) = @{$action};
    next unless $requested;
    $handler->();
    $ran_action = 1;
}

die "setup, restore, purge-old or snap argument required"
    unless $ran_action;

1;
| 71 | + |
| 72 | +### |
| 73 | + |
# Ask Elasticsearch to create a snapshot in the repository.
#
# The snapshot is named "<snap_stub>_<date>", where <date> is the current
# time formatted with $date_format. Dies when either option is missing.
# Returns whatever _request() returns for the PUT.
sub run_snapshot () {
    die 'Missing snap-stub'                   unless $snap_stub;
    die 'Missing date-format (e.g. %Y-%m-%d)' unless $date_format;

    my $stamp     = DateTime->now->strftime($date_format);
    my $full_name = join '_', $snap_stub, $stamp;

    log_debug { 'snapping: ' . $full_name };
    log_debug { 'with indices: ' . $indices };

    return _request(
        'put',
        "${repository_name}/${full_name}",
        {
            "ignore_unavailable"   => 0,
            "include_global_state" => 1,
            "indices"              => $indices,
        },
    );
}
| 95 | + |
# Log the name of every snapshot in the repository at info level, and the
# full snapshot record at debug level.
#
# Returns the _request() result: the HTTP::Tiny response hash, or 0 when
# the request failed (in which case _request() has already logged why).
sub run_list_snaps () {
    my $path     = "${repository_name}/_all";
    my $response = _request( 'get', $path, {} );

    # Nothing to list when the request itself failed.
    return $response unless $response;

    my $data = eval { decode_json( $response->{content} ) };
    if ( ref $data ne 'HASH' ) {
        log_error { 'Could not decode snapshot list from ' . $path };
        return $response;
    }

    foreach my $snapshot ( @{ $data->{snapshots} || [] } ) {

        # Two separate statements: without the terminating semicolon the
        # Dlog_debug call is parsed as an argument of log_info.
        log_info { $snapshot->{snapshot} };
        Dlog_debug {$_} $snapshot;
    }

    return $response;
}
| 109 | + |
# Delete old snapshots from the repository.
#
# A snapshot is kept when it was started on the 1st day of a month, or when
# its start time is later than 30 days before now. Everything else is
# deleted, one request per snapshot, in sorted name order.
sub run_purge_old () {
    my $cutoff = DateTime->now->subtract( days => 30 );

    # fetch the current list
    my $listing = _request( 'get', "${repository_name}/_all", {} );
    my $data    = eval { decode_json $listing->{content} };

    my %doomed;
    for my $entry ( @{ $data->{snapshots} || [] } ) {
        my $taken_at = DateTime::Format::ISO8601->parse_datetime(
            $entry->{start_time} );

        my $is_first_of_month = $taken_at->day == 1;
        my $is_recent = DateTime->compare( $taken_at, $cutoff ) > 0;

        # neither a monthly keeper nor recent: mark it for deletion
        $doomed{ $entry->{snapshot} } = 1
            unless $is_first_of_month || $is_recent;
    }

    for my $name ( sort keys %doomed ) {
        my $target = "${repository_name}/${name}";
        log_info {"Deleting ${target}"};
        _request( 'delete', $target, {} );
    }
}
| 141 | + |
# Ask Elasticsearch to restore the snapshot named by --snap-name.
#
# Dies when no snapshot name was given. Calls are_you_sure() before sending
# the request. Indices are restored under their original names (the rename
# settings below are deliberately commented out).
# Returns the _request() result: the response hash, or 0 on failure.
sub run_restore () {

    # Same guard style as run_snapshot(): without a name the request path
    # would degenerate to "<repository>//_restore".
    die 'Missing snap-name' unless $snap_name;

    are_you_sure('Restoring... will NOT rename indices as ES::Model breaks');

    # IF we were not using ES::Model!..
    # This is a safety feature, we can always
    # create aliases to point to them if required
    # just make sure there is enough disk space
    my $data = {

        #    "rename_pattern"     => '(.+)',
        #    "rename_replacement" => 'restored_$1',
    };

    # No wait parameter is sent with this request, so a truthy response is
    # not treated as "restore finished"; the log line below points at
    # /_cat/recovery for progress.
    my $path     = "${repository_name}/${snap_name}/_restore";
    my $response = _request( 'post', $path, $data );
    log_info {
        'restoring: ' . $snap_name . ' - see /_cat/recovery for progress'
    }
    if $response;
    return $response;
}
| 166 | + |
# Register (or update) the S3-backed snapshot repository in Elasticsearch,
# using the bucket and AWS credentials resolved at the top of the script.
# Returns whatever _request() returns for the PUT.
sub run_setup () {
    log_debug { 'setup: ' . $repository_name };

    my %s3_settings = (
        "access_key"                 => $aws_key,
        "bucket"                     => $bucket,
        "canned_acl"                 => "private",
        "max_restore_bytes_per_sec"  => '500mb',
        "max_snapshot_bytes_per_sec" => '500mb',
        "protocol"                   => "https",
        "region"                     => "us-east",
        "secret_key"                 => $aws_secret,
        "server_side_encryption"     => 1,
        "storage_class"              => "standard",
    );

    return _request(
        'put',
        "${repository_name}",
        { "type" => "s3", "settings" => \%s3_settings },
    );
}
| 191 | + |
# Send one request to the Elasticsearch snapshot API.
#
#   $method - name of the HTTP::Tiny method to call ('get', 'put', ...)
#   $path   - path below "<host>/_snapshot/"
#   $data   - structure sent JSON-encoded as the request body
#
# Returns the HTTP::Tiny response hash on success. On failure the problem
# is logged and 0 is returned, so callers can test the result for truth.
sub _request ( $method, $path, $data ) {
    my $url  = $host . '/_snapshot/' . $path;
    my $json = encode_json($data);

    my $response = $http_client->$method( $url, { content => $json } );
    return $response if $response->{success};

    log_error { 'Problem requesting ' . $url };

    my $body = $response->{content};
    if ( defined $body && length $body ) {

        # Prefer a structured dump of the error; fall back to the raw body
        # when it is not valid JSON. A plain eval is used because neither
        # the "try" feature nor Try::Tiny is loaded in this file.
        my $logged_as_json = eval {
            my $resp_json = decode_json($body);
            Dlog_error {"Error response: $_"} $resp_json;
            1;
        };
        log_error { 'Error msg: ' . $body } unless $logged_as_json;
    }

    return 0;
}
| 214 | + |
| 215 | +__END__ |
| 216 | +
|
| 217 | +=head1 NAME |
| 218 | +
|
| 219 | +MetaCPAN::Script::Snapshot - Snapshot (and restore) Elasticsearch indices |
| 220 | +
|
| 221 | +=head1 SYNOPSIS |
| 222 | +
|
| 223 | +# Setup |
| 224 | + $ bin/metacpan snapshot --setup (only needed once) |
| 225 | +
|
| 226 | +# Snapshot all indexes daily |
| 227 | + $ bin/metacpan snapshot --snap --snap-stub full --date-format %Y-%m-%d |
| 228 | +
|
| 229 | +# List what has been snapshotted |
| 230 | + $ bin/metacpan snapshot --list |
| 231 | +
|
| 232 | +# restore (indices keep their original names; they are NOT renamed to `restored_foo`) |
| 233 | + $ bin/metacpan snapshot --restore --snap-name full_2016-12-01 |
| 234 | +
|
| 235 | +# purge anything older than 30 days and not created on the 1st of a month |
| 236 | + $ bin/metacpan snapshot --purge-old |
| 237 | +
|
| 238 | +Another example.. |
| 239 | +
|
| 240 | +# Snapshot just user* indexes hourly and restore |
| 241 | + $ bin/metacpan snapshot --snap --indices 'user*' --snap-stub user --date-format '%Y-%m-%d-%H' |
| 242 | + $ bin/metacpan snapshot --restore --snap-name user_2016-12-01-12 |
| 243 | +
|
| 244 | +Also useful: |
| 245 | +
|
| 246 | +See status of snapshot... |
| 247 | +
|
| 248 | + curl localhost:9200/_snapshot/our_backups/SNAP-NAME/_status |
| 249 | +
|
| 250 | + curl localhost:9200/_cat/recovery |
| 251 | +
|
| 252 | +Add an alias to the restored index |
| 253 | +
|
| 254 | + curl -X POST 'localhost:9200/_aliases' -d ' |
| 255 | + { |
| 256 | + "actions" : [ |
| 257 | + { "add" : { "index" : "restored_user", "alias" : "user" } } |
| 258 | + ] |
| 259 | + }' |
| 260 | +
|
| 261 | +=head1 DESCRIPTION |
| 262 | +
|
| 263 | +Tell Elasticsearch to set up the snapshot repository (only needed once), |
| 264 | +take a snapshot, or restore from backups stored in AWS S3. |
| 265 | +
|
| 266 | +You will need to run --setup on any box you wish to restore to |
| 267 | +
|
| 268 | +You will need es_aws_s3_access_key and es_aws_s3_secret set up |
| 269 | +in your local metacpan_server_local.conf. |
| 270 | +
|
| 271 | +
|
| 272 | +=cut |
0 commit comments