From 7d6697ce2275ae2187b3a7a1fe048f873ffc8db0 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Mon, 3 Feb 2025 18:57:06 +0100 Subject: [PATCH] rewrite mirror script to avoid ESXM The mirror network is gone, but we still index the list of the one mirror. While we could consider removing this index in the future, for now just fix the script to use Elasticsearch directly rather than ElasticSearchX::Model. Instead of clearing the whole index first, scroll the existing documents and delete only those whose _id is not the name of a mirror in mirrors.json, then upsert every mirror through the bulk helper. The CDN is purged only if at least one bulk action succeeded; once Elasticsearch reports a result for updates, 'noop' updates no longer count towards that. The longitude and latitude keys are moved out of the indexed document into its location field, and the decoded mirrors.json records themselves are left unmodified. --- lib/MetaCPAN/Script/Mirrors.pm | 62 +++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/lib/MetaCPAN/Script/Mirrors.pm b/lib/MetaCPAN/Script/Mirrors.pm index 3aa80b530..c6eab75cd 100644 --- a/lib/MetaCPAN/Script/Mirrors.pm +++ b/lib/MetaCPAN/Script/Mirrors.pm @@ -6,6 +6,8 @@ use warnings; use Cpanel::JSON::XS (); use Log::Contextual qw( :log :dlog ); use Moose; +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw( true false ); with 'MetaCPAN::Role::Script', 'MooseX::Getopt'; @@ -19,26 +21,60 @@ sub index_mirrors { my $self = shift; log_info { 'Getting mirrors.json file from ' . 
$self->cpan }; - my $json = $self->cpan->child( 'indices', 'mirrors.json' )->slurp; - my $type = $self->model->doc('mirror'); - - # Clear out everything in the index - # so don't end up with old mirrors - $type->delete; + my $es = $self->es; + my $json = $self->cpan->child( 'indices', 'mirrors.json' )->slurp; my $mirrors = Cpanel::JSON::XS::decode_json($json); - foreach my $mirror (@$mirrors) { - $mirror->{location} - = { lon => $mirror->{longitude}, lat => $mirror->{latitude} }; + my %mirrors = map +( $_->{name} => $_ ), @$mirrors; + + my $need_purge; + + my $scroll = $es->scroll_helper( es_doc_path('mirror'), size => 500, ); + my $bulk = $es->bulk_helper( + es_doc_path('mirror'), + on_success => sub { + my ( $method, $res ) = @_; + if ( $method eq 'update' ) { + + # result is not supported until 5, but this will work when we + # update + if ( exists $res->{result} ) { + return + if $res->{result} eq 'noop'; + } + } + $need_purge++; + }, + ); + while ( my $doc = $scroll->next ) { + if ( !$mirrors{ $doc->{_id} } ) { + Dlog_trace {"Deleting $doc->{_id}"}; + $bulk->delete_ids( $doc->{_id} ); + } + } + + for my $mirror (@$mirrors) { + my $data = {%$mirror}; + delete $data->{$_} for grep !defined $data->{$_}, keys %$data; + $data->{location} = { + lon => delete $data->{longitude}, + lat => delete $data->{latitude}, + }; + Dlog_trace {"Indexing $_"} $mirror; - $type->put( { - map { $_ => $mirror->{$_} } - grep { defined $mirror->{$_} } keys %$mirror + $bulk->update( { + id => $mirror->{name}, + doc => $data, + doc_as_upsert => true, } ); } + + $bulk->flush; + log_info {'done'}; - $self->cdn_purge_now( { keys => ['MIRRORS'], } ); + $self->cdn_purge_now( { keys => ['MIRRORS'] } ) + if $need_purge; }