Skip to content

More ES syntax updates #62

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 16, 2024
32 changes: 19 additions & 13 deletions bin/backpan.pl
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,17 @@ ()
log_info {"find_releases"};

my $scroll = $es_release->scroll(
fields => [qw< author archive name >],
body => get_release_query(),
body => {
%{ get_release_query() },
size => 500,
_source => [qw< author archive name >],
},
);

while ( my $release = $scroll->next ) {
my $author = $release->{fields}{author}[0];
my $archive = $release->{fields}{archive}[0];
my $name = $release->{fields}{name}[0];
my $author = $release->{_source}{author};
my $archive = $release->{_source}{archive};
my $name = $release->{_source}{name};
next unless $name; # bypass some broken releases

$release_status{$author}{$name} = [
Expand All @@ -64,8 +67,10 @@ ()
unless ($undo) {
return +{
query => {
not => { term => { status => 'backpan' } }
}
bool => {
must_not => [ { term => { status => 'backpan' } }, ],
},
},
};
}

Expand Down Expand Up @@ -118,23 +123,24 @@ ( $author, $author_releases )

my $scroll_file = $es_file->scroll(
scroll => '5m',
fields => [qw< release >],
body => {
query => {
bool => {
must => [
{ term => { author => $author } },
{ terms => { release => $author_releases } }
]
}
}
{ terms => { release => $author_releases } },
],
},
},
size => 500,
_source => [qw< release >],
},
);

$bulk{file} ||= $es_file->bulk( timeout => '5m' );

while ( my $file = $scroll_file->next ) {
my $release = $file->{fields}{release}[0];
my $release = $file->{_source}{release};
$bulk{file}->update( {
id => $file->{_id},
doc => {
Expand Down
11 changes: 7 additions & 4 deletions bin/backup.pl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
use Try::Tiny qw< catch try >;

use MetaCPAN::ES;
use MetaCPAN::Ingest qw< home >;
use MetaCPAN::Ingest qw< home true >;

# config

Expand Down Expand Up @@ -82,7 +82,7 @@ ()
$bulk_store{$key} ||= $es->bulk( max_count => $batch_size );
my $bulk = $bulk_store{$key};

my $parent = $raw->{fields}{_parent};
my $parent = $raw->{_parent};

if ( $raw->{_type} eq 'author' ) {

Expand Down Expand Up @@ -169,9 +169,12 @@ sub run_backup {
( $type ? ( type => $type ) : () )
);
my $scroll = $es->scroll(
size => $size,
fields => [qw< _parent _source >],
scroll => '1m',
body => {
_source => true,
size => $size,
sort => '_doc',
},
);

log_info { 'Backing up ', $scroll->total, ' documents' };
Expand Down
66 changes: 41 additions & 25 deletions bin/check.pl
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@

# look up this module in ElasticSearch and see what we have on it
my $results = $es_file->search(
size => 100, # shouldn't get more than this
fields => [
qw< name release author distribution version authorized indexed maturity date >
],
query => {
query => {
bool => {
must => [
{ term => { 'module.name' => $pkg } },
Expand All @@ -51,22 +47,38 @@
],
},
},
size => 100, # shouldn't get more than this
_source => [ qw<
name
release
author
distribution
version
authorized
indexed
maturity
date
> ],

);
my @files = @{ $results->{hits}{hits} };

# now find the latest release for each of these files
foreach my $file (@files) {
my $release_results = $es_release->search(
size => 1,
fields => [qw< name status authorized version id date >],
query => {
query => {
bool => {
must => [
{ term => { name => $file->{fields}{release} } },
{
term =>
{ name => $file->{_source}{release} }
},
{ term => { status => 'latest' } },
],
},
},
size => 1,
_source => [qw< name status authorized version id date >],
);

push @releases, $release_results->{hits}{hits}[0]
Expand All @@ -78,16 +90,20 @@
if ( !@releases ) {
foreach my $file (@files) {
my $release_results = $es_release->search(
size => 1,
fields =>
[qw< name status authorized version id date >],
query => {
query => {
bool => {
must => [
{ term => { name => $file->{fields}{release} } },
{
term => {
name => $file->{_source}{release}
}
},
],
},
},
size => 1,
_source =>
[qw< name status authorized version id date >],
);

push @releases, @{ $release_results->{hits}{hits} };
Expand All @@ -97,22 +113,22 @@
# if we found any releases, report on them in the log
if (@releases) {
if ( @releases == 1
and $releases[0]->{fields}{status} eq 'latest' )
and $releases[0]->{_source}{status} eq 'latest' )
{
log_info {
"Found latest release $releases[0]->{fields}{name} for $pkg"
"Found latest release $releases[0]->{_source}{name} for $pkg"
}
unless $errors_only;
}
else {
log_error {"Could not find latest release for $pkg"};
foreach my $rel (@releases) {
log_warn {" Found release $rel->{fields}{name}"};
log_warn {" STATUS : $rel->{fields}{status}"};
log_warn {" Found release $rel->{_source}{name}"};
log_warn {" STATUS : $rel->{_source}{status}"};
log_warn {
" AUTORIZED : $rel->{fields}{authorized}"
" AUTORIZED : $rel->{_source}{authorized}"
};
log_warn {" DATE : $rel->{fields}{date}"};
log_warn {" DATE : $rel->{_source}{date}"};
}

$error_count++;
Expand All @@ -123,13 +139,13 @@
"Module $pkg doesn't have any releases in ElasticSearch!"
};
foreach my $file (@files) {
log_warn {" Found file $file->{fields}{name}"};
log_warn {" RELEASE : $file->{fields}{release}"};
log_warn {" AUTHOR : $file->{fields}{author}"};
log_warn {" Found file $file->{_source}{name}"};
log_warn {" RELEASE : $file->{_source}{release}"};
log_warn {" AUTHOR : $file->{_source}{author}"};
log_warn {
" AUTHORIZED : $file->{fields}{authorized}"
" AUTHORIZED : $file->{_source}{authorized}"
};
log_warn {" DATE : $file->{fields}{date}"};
log_warn {" DATE : $file->{_source}{date}"};
}
$error_count++;
}
Expand Down
16 changes: 8 additions & 8 deletions bin/checksum.pl
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
not => {
exists => {
field => "checksum_md5"
}
}
}
},
},
},
_source => [qw< id name download_url >],
},
fields => [qw< id name download_url >],
);

log_warn { "Found " . $scroll->total . " releases" };
Expand All @@ -50,11 +50,11 @@
last;
}

log_info { "Adding checksums for " . $p->{fields}{name}[0] };
log_info { "Adding checksums for " . $p->{_source}{name} };

if ( my $download_url = $p->{fields}{download_url} ) {
if ( my $download_url = $p->{_source}{download_url} ) {
my $file
= cpan_dir . "/authors" . $p->{fields}{download_url}[0]
= cpan_dir . "/authors" . $p->{_source}{download_url}
=~ s/^.*authors//r;
my $checksum_md5 = digest_file_hex( $file, 'MD5' );
my $checksum_sha256 = digest_file_hex( $file, 'SHA-256' );
Expand All @@ -75,7 +75,7 @@
}
}
else {
log_info { $p->{fields}{name}[0] . " is missing a download_url" };
log_info { $p->{_source}{name} . " is missing a download_url" };
}
}

Expand Down
31 changes: 14 additions & 17 deletions bin/cve.pl
Original file line number Diff line number Diff line change
Expand Up @@ -134,32 +134,29 @@

if (@filters) {
my $query = {
query => {
bool => {
must => [
{ term => { distribution => $dist } }, @filters,
]
}
},
bool => {
must =>
[ { term => { distribution => $dist } }, @filters, ]
}
};

my $releases = $es->search(
index => 'cpan',
type => 'release',
body => $query,
fields => [ "version", "name", "author", ],
size => 2000,
index => 'cpan',
type => 'release',
body => {
query => $query,
_source => [qw< version name author >],
size => 2000,
},
);

if ( $releases->{hits}{total} ) {
## no critic (ControlStructures::ProhibitMutatingListFunctions)
@matches = map { $_->[0] }
sort { $a->[1] <=> $b->[1] }
map {
my %fields = %{ $_->{fields} };
ref $_ and $_ = $_->[0] for values %fields;
[ \%fields, numify_version( $fields{version} ) ];
} @{ $releases->{hits}{hits} };
map { [ $_->{_source},
numify_version( $_->{_source}{version} ) ] }
@{ $releases->{hits}{hits} };
}
else {
log_debug {
Expand Down
Loading
Loading