Skip to content

Commit 04eff87

Browse files
committed
Added check script
1 parent 7564c06 commit 04eff87

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed

bin/check.pl

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Getopt::Long;
6+
use MetaCPAN::Logger qw< :log :dlog >;
7+
8+
use MetaCPAN::ES;
9+
use MetaCPAN::Ingest qw< read_02packages_fh >;
10+
11+
# args
12+
my ( $module, $max_errors, $errors_only );
13+
14+
GetOptions(
15+
"module=s" => \$module,
16+
"max_errors" => \$max_errors,
17+
"errors_only" => \$errors_only,
18+
);
19+
20+
# setup
21+
my $error_count = 0;
22+
my $packages_fh = read_02packages_fh();
23+
my $es_file = MetaCPAN::ES->new( type => "file" );
24+
my $es_release = MetaCPAN::ES->new( type => "release" );
25+
26+
my $modules_start = 0;
27+
while ( my $line = <$packages_fh> ) {
28+
last if $max_errors && $error_count >= $max_errors;
29+
chomp($line);
30+
31+
if ($modules_start) {
32+
my ( $pkg, $ver, $dist ) = split( /\s+/, $line );
33+
my @releases;
34+
35+
# we only care about packages if we aren't searching for a
36+
# particular module or if it matches
37+
if ( !$module || $pkg eq $module ) {
38+
39+
# look up this module in ElasticSearch and see what we have on it
40+
my $results = $es_file->search(
41+
size => 100, # shouldn't get more than this
42+
fields => [
43+
qw< name release author distribution version authorized indexed maturity date >
44+
],
45+
query => { match_all => {} },
46+
filter => {
47+
and => [
48+
{ term => { 'module.name' => $pkg } },
49+
{ term => { authorized => 'true' } },
50+
{ term => { maturity => 'released' } },
51+
],
52+
},
53+
);
54+
my @files = @{ $results->{hits}{hits} };
55+
56+
# now find the first latest releases for these files
57+
foreach my $file (@files) {
58+
my $release_results = $es_release->search(
59+
size => 1,
60+
fields => [qw< name status authorized version id date >],
61+
query => { match_all => {} },
62+
filter => {
63+
and => [
64+
{ term => { name => $file->{fields}{release} } },
65+
{ term => { status => 'latest' } },
66+
],
67+
},
68+
);
69+
70+
push @releases, $release_results->{hits}{hits}[0]
71+
if $release_results->{hits}{hits}[0];
72+
}
73+
74+
# if we didn't find the latest release, then look at all of the
75+
# releases so we can find out what might be wrong
76+
if ( !@releases ) {
77+
foreach my $file (@files) {
78+
my $release_results = $es_release->search(
79+
size => 1,
80+
fields =>
81+
[qw< name status authorized version id date >],
82+
query => { match_all => {} },
83+
filter => {
84+
and => [
85+
{
86+
term =>
87+
{ name => $file->{fields}{release} }
88+
}
89+
]
90+
},
91+
);
92+
93+
push @releases, @{ $release_results->{hits}{hits} };
94+
}
95+
}
96+
97+
# if we found the releases tell them about it
98+
if (@releases) {
99+
if ( @releases == 1
100+
and $releases[0]->{fields}{status} eq 'latest' )
101+
{
102+
log_info {
103+
"Found latest release $releases[0]->{fields}{name} for $pkg"
104+
}
105+
unless $errors_only;
106+
}
107+
else {
108+
log_error {"Could not find latest release for $pkg"};
109+
foreach my $rel (@releases) {
110+
log_warn {" Found release $rel->{fields}{name}"};
111+
log_warn {" STATUS : $rel->{fields}->{status}"};
112+
log_warn {
113+
" AUTORIZED : $rel->{fields}->{authorized}"
114+
};
115+
log_warn {" DATE : $rel->{fields}->{date}"};
116+
}
117+
118+
$error_count++;
119+
}
120+
}
121+
elsif (@files) {
122+
log_error {
123+
"Module $pkg doesn't have any releases in ElasticSearch!"
124+
};
125+
foreach my $file (@files) {
126+
log_warn {" Found file $file->{fields}->{name}"};
127+
log_warn {" RELEASE : $file->{fields}->{release}"};
128+
log_warn {" AUTHOR : $file->{fields}->{author}"};
129+
log_warn {
130+
" AUTHORIZED : $file->{fields}->{authorized}"
131+
};
132+
log_warn {" DATE : $file->{fields}->{date}"};
133+
}
134+
$error_count++;
135+
}
136+
else {
137+
log_error {
138+
"Module $pkg [$dist] doesn't not appear in ElasticSearch!"
139+
};
140+
$error_count++;
141+
}
142+
last if $module;
143+
}
144+
}
145+
elsif ( !length $line ) {
146+
$modules_start = 1;
147+
}
148+
}
149+
150+
log_info {"done"};
151+
152+
1;
153+
__END__
154+
155+
=pod
156+
157+
=head1 SYNOPSIS
158+
159+
Performs checks on the MetaCPAN data store to make sure an
160+
author/module/distribution has been indexed correctly and has the
161+
appropriate information.
162+
163+
=head2 check_modules
164+
165+
Checks all of the modules in CPAN against the information in ElasticSearch.
166+
If is C<module> attribute exists, it will just look at packages that match
167+
that module name.
168+
169+
=head1 TODO
170+
171+
=over
172+
173+
=item * Add support for checking authors
174+
175+
=item * Add support for checking releases
176+
177+
=back
178+
179+
=cut

0 commit comments

Comments
 (0)