Skip to content

Commit 3f97c44

Browse files
committed
Added checksum script
1 parent dbff073 commit 3f97c44

File tree

1 file changed

+112
-0
lines changed

1 file changed

+112
-0
lines changed

bin/checksum.pl

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Getopt::Long;
6+
use Digest::file qw< digest_file_hex >;
7+
use MetaCPAN::Logger qw< :log :dlog >;
8+
9+
use MetaCPAN::ES;
10+
use MetaCPAN::Ingest qw< cpan_dir >;
11+
12+
# args
#
#   --limit N        maximum number of releases to process (default 1000;
#                    a negative value disables the cap)
#   --[no-]dry_run   only log the computed checksums instead of writing them.
#                    The option is negatable so the `--no-dry_run` spelling
#                    is accepted; when neither form is given the script
#                    performs real updates, exactly as before.
my $limit = 1000;
my $dry_run;

# Abort on unrecognised options: a mistyped `--dry_run` must not silently
# fall through to a live update run.
GetOptions(
    "limit=i"  => \$limit,
    "dry_run!" => \$dry_run,
) or die "Usage: $0 [--[no-]dry_run] [--limit N]\n";
19+
20+
# setup
# ES handle constructed with type => "release"; the bulk object is only
# requested when this run is allowed to write.
my $es   = MetaCPAN::ES->new( type => "release" );
my $bulk = $dry_run ? undef : $es->bulk();

if ($dry_run) {
    log_warn {"--- DRY-RUN ---"};
}
log_info {"Searching for releases missing checksums"};

# Query clause selecting documents that have no checksum_md5 field.
my %missing_md5 = ( not => { exists => { field => "checksum_md5" } } );

my $scroll = $es->scroll(
    scroll => '10m',
    body   => { query => \%missing_md5 },
    fields => [qw< id name download_url >],
);

log_warn { "Found " . $scroll->total . " releases" };
log_warn { "Limit is " . $limit };
44+
45+
# Walk the scroll, compute the MD5 and SHA-256 digests of each release's
# archive under cpan_dir(), and either log them (dry run) or queue a partial
# document update through $bulk.
my $count = 0;

RELEASE: while ( my $p = $scroll->next ) {

    # A negative limit means "no cap"; otherwise stop once $limit hits have
    # been examined (hits that end up being skipped count as well).
    if ( $limit >= 0 and $count >= $limit ) {
        log_info {"Max number of changes reached."};
        last RELEASE;
    }
    $count++;

    my $fields = $p->{fields};
    my $name   = $fields->{name}[0];

    log_info { "Adding checksums for " . $name };

    # Requested fields are read as array refs; test the first element
    # rather than the ref itself so an empty list is treated as missing.
    my $urls         = $fields->{download_url};
    my $download_url = $urls ? $urls->[0] : undef;

    unless ($download_url) {
        log_info { $name . " is missing a download_url" };
        next RELEASE;
    }

    # Map the URL onto the local tree: drop everything up to and including
    # the last "authors" and re-root the remainder under <cpan_dir>/authors.
    my $file
        = cpan_dir() . "/authors" . ( $download_url =~ s/^.*authors//r );

    # digest_file_hex croaks when the file cannot be opened. One unreadable
    # archive must not abort the whole backfill (and drop the updates that
    # are already queued), so report it and move on to the next release.
    my ( $checksum_md5, $checksum_sha256 );
    my $ok = eval {
        $checksum_md5    = digest_file_hex( $file, 'MD5' );
        $checksum_sha256 = digest_file_hex( $file, 'SHA-256' );
        1;
    };
    unless ($ok) {
        my $error = $@ || 'unknown error';
        log_warn { "Unable to checksum " . $file . ": " . $error };
        next RELEASE;
    }

    if ($dry_run) {
        log_info { "--- MD5: " . $checksum_md5 };
        log_info { "--- SHA256: " . $checksum_sha256 };
        next RELEASE;
    }

    $bulk->update( {
        id  => $p->{_id},
        doc => {
            checksum_md5    => $checksum_md5,
            checksum_sha256 => $checksum_sha256
        },
        doc_as_upsert => 1,
    } );
}

# Send whatever is still queued; $bulk is only defined outside dry-run mode.
$bulk->flush unless $dry_run;

log_info {'Finished adding checksums'};

1;
87+
88+
__END__
89+
90+
=pod
91+
92+
=head1 SYNOPSIS
93+
94+
# perl bin/checksum.pl --[no-]dry_run --limit X
95+
96+
=head1 DESCRIPTION
97+
98+
Backfill checksums for releases
99+
100+
=head2 dry_run
101+
102+
Don't update - just log the checksums that would have been written
103+
104+
=head2 no-dry_run
105+
106+
Update records (this is what happens when --dry_run is not given)
107+
108+
=head2 limit
109+
110+
Max number of records to update. default=1000, for unlimited set to -1
111+
112+
=cut

0 commit comments

Comments
 (0)