|
| 1 | +use strict; |
| 2 | +use warnings; |
| 3 | +use v5.36; |
| 4 | + |
| 5 | +use Getopt::Long; |
| 6 | +use Digest::file qw< digest_file_hex >; |
| 7 | +use MetaCPAN::Logger qw< :log :dlog >; |
| 8 | + |
| 9 | +use MetaCPAN::ES; |
| 10 | +use MetaCPAN::Ingest qw< cpan_dir >; |
| 11 | + |
# args
#
#   --limit N        stop after N releases have been fetched from the scroll
#                    (a negative value disables the limit)
#   --[no-]dry_run   compute and log the checksums without writing anything;
#                    "--dry-run" is accepted as an alias
my $limit = 1000;
my $dry_run;

# GetOptions only warns about an unknown option and returns false. Without
# this check a mistyped flag would be ignored and the script would carry on
# and perform real updates.
GetOptions(
    "limit=i"          => \$limit,
    "dry_run|dry-run!" => \$dry_run,
) or die "Error in command line arguments\n";

# setup
my $es = MetaCPAN::ES->new( type => "release" );

# The bulk helper is only needed when documents are actually updated.
my $bulk;
$bulk = $es->bulk() unless $dry_run;

log_warn {"--- DRY-RUN ---"} if $dry_run;
log_info {"Searching for releases missing checksums"};

# Scroll over every release document that has no checksum_md5 field yet,
# requesting only the fields used below.
my $scroll = $es->scroll(
    scroll => '10m',
    body   => {
        query => {
            not => {
                exists => {
                    field => "checksum_md5"
                }
            }
        }
    },
    fields => [qw< id name download_url >],
);

log_warn { "Found " . $scroll->total . " releases" };
log_warn { "Limit is " . $limit };

# Number of releases fetched so far; skipped releases count as well.
my $count = 0;

RELEASE:
while ( my $p = $scroll->next ) {
    if ( $limit >= 0 and $count++ >= $limit ) {
        log_info {"Max number of changes reached."};
        last RELEASE;
    }

    my $name = $p->{fields}{name}[0];
    log_info { "Adding checksums for " . $name };

    # Field values are indexed as lists below; only the first entry is used.
    my $urls         = $p->{fields}{download_url};
    my $download_url = $urls ? $urls->[0] : undef;
    unless ( defined $download_url and length $download_url ) {
        log_info { $name . " is missing a download_url" };
        next RELEASE;
    }

    # Keep everything after the first "/authors" path segment. Anchoring on
    # the leftmost match keeps a file name that itself contains "authors"
    # from truncating the path.
    my ($rel_path) = $download_url =~ m{/authors(/.*)\z}s;
    unless ( defined $rel_path ) {
        log_warn {
            "Cannot derive a local path from download_url " . $download_url
        };
        next RELEASE;
    }
    my $file = cpan_dir() . "/authors" . $rel_path;

    # digest_file_hex croaks when the file cannot be opened. Trap that so a
    # single missing archive does not abort the run and discard the updates
    # already queued in $bulk.
    my ( $checksum_md5, $checksum_sha256 );
    my $ok = eval {
        $checksum_md5    = digest_file_hex( $file, 'MD5' );
        $checksum_sha256 = digest_file_hex( $file, 'SHA-256' );
        1;
    };
    unless ($ok) {
        my $error = $@ || 'unknown error';
        log_warn { "Cannot checksum " . $file . ": " . $error };
        next RELEASE;
    }

    if ($dry_run) {
        log_info { "--- MD5: " . $checksum_md5 };
        log_info { "--- SHA256: " . $checksum_sha256 };
    }
    else {
        $bulk->update( {
            id  => $p->{_id},
            doc => {
                checksum_md5    => $checksum_md5,
                checksum_sha256 => $checksum_sha256
            },
            doc_as_upsert => 1,
        } );
    }
}

# Push out whatever is still queued in the bulk helper.
$bulk->flush unless $dry_run;

log_info {'Finished adding checksums'};

1;
| 87 | + |
| 88 | +__END__ |
| 89 | +
|
| 90 | +=pod |
| 91 | +
|
| 92 | +=head1 SYNOPSIS |
| 93 | +
|
| 94 | + # bin/metacpan checksum --[no-]dry_run --limit X |
| 95 | +
|
| 96 | +=head1 DESCRIPTION |
| 97 | +
|
| 98 | +Backfill checksums for releases |
| 99 | +
|
| 100 | +=head2 dry_run |
| 101 | +
|
| 102 | +Don't update - just show what would have been updated (off unless the flag is given) |
| 103 | +
|
| 104 | +=head2 no-dry_run |
| 105 | +
|
| 106 | +Update records |
| 107 | +
|
| 108 | +=head2 limit |
| 109 | +
|
| 110 | +Max number of records to update. default=1000, for unlimited set to -1 |
| 111 | +
|
| 112 | +=cut |
0 commit comments