Skip to content

Commit 8bded68

Browse files
committed
Added backpan script
1 parent d94425b commit 8bded68

File tree

1 file changed

+168
-0
lines changed

1 file changed

+168
-0
lines changed

bin/backpan.pl

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Getopt::Long;
6+
use MetaCPAN::Logger qw< :log :dlog >;
7+
8+
use MetaCPAN::ES;
9+
use MetaCPAN::Ingest qw< cpan_file_map >;
10+
11+
# args
12+
my ( $distribution, $files_only, $undo );
13+
GetOptions(
14+
"distribution=s" => \$distribution,
15+
"files_only" => \$files_only,
16+
"undo" => \$undo,
17+
);
18+
19+
# setup
20+
my $cpan_file_map = cpan_file_map();
21+
my $es_release = MetaCPAN::ES->new( type => "release" );
22+
my $es_file = MetaCPAN::ES->new( type => "file" );
23+
24+
my %bulk;
25+
my %release_status;
26+
27+
build_release_status_map();
28+
update_releases() unless $files_only;
29+
update_files();
30+
31+
$_->flush for values %bulk;
32+
33+
log_info {"done"};
34+
35+
###
36+
37+
sub build_release_status_map () {
38+
log_info {"find_releases"};
39+
40+
my $scroll = $es_release->scroll(
41+
fields => [qw< author archive name >],
42+
body => get_release_query(),
43+
);
44+
45+
while ( my $release = $scroll->next ) {
46+
my $author = $release->{fields}{author}[0];
47+
my $archive = $release->{fields}{archive}[0];
48+
my $name = $release->{fields}{name}[0];
49+
next unless $name; # bypass some broken releases
50+
51+
$release_status{$author}{$name} = [
52+
(
53+
$undo
54+
or exists $cpan_file_map->{$author}{$archive}
55+
)
56+
? 'cpan'
57+
: 'backpan',
58+
$release->{_id}
59+
];
60+
}
61+
}
62+
63+
sub get_release_query () {
64+
unless ($undo) {
65+
return +{
66+
query => {
67+
not => { term => { status => 'backpan' } }
68+
}
69+
};
70+
}
71+
72+
return +{
73+
query => {
74+
bool => {
75+
must => [
76+
{ term => { status => 'backpan' } },
77+
(
78+
$distribution
79+
? { term => { distribution => $distribution } }
80+
: ()
81+
)
82+
]
83+
}
84+
}
85+
};
86+
}
87+
88+
sub update_releases () {
89+
log_info {"update_releases"};
90+
91+
$bulk{release} ||= $es_release->bulk( timeout => '5m' );
92+
93+
for my $author ( keys %release_status ) {
94+
95+
# value = [ status, _id ]
96+
for ( values %{ $release_status{$author} } ) {
97+
$bulk{release}->update( {
98+
id => $_->[1],
99+
doc => {
100+
status => $_->[0],
101+
}
102+
} );
103+
}
104+
}
105+
}
106+
107+
sub update_files () {
108+
for my $author ( keys %release_status ) {
109+
my @releases = keys %{ $release_status{$author} };
110+
while ( my @chunk = splice @releases, 0, 1000 ) {
111+
update_files_author( $author, \@chunk );
112+
}
113+
}
114+
}
115+
116+
sub update_files_author ( $author, $author_releases ) {
117+
log_info { "update_files: " . $author };
118+
119+
my $scroll_file = $es_file->scroll(
120+
scroll => '5m',
121+
fields => [qw< release >],
122+
body => {
123+
query => {
124+
bool => {
125+
must => [
126+
{ term => { author => $author } },
127+
{ terms => { release => $author_releases } }
128+
]
129+
}
130+
}
131+
},
132+
);
133+
134+
$bulk{file} ||= $es_file->bulk( timeout => '5m' );
135+
136+
while ( my $file = $scroll_file->next ) {
137+
my $release = $file->{fields}{release}[0];
138+
$bulk{file}->update( {
139+
id => $file->{_id},
140+
doc => {
141+
status => $release_status{$author}{$release}[0]
142+
}
143+
} );
144+
}
145+
}
146+
147+
1;
148+
149+
=pod
150+
151+
=head1 SYNOPSIS
152+
153+
$ bin/backpan
154+
155+
$ bin/backpan --distribution DIST
156+
157+
$ bin/backpan --files_only
158+
159+
$ bin/backpan --undo ...
160+
161+
=head1 DESCRIPTION
162+
163+
Sets "backpan" status on all BackPAN releases.
164+
165+
--undo will set distributions' status back as 'cpan'
166+
--file_only will only fix the 'file' type
167+
168+
=cut

0 commit comments

Comments
 (0)