Skip to content

Commit 538eace

Browse files
authored
Merge pull request #29 from metacpan/mickey/favorite
Added favorite script
2 parents a858be2 + fe5036f commit 538eace

File tree

1 file changed

+215
-0
lines changed

1 file changed

+215
-0
lines changed

bin/favorite.pl

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Getopt::Long;
6+
use MetaCPAN::Logger qw< :log :dlog >;
7+
8+
use MetaCPAN::ES;
9+
use MetaCPAN::Ingest qw< minion >;
10+
11+
# Command-line options (all optional):
#   --age N             only consider favorites whose date is >= "now-Nm"
#   --check_missing     report (or, with --queue, enqueue) distributions whose
#                       files carry no dist_fav_count; cannot be combined
#                       with --distribution
#   --count N           use N as the favorite count instead of counting;
#                       requires --distribution
#   --distribution DIST restrict processing to a single distribution
#   --limit N           scroll size for the --age pre-scan and cap on the
#                       number of distributions handled by --check_missing
#   --queue             enqueue index_favorite jobs instead of updating inline
my ( $age, $check_missing, $count, $distribution, $limit, $queue );

# GetOptions returns false on unknown or malformed options (after printing
# the specific complaint to STDERR). Abort in that case rather than silently
# running with defaults, which would trigger a full re-index.
GetOptions(
    "age=i"          => \$age,
    "check_missing"  => \$check_missing,
    "count=i"        => \$count,
    "distribution=s" => \$distribution,
    "limit=i"        => \$limit,
    "queue"          => \$queue,
) or die "Invalid command line options";

# --count only makes sense for exactly one distribution.
if ( $count and !$distribution ) {
    die
        "Cannot set count in a distribution search mode, this flag only applies to a single distribution. please use together with --distribution DIST";
}

# --check_missing scans all files and must not be narrowed to one
# distribution.
if ( $check_missing and $distribution ) {
    die
        "check_missing doesn't work in filtered mode - please remove other flags";
}

index_favorites();

log_info {'done'};
38+
39+
###
40+
41+
# Count favorites per distribution and write the totals to the
# dist_fav_count field of that distribution's "file" documents, or, in
# --check_missing mode, report/enqueue distributions whose files lack a
# dist_fav_count.
#
# Takes no arguments. Behaviour is driven by the file-scoped option variables
# ($age, $check_missing, $count, $distribution, $limit, $queue). The return
# value carries no information; the caller invokes this for its side effects.
sub index_favorites () {
    my $body;
    my $age_filter;

    # Lazily created job queue handle, shared by both --queue code paths.
    # NOTE(review): assumes the object returned by minion() can be reused for
    # several enqueue calls - confirm against MetaCPAN::Ingest.
    my $minion;

    if ($age) {
        $age_filter
            = { range => { date => { gte => sprintf( 'now-%dm', $age ) } } };
    }

    # Build the query that selects which favorites to count.
    if ($distribution) {
        $body = {
            query => {
                term => { distribution => $distribution }
            }
        };
    }
    elsif ($age) {

        # First pass: find the distributions that were favorited within the
        # age window, so the counting pass below can be limited to them.
        my $es   = MetaCPAN::ES->new( type => "favorite" );
        my $favs = $es->scroll(
            scroll => '5m',
            fields => [qw< distribution >],
            body   => {
                query => $age_filter,

                # NOTE(review): --limit is passed as the search "size" here;
                # confirm whether that caps the total or only the batch size.
                ( $limit ? ( size => $limit ) : () )
            }
        );

        my %recent_dists;

        while ( my $fav = $favs->next ) {
            my $dist = $fav->{fields}{distribution}[0];
            $recent_dists{$dist}++ if $dist;
        }

        # NOTE(review): when nothing matched the age window, $body stays
        # undefined and the counting pass below scans every favorite -
        # confirm this fallback is intended.
        my @keys = keys %recent_dists;
        if (@keys) {
            $body = {
                query => {
                    terms => { distribution => \@keys }
                }
            };
        }
        $es->index_refresh;
    }

    # Get total fav counts for distributions.

    my %dist_fav_count;

    if ($count) {

        # Caller supplied the count; top-level validation guarantees that
        # $distribution is set whenever $count is.
        $dist_fav_count{$distribution} = $count;
    }
    else {
        my $es   = MetaCPAN::ES->new( type => "favorite" );
        my $favs = $es->scroll(
            scroll => '30s',
            fields => [qw< distribution >],
            ( $body ? ( body => $body ) : () ),
        );

        while ( my $fav = $favs->next ) {
            my $dist = $fav->{fields}{distribution}[0];
            $dist_fav_count{$dist}++ if $dist;
        }

        $es->index_refresh;
        log_debug {"Done counting favs for distributions"};
    }

    # Report missing distributions if requested.

    if ($check_missing) {
        my %missing;
        my @age_filter;
        if ($age) {
            @age_filter = ( must => [$age_filter] );
        }

        # Select files that do not have a dist_fav_count of at least 1.
        my $es    = MetaCPAN::ES->new( type => "file" );
        my $files = $es->scroll(
            scroll => '15m',
            fields => [qw< id distribution >],
            size   => 500,
            body   => {
                query => {
                    bool => {
                        must_not => [
                            { range => { dist_fav_count => { gte => 1 } } }
                        ],
                        @age_filter,
                    }
                }
            },
        );

        while ( my $file = $files->next ) {
            my $dist = $file->{fields}{distribution}[0];
            next unless $dist;

            # Skip distributions already handled in this run and those that
            # were counted above.
            next if exists $missing{$dist} or exists $dist_fav_count{$dist};

            if ($queue) {
                log_debug {"Queueing: $dist"};
                $minion //= minion();

                # No '--count' is passed: in this mode $count is never set
                # (it requires --distribution, which is rejected together
                # with --check_missing) and the guard above ensures there is
                # no counted value for $dist.
                $minion->enqueue(
                    index_favorite => [ '--distribution', $dist ],
                    { priority => 0, attempts => 10 }
                );
            }
            else {
                log_debug {"Found missing: $dist"};
            }

            $missing{$dist} = 1;
            last if $limit and scalar( keys %missing ) >= $limit;
        }

        my $total_missing = scalar( keys %missing );
        log_debug {"Total missing: $total_missing"} unless $queue;

        $es->index_refresh;
        return;
    }

    # Update fav counts for files per distributions.

    for my $dist ( keys %dist_fav_count ) {
        log_debug {"Dist $dist"};

        # When --count was given, this is exactly that value (it is the only
        # entry in %dist_fav_count); otherwise it is the counted total.
        my $cnt = $dist_fav_count{$dist};

        if ($queue) {
            $minion //= minion();
            $minion->enqueue(
                index_favorite =>
                    [ '--distribution', $dist, '--count', $cnt ],
                { priority => 0, attempts => 10 }
            );
        }
        else {
            my $es    = MetaCPAN::ES->new( type => "file" );
            my $bulk  = $es->bulk( timeout => '120m' );
            my $files = $es->scroll(
                scroll => '15s',
                fields => [qw< id >],
                body   => {
                    query => { term => { distribution => $dist } }
                },
            );

            while ( my $file = $files->next ) {
                my $id = $file->{fields}{id}[0];

                log_debug {"Updating file id $id with fav_count $cnt"};

                $bulk->update( {
                    id  => $id,
                    doc => { dist_fav_count => $cnt },
                } );
            }

            # Send any updates still buffered for this distribution.
            $bulk->flush;
        }
    }

    return;
}
214+
215+
1;

0 commit comments

Comments
 (0)