Skip to content

Commit 2ede0d3

Browse files
authored
Merge pull request #21 from metacpan/mickey/tickets
Added tickets script
2 parents 13e035b + 8ed5e54 commit 2ede0d3

File tree

2 files changed

+283
-1
lines changed

2 files changed

+283
-1
lines changed

bin/river.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
my $river_url //= 'https://neilb.org/river-of-cpan.json.gz';
1515
my $river_data = decode_json( read_url($river_url) );
1616

17-
my $es = MetaCPAN::ES->new( index => "cpan", type => "distribution" );
17+
my $es = MetaCPAN::ES->new( type => "distribution" );
1818
my $bulk = $es->bulk();
1919

2020
log_info {'Updating the distribution index'};

bin/tickets.pl

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
use strict;
2+
use warnings;
3+
use v5.36;
4+
5+
use Getopt::Long;
6+
7+
use MetaCPAN::Logger qw< :log :dlog >;
8+
9+
use Ref::Util qw< is_hashref is_ref >;
10+
use HTTP::Request::Common qw< GET >;
11+
use URI::Escape qw< uri_escape >;
12+
use Text::CSV_XS ();
13+
use Net::GitHub::V4 ();
14+
15+
use MetaCPAN::ES;
16+
use MetaCPAN::Ingest qw<
17+
config
18+
read_url
19+
ua
20+
>;
21+
22+
# setup
23+
my $rt_summary_url //= 'https://rt.cpan.org/Public/bugs-per-dist.tsv';
24+
my $gh_issues_url
25+
//= 'https://api.github.com/repos/%s/%s/issues?per_page=100';
26+
27+
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
28+
29+
my $config = config();
30+
my $gh_token = $config->{github_token}; ### TODO: add to config
31+
32+
# Some issue with rt.cpan.org's cert
33+
34+
my $gh_graphql = Net::GitHub::V4->new(
35+
( $gh_token ? ( access_token => $gh_token ) : () ) );
36+
37+
my $es = MetaCPAN::ES->new( index => "cpan", type => "distribution" );
38+
my $bulk = $es->bulk();
39+
40+
check_all_distributions();
41+
index_rt_bugs();
42+
index_github_bugs();
43+
44+
1;
45+
46+
###
47+
48+
# make sure all distributions have an entry
49+
sub check_all_distributions () {
50+
my $es_release = MetaCPAN::ES->new( type => "release" );
51+
my $scroll_release = $es_release->scroll(
52+
fields => ['distribution'],
53+
body => {
54+
query => {
55+
not => { term => { status => 'backpan' } },
56+
}
57+
},
58+
);
59+
60+
my %dists;
61+
62+
while ( my $release = $scroll_release->next ) {
63+
my $d = $release->{'fields'}{'distribution'}[0];
64+
$d or next;
65+
66+
log_debug { sprintf( "Adding missing distribution record: %s", $d ) };
67+
68+
$dists{$d} = { name => $d };
69+
}
70+
71+
_bulk_update( \%dists );
72+
}
73+
74+
# rt issues are counted for all dists (the download tsv contains everything).
75+
sub index_rt_bugs () {
76+
log_debug {'Fetching RT bugs'};
77+
78+
my $ua = ua();
79+
my $resp = $ua->request( GET $rt_summary_url );
80+
81+
log_error { $resp->status_line } unless $resp->is_success;
82+
83+
# NOTE: This is sending a byte string.
84+
my $summary = _parse_tsv( $resp->content );
85+
86+
log_info {"writing rt data"};
87+
88+
_bulk_update($summary);
89+
}
90+
91+
sub _parse_tsv ($tsv) {
92+
$tsv
93+
=~ s/^#\s*(dist\s.+)/$1/m; # uncomment the field spec for Text::CSV_XS
94+
$tsv =~ s/^#.*\n//mg;
95+
96+
open my $fh, '<', \$tsv;
97+
98+
# NOTE: This is byte-oriented.
99+
my $tsv_parser = Text::CSV_XS->new( { sep_char => "\t" } );
100+
$tsv_parser->header($fh);
101+
102+
my %summary;
103+
while ( my $row = $tsv_parser->getline_hr($fh) ) {
104+
$summary{ $row->{dist} }{'bugs'}{'rt'} = {
105+
source => _rt_dist_url( $row->{dist} ),
106+
active => $row->{active},
107+
closed => $row->{inactive},
108+
map { $_ => $row->{$_} + 0 }
109+
grep { not /^(dist|active|inactive)$/ }
110+
keys %$row,
111+
};
112+
}
113+
114+
return \%summary;
115+
}
116+
117+
sub _rt_dist_url ($d) {
118+
return sprintf( 'https://rt.cpan.org/Public/Dist/Display.html?Name=%s',
119+
uri_escape($d) );
120+
}
121+
122+
# gh issues are counted for any dist with a github url in `resources.bugtracker.web`.
123+
sub index_github_bugs () {
124+
log_debug {'Fetching GitHub issues'};
125+
126+
my $es_release = MetaCPAN::ES->new( type => "release" );
127+
my $scroll_release = $es_release->scroll(
128+
body => {
129+
query => {
130+
and => [
131+
{ term => { status => 'latest' } },
132+
{
133+
or => [
134+
{
135+
prefix => {
136+
"resources.bugtracker.web" =>
137+
'http://github.com/'
138+
}
139+
},
140+
{
141+
prefix => {
142+
"resources.bugtracker.web" =>
143+
'https://github.com/'
144+
}
145+
},
146+
],
147+
}
148+
],
149+
}
150+
},
151+
);
152+
153+
log_debug { sprintf( "Found %s repos", $scroll_release->total ) };
154+
155+
my $json = JSON::MaybeXS->new( allow_nonref => 1 );
156+
157+
my %summary;
158+
159+
RELEASE: while ( my $r = $scroll_release->next ) {
160+
my $resources = $r->resources;
161+
my ( $user, $repo, $source )
162+
= _gh_user_repo_from_resources($resources);
163+
next unless $user;
164+
165+
log_debug {"Retrieving issues from $user/$repo"};
166+
167+
my $data
168+
= $gh_graphql->query( _gh_graphql_query( $json, $user, $repo ) );
169+
170+
if ( my $error = $data->{errors} ) {
171+
for my $error (@$error) {
172+
my $log_message = sprintf "[%s] %s", $r->{distribution},
173+
$error->{message};
174+
if ( $error->{type} eq 'NOT_FOUND' ) {
175+
delete $summary{ $r->{distribution} }{bugs}{github};
176+
log_info {$log_message};
177+
}
178+
else {
179+
log_error {$log_message};
180+
}
181+
next RELEASE;
182+
}
183+
}
184+
185+
my $open = $data->{data}{repository}{openIssues}{totalCount}
186+
+ $data->{data}{repository}{openPullRequests}{totalCount};
187+
188+
my $closed = $data->{data}{repository}{closedIssues}{totalCount}
189+
+ $data->{data}{repository}{closedPullRequests}{totalCount};
190+
191+
my $rec = {
192+
active => $open,
193+
open => $open,
194+
closed => $closed,
195+
source => $source,
196+
197+
};
198+
199+
$summary{ $r->{distribution} }{bugs}{github} = $rec;
200+
}
201+
202+
log_info {"writing github data"};
203+
204+
_bulk_update( \%summary );
205+
}
206+
207+
# Try (recursively) to find a github url in the resources hash.
208+
# FIXME: This should check bugtracker web exclusively, or at least first.
209+
sub _gh_user_repo_from_resources ($resources) {
210+
my ( $user, $repo, $source );
211+
212+
for my $k ( keys %{$resources} ) {
213+
my $v = $resources->{$k};
214+
215+
if ( !is_ref($v)
216+
&& $v
217+
=~ /^(https?|git):\/\/github\.com\/([^\/]+)\/([^\/]+?)(\.git)?\/?$/
218+
)
219+
{
220+
return ( $2, $3, $v );
221+
}
222+
223+
( $user, $repo, $source ) = _gh_user_repo_from_resources($v)
224+
if is_hashref($v);
225+
226+
return ( $user, $repo, $source ) if $user;
227+
}
228+
229+
return ();
230+
}
231+
232+
sub _gh_graphql_query ( $json, $user, $repo ) {
233+
sprintf <<END_QUERY, map $json->encode($_), $user, $repo;
234+
query {
235+
repository(owner: %s, name: %s) {
236+
openIssues: issues(states: OPEN) {
237+
totalCount
238+
}
239+
closedIssues: issues(states: CLOSED) {
240+
totalCount
241+
}
242+
openPullRequests: pullRequests(states: OPEN) {
243+
totalCount
244+
}
245+
closedPullRequests: pullRequests(states: [CLOSED, MERGED]) {
246+
totalCount
247+
}
248+
}
249+
}
250+
END_QUERY
251+
}
252+
253+
sub _bulk_update ($records) {
254+
for my $d ( keys %$records ) {
255+
$bulk->update( {
256+
id => $d,
257+
doc => $records->{$d},
258+
doc_as_upsert => 1,
259+
} );
260+
}
261+
}
262+
263+
__END__
264+
265+
=pod
266+
267+
=head1 SYNOPSIS
268+
269+
# bin/tickets
270+
271+
=head1 DESCRIPTION
272+
273+
Tracks the number of issues and the source, if the issue
274+
tracker is RT or Github it fetches the info and updates
275+
out ES information.
276+
277+
This can then be accessed here:
278+
279+
http://fastapi.metacpan.org/v1/distribution/Moose
280+
http://fastapi.metacpan.org/v1/distribution/HTTP-BrowserDetect
281+
282+
=cut

0 commit comments

Comments
 (0)