Skip to content

Commit e3ce2c4

Browse files
committed
contributors: fetch all contrib pauseids in one query
Rather than looking up each contributor's PAUSE ID with a separate query per email address, run a single query covering the email addresses of all contributors. This significantly speeds up fetches for large contributor lists.
1 parent acbcb5d commit e3ce2c4

File tree

1 file changed

+22
-13
lines changed

1 file changed

+22
-13
lines changed

lib/MetaCPAN/Query/Release.pm

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ sub get_contributors {
153153
$dupe ? () : $info;
154154
} ( @$authors, @$contribs );
155155

156+
my %want_email;
156157
for my $contrib (@contribs) {
157158

158159
# heuristic to autofill pause accounts
@@ -165,20 +166,28 @@ sub get_contributors {
165166

166167
}
167168

168-
# check if contributor's email points to a registered author
169-
if ( !$contrib->{pauseid} ) {
170-
for my $email ( @{ $contrib->{email} } ) {
171-
my $check_author = $self->es->search(
172-
es_doc_path('author'),
173-
body => {
174-
query => { term => { email => $email } },
175-
size => 10,
176-
}
177-
);
169+
push @{ $want_email{$_} }, $contrib
170+
for @{ $contrib->{email} };
171+
}
172+
173+
if (%want_email) {
174+
my $check_author = $self->es->search(
175+
es_doc_path('author'),
176+
body => {
177+
query => { terms => { email => [ sort keys %want_email ] } },
178+
_source => [ 'email', 'pauseid' ],
179+
size => 100,
180+
},
181+
);
178182

179-
if ( hit_total($check_author) ) {
180-
$contrib->{pauseid}
181-
= uc $check_author->{hits}{hits}[0]{_source}{pauseid};
183+
for my $author ( @{ $check_author->{hits}{hits} } ) {
184+
my $emails = $author->{_source}{email};
185+
$emails = [ $emails ]
186+
if !ref $emails;
187+
my $pauseid = uc $author->{_source}{pauseid};
188+
for my $email ( @$emails ) {
189+
for my $contrib ( @{ $want_email{$email} } ) {
190+
$contrib->{pauseid} = $pauseid;
182191
}
183192
}
184193
}

0 commit comments

Comments
 (0)