From fbe6f1a28768d9a137f1396acdb1b0098dce5d29 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Tue, 5 Nov 2024 07:44:57 +0100 Subject: [PATCH 1/2] normalize handling of pagination query params Always accept page and page_size parameters, and pass them around internally in that order. Some end points were accepting size rather than page_size. Support both parameters for these, prioritizing page_size. The search end points were accepting a from parameter rather than page. Add support for a page parameter, and convert the from param into the page as a fallback. --- lib/MetaCPAN/API/Controller/Search.pm | 10 +++++++--- lib/MetaCPAN/Query/Release.pm | 20 +++++++++---------- lib/MetaCPAN/Query/Search.pm | 17 ++++++++-------- lib/MetaCPAN/Server/Controller/Favorite.pm | 2 +- lib/MetaCPAN/Server/Controller/Release.pm | 20 +++++++++++++------ .../Server/Controller/ReverseDependencies.pm | 2 +- lib/MetaCPAN/Server/Controller/Search/Web.pm | 7 ++++++- root/static/requests/search.yml | 16 +++++++++++++-- t/model/search.t | 4 ++-- 9 files changed, 64 insertions(+), 34 deletions(-) diff --git a/lib/MetaCPAN/API/Controller/Search.pm b/lib/MetaCPAN/API/Controller/Search.pm index f0e6ad700..fbc21bdcc 100644 --- a/lib/MetaCPAN/API/Controller/Search.pm +++ b/lib/MetaCPAN/API/Controller/Search.pm @@ -17,9 +17,13 @@ sub web { return unless $c->openapi->valid_input; my $args = $c->validation->output; - my @search = ( @{$args}{qw/q from size/} ); - push @search, $args->{collapsed} if exists $args->{collapsed}; - my $results = $c->model->search->search_web(@search); + my $query = $args->{q}; + my $size = $args->{page_size} // $args->{size} // 20; + my $page = $args->{page} // ( 1 + int( ( $args->{from} // 0 ) / $size ) ); + my $collapsed = $args->{collapsed}; + + my $results + = $c->model->search->search_web( $query, $page, $size, $collapsed ); return $c->render( json => $results ); } diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index 8a3a49802..8968affac 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -395,7 +395,7 @@ sub by_author_and_names { } sub by_author { - my ( $self, $pauseid, $size, $page ) = @_; + my ( $self, $pauseid, $page, $size ) = @_; $size //= 1000; $page //= 1; @@ -487,7 +487,7 @@ sub latest_by_author { } sub all_by_author { - my ( $self, $author, $size, $page ) = @_; + my ( $self, $author, $page, $size ) = @_; $size //= 100; $page //= 1; @@ -623,11 +623,11 @@ sub top_uploaders { sub requires { my ( $self, $module, $page, $page_size, $sort ) = @_; return $self->_get_depended_releases( [$module], $page, $page_size, - $page_size, $sort ); + $sort ); } sub reverse_dependencies { - my ( $self, $distribution, $page, $page_size, $size, $sort ) = @_; + my ( $self, $distribution, $page, $page_size, $sort ) = @_; # get the latest release of given distribution my $release = $self->_get_latest_release($distribution) || return; @@ -637,7 +637,7 @@ sub reverse_dependencies { # return releases depended on those modules return $self->_get_depended_releases( $modules, $page, $page_size, - $size, $sort ); + $sort ); } sub _get_latest_release { @@ -709,7 +709,7 @@ sub _fix_sort_value { } sub _get_depended_releases { - my ( $self, $modules, $page, $page_size, $size, $sort ) = @_; + my ( $self, $modules, $page, $page_size, $sort ) = @_; $page //= 1; $page_size //= 50; @@ -754,8 +754,8 @@ sub _get_depended_releases { ], }, }, - size => $size || $page_size, - from => $page * $page_size - $page_size, + size => $page_size, + from => ( $page - 1 ) * $page_size, sort => $sort, } ); @@ -768,7 +768,7 @@ sub _get_depended_releases { } sub recent { - my ( $self, $page, $page_size, $type ) = @_; + my ( $self, $type, $page, $page_size ) = @_; $page //= 1; $page_size //= 10000; $type //= ''; @@ -813,7 +813,7 @@ sub recent { my $body = { size => $page_size, - from => $from, + from => ( $page - 1 ) * $page_size, query => $query, _source => [qw(name author status abstract date distribution maturity)], diff --git a/lib/MetaCPAN/Query/Search.pm b/lib/MetaCPAN/Query/Search.pm index 95c7f12fc..079e85ca2 100644 --- a/lib/MetaCPAN/Query/Search.pm +++ b/lib/MetaCPAN/Query/Search.pm @@ -54,11 +54,11 @@ sub search_for_first_result { =cut sub search_web { - my ( $self, $search_term, $from, $page_size, $collapsed, + my ( $self, $search_term, $page, $page_size, $collapsed, $max_collapsed_hits ) = @_; $page_size //= 20; - $from //= 0; + $page //= 1; $search_term =~ s{([+=>_search_collapsed( $search_term, $from, $page_size, + ? $self->_search_collapsed( $search_term, $page, $page_size, $max_collapsed_hits ) - : $self->_search_expanded( $search_term, $from, $page_size ); + : $self->_search_expanded( $search_term, $page, $page_size ); return $results; } sub _search_expanded { - my ( $self, $search_term, $from, $page_size ) = @_; + my ( $self, $search_term, $page, $page_size ) = @_; # Used for distribution and module searches, the limit is included in # the query and ES does the right thing (because we are not collapsing @@ -95,7 +95,7 @@ sub _search_expanded { $search_term, { size => $page_size, - from => $from + from => ( $page - 1 ) * $page_size, } ); @@ -122,11 +122,12 @@ sub _search_expanded { } sub _search_collapsed { - my ( $self, $search_term, $from, $page_size, $max_collapsed_hits ) = @_; + my ( $self, $search_term, $page, $page_size, $max_collapsed_hits ) = @_; $max_collapsed_hits ||= 5; - my $total_size = $from + $page_size; + my $from = ( $page - 1 ) * $page_size; + my $total_size = $page * $page_size; my $es_query_opts = { size => 0, diff --git a/lib/MetaCPAN/Server/Controller/Favorite.pm b/lib/MetaCPAN/Server/Controller/Favorite.pm index 083f28b79..43fa2a7e6 100644 --- a/lib/MetaCPAN/Server/Controller/Favorite.pm +++ b/lib/MetaCPAN/Server/Controller/Favorite.pm @@ -43,7 +43,7 @@ sub recent : Path('recent') : Args(0) { $c->stash_or_detach( $self->model($c)->recent( $c->req->param('page') || 1, - $c->req->param('size') || 100 + $c->req->param('page_size') || $c->req->param('size') || 100, ) ); } diff --git a/lib/MetaCPAN/Server/Controller/Release.pm b/lib/MetaCPAN/Server/Controller/Release.pm index 2e333a910..753d88fbf 100644 --- a/lib/MetaCPAN/Server/Controller/Release.pm +++ b/lib/MetaCPAN/Server/Controller/Release.pm @@ -45,14 +45,20 @@ sub modules : Path('modules') : Args(2) { sub recent : Path('recent') : Args(0) { my ( $self, $c ) = @_; - my @params = @{ $c->req->params }{qw( page page_size type )}; - $c->stash_or_detach( $self->model($c)->recent(@params) ); + my $params = $c->req->params; + my $type = $params->{type}; + my $page = $params->{page}; + my $size = $params->{page_size}; + $c->stash_or_detach( $self->model($c)->recent( $type, $page, $size ) ); } sub by_author : Path('by_author') : Args(1) { my ( $self, $c, $pauseid ) = @_; - $c->stash_or_detach( $self->model($c) - ->by_author( $pauseid, @{ $c->req->params }{qw( size page )} ) ); + my $params = $c->req->params; + my $page = $params->{page}; + my $size = $params->{page_size} // $params->{size}; + $c->stash_or_detach( + $self->model($c)->by_author( $pauseid, $page, $size ) ); } sub latest_by_distribution : Path('latest_by_distribution') : Args(1) { @@ -69,9 +75,11 @@ sub latest_by_author : Path('latest_by_author') : Args(1) { sub all_by_author : Path('all_by_author') : Args(1) { my ( $self, $c, $pauseid ) = @_; - my @params = @{ $c->req->params }{qw( page_size page )}; + my $params = $c->req->params; + my $page = $params->{page}; + my $size = $params->{page_size}; $c->stash_or_detach( - $self->model($c)->all_by_author( $pauseid, @params ) ); + $self->model($c)->all_by_author( $pauseid, $page, $size ) ); } sub versions : Path('versions') : Args(1) { diff --git a/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm b/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm index 0549b9ecb..186d6c4dc 100644 --- a/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm +++ b/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm @@ -15,7 +15,7 @@ sub dist : Path('dist') : Args(1) { my ( $self, $c, $dist ) = @_; $c->stash_or_detach( $c->model('ESModel')->doc('release')->reverse_dependencies( - $dist, @{ $c->req->params }{qw< page page_size size sort >} + $dist, @{ $c->req->params }{qw< page page_size sort >} ) ); } diff --git a/lib/MetaCPAN/Server/Controller/Search/Web.pm b/lib/MetaCPAN/Server/Controller/Search/Web.pm index 0121e5945..45a2d69f2 100644 --- a/lib/MetaCPAN/Server/Controller/Search/Web.pm +++ b/lib/MetaCPAN/Server/Controller/Search/Web.pm @@ -30,8 +30,13 @@ sub web : Chained('/search/index') : PathPart('web') : Args(0) { my ( $self, $c ) = @_; my $args = $c->req->params; + my $query = $args->{q}; + my $size = $args->{page_size} // $args->{size} // 20; + my $page = $args->{page} // ( 1 + int( ( $args->{from} // 0 ) / $size ) ); + my $collapsed = $args->{collapsed}; + my $model = $c->model('Search'); - my $results = $model->search_web( @{$args}{qw( q from size collapsed )} ); + my $results = $model->search_web( $query, $page, $size, $collapsed ); $c->stash($results); } diff --git a/root/static/requests/search.yml b/root/static/requests/search.yml index 7234e3662..60f6824af 100644 --- a/root/static/requests/search.yml +++ b/root/static/requests/search.yml @@ -18,14 +18,26 @@ search_web: See also `collapsed` type: string required: true + - name: page + in: query + description: The page of the results to return + type: integer + - name: page_size + in: query + description: Number of results per page + type: integer - name: from in: query - description: The offset to use in the result set + description: | + The offset to use in the result set. Deprecated. Only used if `page` + is not set. type: integer default: 0 - name: size in: query - description: Number of results per page + description: | + Number of results per page. Deprecated. Only used if `page_size` is + not set. type: integer default: 20 - name: collapsed diff --git a/t/model/search.t b/t/model/search.t index 57a109db1..d0e3262a4 100644 --- a/t/model/search.t +++ b/t/model/search.t @@ -35,12 +35,12 @@ ok( $search, 'search' ); ok( $collapsed_search->{collapsed}, 'results are flagged as collapsed' ); - my $from = 0; + my $page = 1; my $page_size = 20; my $collapsed = 0; my $expanded - = $search->search_web( 'Foo', $from, $page_size, $collapsed ); + = $search->search_web( 'Foo', $page, $page_size, $collapsed ); ok( !$expanded->{collapsed}, 'results are flagged as expanded' ); From 453f7708ff4a0a931b8bff6ed8fcbe0352c6861a Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Tue, 5 Nov 2024 07:55:51 +0100 Subject: [PATCH 2/2] limit all pagination to 10000 results Paged results are expensive to compute on Elasticsearch. It will refuse to give results if paged forward far enough. Limit our pagination and give empty results if paged too deep. --- lib/MetaCPAN/Query/Favorite.pm | 10 +++++++++- lib/MetaCPAN/Query/Release.pm | 36 +++++++++++++++++++++++++++------- lib/MetaCPAN/Query/Search.pm | 11 ++++++++++- lib/MetaCPAN/Util.pm | 3 +++ 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/lib/MetaCPAN/Query/Favorite.pm b/lib/MetaCPAN/Query/Favorite.pm index 721942e05..67d521657 100644 --- a/lib/MetaCPAN/Query/Favorite.pm +++ b/lib/MetaCPAN/Query/Favorite.pm @@ -3,7 +3,7 @@ package MetaCPAN::Query::Favorite; use MetaCPAN::Moose; use MetaCPAN::ESConfig qw( es_doc_path ); -use MetaCPAN::Util qw(hit_total); +use MetaCPAN::Util qw( MAX_RESULT_WINDOW hit_total ); with 'MetaCPAN::Query::Role::Common'; @@ -148,6 +148,14 @@ sub recent { $page //= 1; $size //= 100; + if ( $page * $size >= MAX_RESULT_WINDOW ) { + return +{ + favorites => [], + took => 0, + total => 0, + }; + } + my $favs = $self->es->search( es_doc_path('favorite'), body => { diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index 8968affac..485aa647f 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -4,7 +4,7 @@ use MetaCPAN::Moose; use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Util - qw( hit_total single_valued_arrayref_to_scalar true false ); + qw( MAX_RESULT_WINDOW hit_total single_valued_arrayref_to_scalar true false ); with 'MetaCPAN::Query::Role::Common'; @@ -399,6 +399,14 @@ sub by_author { $size //= 1000; $page //= 1; + if ( $page * $size >= MAX_RESULT_WINDOW ) { + return { + releases => [], + took => 0, + total => 0, + }; + } + my $body = { query => { bool => { @@ -491,6 +499,14 @@ sub all_by_author { $size //= 100; $page //= 1; + if ( $page * $size >= MAX_RESULT_WINDOW ) { + return { + releases => [], + took => 0, + total => 0, + }; + } + my $body = { query => { term => { author => uc($author) } }, sort => [ { date => 'desc' } ], @@ -713,6 +729,14 @@ sub _get_depended_releases { $page //= 1; $page_size //= 50; + if ( $page * $page_size >= MAX_RESULT_WINDOW ) { + return +{ + data => [], + took => 0, + total => 0, + }; + } + $sort = _fix_sort_value($sort); my $dependency_filter = { @@ -773,17 +797,15 @@ sub recent { $page_size //= 10000; $type //= ''; - my $query; - my $from = ( $page - 1 ) * $page_size; - - if ( $from + $page_size > 10000 ) { - return { + if ( $page * $page_size >= MAX_RESULT_WINDOW ) { + return +{ releases => [], - total => 0, took => 0, + total => 0, }; } + my $query; if ( $type eq 'n' ) { $query = { constant_score => { diff --git a/lib/MetaCPAN/Query/Search.pm b/lib/MetaCPAN/Query/Search.pm index 079e85ca2..9a32e87b2 100644 --- a/lib/MetaCPAN/Query/Search.pm +++ b/lib/MetaCPAN/Query/Search.pm @@ -8,7 +8,7 @@ use List::Util qw( min uniq ); use Log::Contextual qw( :log :dlog ); use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Object Str ); -use MetaCPAN::Util qw( hit_total true false ); +use MetaCPAN::Util qw( MAX_RESULT_WINDOW hit_total true false ); use MooseX::StrictConstructor; with 'MetaCPAN::Query::Role::Common'; @@ -60,6 +60,15 @@ sub search_web { $page_size //= 20; $page //= 1; + if ( $page * $page_size >= MAX_RESULT_WINDOW ) { + return { + results => [], + total => 0, + tool => 0, + colapsed => $collapsed ? true : false, + }; + } + $search_term =~ s{([+=> { true false is_bool + MAX_RESULT_WINDOW ) ] }; +use constant MAX_RESULT_WINDOW => 10000; + *true = \&Cpanel::JSON::XS::true; *false = \&Cpanel::JSON::XS::false; *is_bool = \&Cpanel::JSON::XS::is_bool;