Skip to content

Commit b070491

Browse files
committed
Implement q priority sorting of requests
1 parent 70b33f0 commit b070491

File tree

4 files changed

+121
-101
lines changed

4 files changed

+121
-101
lines changed

pygeoapi/api/__init__.py

+22-19
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@
6666
CrsTransformSpec, TEMPLATES, UrlPrefetcher, dategetter,
6767
filter_dict_by_key_value, filter_providers_by_type, get_api_rules,
6868
get_base_url, get_provider_by_type, get_provider_default, get_typed_value,
69-
get_crs_from_uri, get_supported_crs_list, render_j2_template, to_json
69+
get_crs_from_uri, get_supported_crs_list, render_j2_template, to_json,
70+
get_choice_from_headers, get_from_headers
7071
)
7172

7273
LOGGER = logging.getLogger(__name__)
@@ -150,7 +151,8 @@ def apply_gzip(headers: dict, content: Union[str, bytes]) -> Union[str, bytes]:
150151
Compress content if requested in header.
151152
"""
152153
charset = CHARSET[0]
153-
if F_GZIP in headers.get('Content-Encoding', []):
154+
155+
if F_GZIP in get_from_headers(headers, 'content-encoding'):
154156
try:
155157
if isinstance(content, bytes):
156158
# bytes means Content-Type needs to be set upstream
@@ -305,16 +307,18 @@ def _get_locale(self, headers, supported_locales):
305307
raise ValueError(f"{self.__class__.__name__} must be initialized"
306308
f"with a list of valid supported locales")
307309

308-
for func, mapping in ((l10n.locale_from_params, self._args),
309-
(l10n.locale_from_headers, headers)):
310-
loc_str = func(mapping)
311-
if loc_str:
312-
if not raw:
310+
for mapping, field in ((self._args, l10n.QUERY_PARAM),
311+
(headers, 'accept-language')):
312+
313+
loc_strs = get_choice_from_headers(mapping, field, all=True)
314+
if loc_strs:
315+
if raw is None:
313316
# This is the first-found locale string: set as raw
314-
raw = loc_str
317+
raw = get_from_headers(mapping, field)
318+
315319
# Check if locale string is a good match for the UI
316-
loc = l10n.best_match(loc_str, supported_locales)
317-
is_override = func is l10n.locale_from_params
320+
loc = l10n.best_match(loc_strs, supported_locales)
321+
is_override = field is l10n.QUERY_PARAM
318322
if loc != default_locale or is_override:
319323
return raw, loc
320324

@@ -335,17 +339,16 @@ def _get_format(self, headers) -> Union[str, None]:
335339
return format_
336340

337341
# Format not specified: get from Accept headers (MIME types)
338-
# e.g. format_ = 'text/html'
339-
h = headers.get('accept', headers.get('Accept', '')).strip() # noqa
342+
# e.g. Accept: 'text/html;q=0.5,application/ld+json'
343+
types_ = get_choice_from_headers(headers, 'accept', all=True)
344+
if types_ is None:
345+
return
346+
340347
(fmts, mimes) = zip(*FORMAT_TYPES.items())
341-
# basic support for complex types (i.e. with "q=0.x")
342-
for type_ in (t.split(';')[0].strip() for t in h.split(',') if t):
348+
for type_ in types_:
343349
if type_ in mimes:
344350
idx_ = mimes.index(type_)
345-
format_ = fmts[idx_]
346-
break
347-
348-
return format_ or None
351+
return fmts[idx_]
349352

350353
@property
351354
def data(self) -> bytes:
@@ -503,7 +506,7 @@ def get_response_headers(self, force_lang: l10n.Locale = None,
503506
if F_GZIP in FORMAT_TYPES:
504507
if force_encoding:
505508
headers['Content-Encoding'] = force_encoding
506-
elif F_GZIP in self._headers.get('Accept-Encoding', ''):
509+
elif F_GZIP in get_from_headers(self._headers, 'accept-encoding'):
507510
headers['Content-Encoding'] = F_GZIP
508511

509512
return headers

pygeoapi/l10n.py

+13-82
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def locale2str(value: Locale) -> str:
113113

114114
def best_match(accept_languages, available_locales) -> Locale:
115115
"""
116-
Takes an Accept-Languages string (from header or request query params)
116+
Takes an Accept-Languages sorted list (from header or request query params)
117117
and finds the best matching locale from a list of available locales.
118118
119119
This function provides a framework-independent alternative to the
@@ -131,12 +131,12 @@ def best_match(accept_languages, available_locales) -> Locale:
131131
or unknown locale is ignored. However, if no
132132
`available_locales` are specified, a `LocaleError` is raised.
133133
134-
:param accept_languages: A Locale or string with one or more languages.
134+
:param accept_languages: A Locale or list of one or more languages.
135135
This can be as simple as "de" for example,
136136
but it's also possible to include a territory
137137
(e.g. "en-US" or "fr_BE") or even a complex
138-
string with quality values, e.g.
139-
"fr-CH, fr;q=0.9, en;q=0.8, de;q=0.7, *;q=0.5".
138+
list sorted by quality values, e.g.
139+
["fr-CH", "fr", "en", "de", "*"].
140140
:param available_locales: A list containing the available locales.
141141
For example, a pygeoapi provider might only
142142
support ["de", "en"].
@@ -170,49 +170,12 @@ def get_match(locale_, available_locales_):
170170

171171
if isinstance(accept_languages, Locale):
172172
# If a Babel Locale was used as input, transform back into a string
173-
accept_languages = locale2str(accept_languages)
173+
accept_languages = [locale2str(accept_languages)]
174174

175-
if not isinstance(accept_languages, str):
175+
if not isinstance(accept_languages, list):
176176
# If `accept_languages` is not a list, ignore it
177177
LOGGER.debug(f"ignoring invalid accept-languages '{accept_languages}'")
178-
accept_languages = ''
179-
180-
tags = accept_languages.split(',')
181-
num_tags = len(tags)
182-
req_locales = {}
183-
for i, lang in enumerate(tags):
184-
q_raw = None
185-
q_out = None
186-
if not lang:
187-
continue
188-
189-
# Check if complex (i.e. with quality weights)
190-
try:
191-
lang, q_raw = (v.strip() for v in lang.split(';'))
192-
except ValueError:
193-
# Tuple unpacking failed: tag is not complex (or too complex :))
194-
pass
195-
196-
# Validate locale tag
197-
loc = str2locale(lang, True)
198-
if not loc:
199-
LOGGER.debug(f"ignoring invalid accept-language '{lang}'")
200-
continue
201-
202-
# Validate quality weight (e.g. "q=0.7")
203-
if q_raw:
204-
try:
205-
q_out = float([v.strip() for v in q_raw.split('=')][1])
206-
except (ValueError, IndexError):
207-
# Tuple unpacking failed: not a valid q tag
208-
pass
209-
210-
# If there's no actual q, set one based on the language order
211-
if not q_out:
212-
q_out = num_tags - i
213-
214-
# Store locale
215-
req_locales[q_out] = loc
178+
accept_languages = []
216179

217180
# Process supported locales
218181
prv_locales = OrderedDict()
@@ -221,7 +184,11 @@ def get_match(locale_, available_locales_):
221184
prv_locales.setdefault(loc.language, []).append(loc.territory)
222185

223186
# Return best match from accepted languages
224-
for _, loc in sorted(req_locales.items(), reverse=True):
187+
for lang in accept_languages:
188+
loc = str2locale(lang, True)
189+
if not loc:
190+
LOGGER.debug(f"ignoring invalid accept-language '{lang}'")
191+
continue
225192
match = get_match(loc, prv_locales)
226193
if match:
227194
LOGGER.debug(f"'{match}' matches requested '{accept_languages}'")
@@ -281,7 +248,7 @@ def translate(value, language: Union[Locale, str]):
281248
return value
282249

283250
# Find best language match and return value by its key
284-
out_locale = best_match(language, loc_items.keys())
251+
out_locale = best_match([language], loc_items.keys())
285252
return value[loc_items[out_locale]]
286253

287254

@@ -340,42 +307,6 @@ def _translate_dict(obj, level: int = 0):
340307
return result
341308

342309

343-
def locale_from_headers(headers) -> str:
344-
"""
345-
Gets a valid Locale from a request headers dictionary.
346-
Supported are complex strings (e.g. "fr-CH, fr;q=0.9, en;q=0.8"),
347-
web locales (e.g. "en-US") or basic language tags (e.g. "en").
348-
A value of `None` is returned if the locale was not found or invalid.
349-
350-
:param headers: Mapping of request headers.
351-
352-
:returns: locale string or None
353-
"""
354-
355-
lang = {k.lower(): v for k, v in headers.items()}.get('accept-language')
356-
if lang:
357-
LOGGER.debug(f"Got locale '{lang}' from 'Accept-Language' header")
358-
return lang
359-
360-
361-
def locale_from_params(params) -> str:
362-
"""
363-
Gets a valid Locale from a request query parameters dictionary.
364-
Supported are complex strings (e.g. "fr-CH, fr;q=0.9, en;q=0.8"),
365-
web locales (e.g. "en-US") or basic language tags (e.g. "en").
366-
A value of `None` is returned if the locale was not found or invalid.
367-
368-
:param params: Mapping of request query parameters.
369-
370-
:returns: locale string or None
371-
"""
372-
373-
lang = params.get(QUERY_PARAM)
374-
if lang:
375-
LOGGER.debug(f"Got locale '{lang}' from query parameter '{QUERY_PARAM}'") # noqa
376-
return lang
377-
378-
379310
def set_response_language(headers: dict, *locale_: Locale):
380311
"""
381312
Sets the Content-Language on the given HTTP response headers dict.

pygeoapi/util.py

+57
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
from datetime import date, datetime, time, timezone
3939
from decimal import Decimal
4040
from enum import Enum
41+
from heapq import heappush
42+
4143
import json
4244
import logging
4345
import mimetypes
@@ -1054,3 +1056,58 @@ def _inplace_replace_geometry_filter_name(
10541056
else:
10551057
_inplace_replace_geometry_filter_name(
10561058
sub_node, geometry_column_name)
1059+
1060+
1061+
def get_from_headers(headers: dict, header_name: str) -> str:
    """
    Gets case insensitive value from dictionary.
    This is particularly useful when trying to get
    headers from Starlette and Flask without issue

    Keys are compared after stripping surrounding whitespace and
    lowercasing them. If several keys match, the last one wins.
    An empty string is returned when `headers` is `None`, when the
    header is not present, or when its value is `None`, so callers can
    safely use `in` or `.split()` on the result.

    :param headers: `dict` of request headers.
    :param header_name: Name of request header.

    :returns: `str` value of header
    """

    if headers is None:
        return ''

    wanted = header_name.lower()
    found = ''
    for key, value in headers.items():
        # No early exit: a later matching key overrides an earlier one
        if key.strip().lower() == wanted:
            found = value

    # A header explicitly set to None is treated as not provided
    return '' if found is None else found
1075+
1076+
1077+
def get_choice_from_headers(headers: dict,
                            header_name: str,
                            all: bool = False) -> Any:
    """
    Gets choices from a request dictionary,
    considering numerical ordering of preferences.
    Supported are complex preference strings (e.g. "fr-CH, fr;q=0.9, en;q=0.8")

    Choices are ranked by descending q value; choices with the same q value
    keep the order in which they appear in the header. A choice without a
    q value defaults to 1.0. Whitespace around `,`, `;` and `=` is tolerated
    and parameters other than `q` are ignored. A choice is dropped when its
    q value is 0 ("not acceptable"), malformed or outside of (0, 1].

    :param headers: `dict` of request headers.
    :param header_name: Name of request header.
    :param all: bool to return one or all header values.

    :returns: `None` if the header is missing or empty. Otherwise the
              sorted `list` of choices if `all` is set (possibly empty),
              else the preferred choice (`None` if no choice is valid).
    """

    # Select header of interest
    header = get_from_headers(headers=headers, header_name=header_name)
    if not header:
        return None

    # Parse choices, extracting optional q values (defaults to 1.0)
    ranked = []
    for position, part in enumerate(header.split(',')):
        value, *params = (token.strip() for token in part.split(';'))
        if not value:
            continue

        q_value = 1.0
        for param in params:
            name, _, raw_q = param.partition('=')
            if name.strip().lower() != 'q':
                continue
            try:
                q_value = float(raw_q)
            except ValueError:
                # Malformed weight: force the range check below to fail
                q_value = -1.0
            break

        # q=0 means "not acceptable", so the lower bound is exclusive
        if 0 < q_value <= 1:
            ranked.append((-q_value, position, value))

    # Highest q first; position breaks ties in favour of header order
    ranked.sort()

    # Drop q value
    sorted_choices = [value for _, _, value in ranked]

    # Return one or all choices
    if all:
        return sorted_choices
    return sorted_choices[0] if sorted_choices else None

tests/test_util.py

+29
Original file line numberDiff line numberDiff line change
@@ -549,3 +549,32 @@ def test_modify_pygeofilter(
549549
geometry_column_name=geometry_colum_name
550550
)
551551
assert result == expected
552+
553+
554+
def test_get_choice_from_headers():
    """
    Checks that the preferred choice is picked from weighted request
    headers, however the header name happens to be capitalized.
    """

    base_headers = {
        'accept': 'text/html;q=0.5,application/ld+json',
        'accept-encoding': 'deflate;q=0.5,gzip'
    }

    # A header that was not sent yields no choice at all
    assert util.get_choice_from_headers(base_headers, 'accept-language') is None  # noqa

    # (header name as sent, header value, expected preferred choice)
    language_cases = (
        ('accept-language', 'en;q=0.8,de;q=0.6,fr;q=0.4', 'en'),
        ('Accept-Language', 'en;q=0.8,de', 'de'),
        ('Accept-Language', 'en,de', 'en'),
        ('ACCEPT-LANGUAGE', 'en;q=0.8,de;q=0.2,fr', 'fr'),
        ('accept-language', 'en_US', 'en_US'),
    )
    for sent_name, sent_value, expected in language_cases:
        request_headers = {**base_headers, sent_name: sent_value}
        choice = util.get_choice_from_headers(
            request_headers, 'accept-language')
        assert choice == expected

    # Headers other than Accept-Language are ranked the same way
    other_cases = (
        (base_headers, 'accept-encoding', 'gzip'),
        (base_headers, 'accept', 'application/ld+json'),
        ({'accept-language': 'en_US', 'accept': '*/*'}, 'accept', '*/*'),
    )
    for request_headers, name, expected in other_cases:
        assert util.get_choice_from_headers(request_headers, name) == expected

0 commit comments

Comments
 (0)