Skip to content

Commit ab276df

Browse files
authored
feat: ✨ handle grouped errors under the enum constraint (#183)
# Description This PR handles grouped errors at `$.resources[x].schema.fields[x].constraints.enum`. It's the worst one yet! This schema node is deeply nested, so handling errors here is a little more complex. The situation is the following: for each field, you can list the allowed values for that field under `"constraints": {"enum": [...]}`. Depending on the type of the field, the type of the values in `enum` can be different. Sometimes it's straightforward (e.g. for string fields, enum values have to be strings), but sometimes the schema for `enum` is more permissive (e.g. for number fields, enum values can be either numbers or strings). However, you cannot mix types. If `enum` has mixed types or if some of the values are the wrong type, all sub-schemas under `enum` flag errors. These are usually contradictory, e.g., for a number field `[1, "inf"]` will alert both because 1 is not a string and because "inf" is not a number... The logic in the function: - if the error is not even for the current field type -> remove all errors - if there are any errors on the `enum` property itself (not an array, duplicate items, etc.) -> keep only these - the remaining errors are all about the individual values in the enum array being the wrong type - if the values are not all the same type -> flag only this - if they are all the same type -> flag that this is the wrong type Part of #15 Needs an in-depth review. ## Checklist - [x] Formatted Markdown - [x] Ran `just run-all`
1 parent 1dadd9d commit ab276df

File tree

2 files changed

+165
-22
lines changed

2 files changed

+165
-22
lines changed

src/check_datapackage/check.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ class SchemaError:
156156
Path components are separated by '/'.
157157
jsonpath (str): The JSON path to the field that violates the check.
158158
instance (Any): The part of the object that failed the check.
159+
schema_value (Optional[Any]): The expected value that is checked against,
160+
which is part of the schema violated by this error.
159161
parent (Optional[SchemaError]): The error group the error belongs to, if any.
160162
"""
161163

@@ -164,6 +166,7 @@ class SchemaError:
164166
schema_path: str
165167
jsonpath: str
166168
instance: Any
169+
schema_value: Optional[Any] = None
167170
parent: Optional["SchemaError"] = None
168171

169172

@@ -304,6 +307,64 @@ def _handle_S_resources_x_schema_fields_x(
304307
return edits
305308

306309

310+
def _handle_S_resources_x_schema_fields_x_constraints_enum(
311+
parent_error: SchemaError,
312+
schema_errors: list[SchemaError],
313+
) -> SchemaErrorEdits:
314+
"""Only flag errors for the relevant field type and simplify errors."""
315+
edits = SchemaErrorEdits(remove=[parent_error])
316+
errors_in_group = _get_errors_in_group(schema_errors, parent_error)
317+
318+
# Remove errors for other field types
319+
if _not_field_type_error(parent_error):
320+
edits.remove.extend(errors_in_group)
321+
return edits
322+
323+
value_errors = _filter(
324+
errors_in_group,
325+
lambda error: not error.jsonpath.endswith("enum"),
326+
)
327+
328+
# If there are only value errors, simplify them
329+
if value_errors == errors_in_group:
330+
edits.add.append(_get_enum_values_error(parent_error, value_errors))
331+
332+
# Otherwise, keep only top-level enum errors
333+
edits.remove.extend(value_errors)
334+
return edits
335+
336+
337+
def _get_enum_values_error(
338+
parent_error: SchemaError,
339+
value_errors: list[SchemaError],
340+
) -> SchemaError:
341+
message = "All enum values must be the same type."
342+
same_type = len(set(_map(parent_error.instance, lambda value: type(value)))) == 1
343+
if same_type:
344+
allowed_types = set(_map(value_errors, lambda error: str(error.schema_value)))
345+
message = (
346+
"The enum value type is not correct. Enum values should be "
347+
f"one of {', '.join(allowed_types)}."
348+
)
349+
return SchemaError(
350+
message=message,
351+
type="type",
352+
schema_path=value_errors[0].schema_path,
353+
jsonpath=_strip_index(value_errors[0].jsonpath),
354+
instance=value_errors[0].instance,
355+
)
356+
357+
358+
def _not_field_type_error(parent_error: SchemaError) -> bool:
359+
if not parent_error.parent:
360+
return True
361+
field_type: str = parent_error.parent.instance.get("type", "string")
362+
if field_type not in FIELD_TYPES:
363+
return True
364+
schema_index = FIELD_TYPES.index(field_type)
365+
return f"fields/items/oneOf/{schema_index}/" not in parent_error.schema_path
366+
367+
307368
def _handle_S_resources_x_schema_primary_key(
308369
parent_error: SchemaError,
309370
schema_errors: list[SchemaError],
@@ -420,6 +481,10 @@ def _handle_licenses(
420481
("resources/items/oneOf", _handle_S_resources_x),
421482
("resources/items/properties/path/oneOf", _handle_S_resources_x_path),
422483
("fields/items/oneOf", _handle_S_resources_x_schema_fields_x),
484+
(
485+
"constraints/properties/enum/oneOf",
486+
_handle_S_resources_x_schema_fields_x_constraints_enum,
487+
),
423488
("primaryKey/oneOf", _handle_S_resources_x_schema_primary_key),
424489
("foreignKeys/items/oneOf", _handle_S_resources_x_schema_foreign_keys),
425490
("licenses/items/anyOf", _handle_licenses),
@@ -495,6 +560,7 @@ def _create_schema_error(error: ValidationError) -> SchemaError:
495560
jsonpath=_get_full_json_path_from_error(error),
496561
schema_path="/".join(_map(error.absolute_schema_path, str)),
497562
instance=error.instance,
563+
schema_value=error.validator_value,
498564
parent=_create_schema_error(error.parent) if error.parent else None, # type: ignore[arg-type]
499565
)
500566

@@ -519,3 +585,7 @@ def _get_errors_in_group(
519585
schema_errors: list[SchemaError], parent_error: SchemaError
520586
) -> list[SchemaError]:
521587
return _filter(schema_errors, lambda error: error.parent == parent_error)
588+
589+
590+
def _strip_index(jsonpath: str) -> str:
591+
return re.sub(r"\[\d+\]$", "", jsonpath)

tests/test_check.py

Lines changed: 95 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,101 @@ def test_fail_unknown_field_with_bad_property():
334334
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type"
335335

336336

337+
def test_fail_package_license_with_no_name_or_path():
338+
properties = example_package_properties()
339+
del properties["licenses"][0]["name"]
340+
341+
issues = check(properties)
342+
343+
assert len(issues) == 1
344+
assert issues[0].type == "required"
345+
assert issues[0].jsonpath == "$.licenses[0]"
346+
347+
348+
def test_fail_resource_license_with_no_name_or_path():
349+
properties = example_package_properties()
350+
properties["resources"][0]["licenses"] = [{}]
351+
352+
issues = check(properties)
353+
354+
assert len(issues) == 1
355+
assert issues[0].type == "required"
356+
assert issues[0].jsonpath == "$.resources[0].licenses[0]"
357+
358+
359+
def test_fail_field_with_non_unique_enum_values():
360+
"""Fail a field whose enum array contains duplicate values."""
361+
properties = example_package_properties()
362+
properties["resources"][0]["schema"]["fields"][0]["type"] = "number"
363+
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": [1, 1]}
364+
365+
issues = check(properties)
366+
367+
assert len(issues) == 1
368+
assert issues[0].type == "uniqueItems"
369+
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum"
370+
371+
372+
def test_fail_unknown_field_with_bad_enum_constraint():
373+
"""Fail a field whose enum constraint is the wrong type when the field's
374+
type is unknown."""
375+
properties = example_package_properties()
376+
properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown"
377+
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": {}}
378+
379+
issues = check(properties)
380+
381+
assert len(issues) == 1
382+
assert issues[0].type == "enum"
383+
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type"
384+
385+
386+
def test_fail_simple_field_with_bad_enum_constraint():
387+
"""Fail a field whose enum values are the wrong type when enum values can
388+
have only one type."""
389+
properties = example_package_properties()
390+
# Expecting enum array to contain strings
391+
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": [1]}
392+
393+
issues = check(properties)
394+
395+
assert len(issues) == 1
396+
assert issues[0].type == "type"
397+
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum[0]"
398+
399+
400+
def test_fail_complex_field_with_bad_enum_constraint():
401+
"""Fail a field whose enum values are the wrong type when enum values can
402+
have multiple types."""
403+
properties = example_package_properties()
404+
properties["resources"][0]["schema"]["fields"][0]["type"] = "number"
405+
# Expecting enum array to contain numbers or strings
406+
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {
407+
"enum": [{}],
408+
}
409+
410+
issues = check(properties)
411+
412+
assert len(issues) == 1
413+
assert issues[0].type == "type"
414+
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum"
415+
416+
417+
def test_fail_field_with_mixed_type_enum_constraint():
418+
"""Fail a field whose enum values are not all the same type."""
419+
properties = example_package_properties()
420+
properties["resources"][0]["schema"]["fields"][0]["type"] = "geopoint"
421+
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {
422+
"enum": [{}, [], "string", 1],
423+
}
424+
425+
issues = check(properties)
426+
427+
assert len(issues) == 1
428+
assert issues[0].type == "type"
429+
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum"
430+
431+
337432
def test_pass_good_foreign_keys():
338433
properties = example_package_properties()
339434
properties["resources"][0]["schema"]["foreignKeys"] = [
@@ -534,28 +629,6 @@ def test_fail_primary_key_with_bad_array_item():
534629
assert issues[0].jsonpath == "$.resources[0].schema.primaryKey[0]"
535630

536631

537-
def test_fail_package_license_with_no_name_or_path():
538-
properties = example_package_properties()
539-
del properties["licenses"][0]["name"]
540-
541-
issues = check(properties)
542-
543-
assert len(issues) == 1
544-
assert issues[0].type == "required"
545-
assert issues[0].jsonpath == "$.licenses[0]"
546-
547-
548-
def test_fail_resource_license_with_no_name_or_path():
549-
properties = example_package_properties()
550-
properties["resources"][0]["licenses"] = [{}]
551-
552-
issues = check(properties)
553-
554-
assert len(issues) == 1
555-
assert issues[0].type == "required"
556-
assert issues[0].jsonpath == "$.resources[0].licenses[0]"
557-
558-
559632
def test_error_as_true():
560633
properties = {
561634
"name": 123,

0 commit comments

Comments
 (0)