Skip to content

Commit c6a4f58

Browse files
authored
Add supplemental format conformance tests (#308)
The current conformance tests for `string.format` are not exhaustive and do not account for all scenarios in the [docs](https://github.com/google/cel-spec/blob/master/doc/extensions/strings.md). One such example is a test for invalid UTF-8. This adds the ability to specify supplemental conformance tests in the form of another textproto file. The content is merged with the actual cel conformance tests and then run against our implementation. This allows us to specify our own tests not yet covered in the official conformance tests. As a result, this includes two tests for invalid UTF-8, which incidentally turned up a bug involving collapsing placeholders for contiguous invalid UTF-8 bytes. Note that a PR has been created [here](google/cel-spec#473) to add these tests to the spec. Once added and released, they can be removed from our supplemental tests. See See bufbuild/protovalidate-java#294 for a similar PR in protovalidate-java. This also renames some functions to make the test implementation more consistent across protovalidate implementations.
1 parent 6196d2e commit c6a4f58

File tree

3 files changed

+53
-17
lines changed

3 files changed

+53
-17
lines changed

protovalidate/internal/string_format.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import math
16+
import re
1617
from decimal import Decimal
1718
from typing import Optional, Union
1819

@@ -186,7 +187,9 @@ def __format_string(self, arg: celtypes.Value) -> str:
186187
# True -> true
187188
return str(arg).lower()
188189
if isinstance(arg, celtypes.BytesType):
189-
return str(arg, "utf-8")
190+
decoded = arg.decode("utf-8", errors="replace")
191+
# Collapse any contiguous placeholders into one
192+
return re.sub("\\ufffd+", "\ufffd", decoded)
190193
if isinstance(arg, celtypes.DoubleType):
191194
result = self.__validate_number(arg)
192195
if result is not None:

tests/format_test.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# limitations under the License.
1414

1515
import unittest
16+
from collections.abc import MutableMapping
17+
from itertools import chain
1618
from typing import Any, Optional
1719

1820
import celpy
@@ -45,15 +47,15 @@
4547
]
4648

4749

48-
def read_textproto() -> simple_pb2.SimpleTestFile:
50+
def load_test_data(file_name: str) -> simple_pb2.SimpleTestFile:
4951
msg = simple_pb2.SimpleTestFile()
50-
with open(f"tests/testdata/string_ext_{CEL_SPEC_VERSION}.textproto") as file:
52+
with open(file_name) as file:
5153
text_data = file.read()
5254
text_format.Parse(text_data, msg)
5355
return msg
5456

5557

56-
def build_binding(bindings: dict[str, eval_pb2.ExprValue]) -> dict[Any, Any]:
58+
def build_variables(bindings: MutableMapping[str, eval_pb2.ExprValue]) -> dict[Any, Any]:
5759
binder = {}
5860
for key, value in bindings.items():
5961
if value.HasField("value"):
@@ -82,25 +84,33 @@ def get_eval_error_message(test: simple_pb2.SimpleTest) -> Optional[str]:
8284
class TestFormat(unittest.TestCase):
8385
@classmethod
8486
def setUpClass(cls):
85-
test_data = read_textproto()
86-
cls._format_test_section = next((x for x in test_data.section if x.name == "format"), None)
87-
cls._format_error_test_section = next((x for x in test_data.section if x.name == "format_errors"), None)
87+
# The test data from the cel-spec conformance tests
88+
cel_test_data = load_test_data(f"tests/testdata/string_ext_{CEL_SPEC_VERSION}.textproto")
89+
# Our supplemental tests of functionality not in the cel conformance file, but defined in the spec.
90+
supplemental_test_data = load_test_data("tests/testdata/string_ext_supplemental.textproto")
91+
92+
# Combine the test data from both files into one
93+
sections = cel_test_data.section
94+
sections.extend(supplemental_test_data.section)
95+
96+
# Find the format tests which test successful formatting
97+
cls._format_tests = chain.from_iterable(x.test for x in sections if x.name == "format")
98+
# Find the format error tests which test errors during formatting
99+
cls._format_error_tests = chain.from_iterable(x.test for x in sections if x.name == "format_errors")
100+
88101
cls._env = celpy.Environment(runner_class=InterpretedRunner)
89102

90103
def test_format_successes(self):
91104
"""
92105
Tests success scenarios for string.format
93106
"""
94-
section = self._format_test_section
95-
if section is None:
96-
return
97-
for test in section.test:
107+
for test in self._format_tests:
98108
if test.name in skipped_tests:
99109
continue
100110
ast = self._env.compile(test.expr)
101111
prog = self._env.program(ast, functions=extra_func.EXTRA_FUNCS)
102112

103-
bindings = build_binding(test.bindings)
113+
bindings = build_variables(test.bindings)
104114
# Ideally we should use pytest parametrize instead of subtests, but
105115
# that would require refactoring other tests also.
106116
with self.subTest(test.name):
@@ -118,16 +128,13 @@ def test_format_errors(self):
118128
"""
119129
Tests error scenarios for string.format
120130
"""
121-
section = self._format_error_test_section
122-
if section is None:
123-
return
124-
for test in section.test:
131+
for test in self._format_error_tests:
125132
if test.name in skipped_error_tests:
126133
continue
127134
ast = self._env.compile(test.expr)
128135
prog = self._env.program(ast, functions=extra_func.EXTRA_FUNCS)
129136

130-
bindings = build_binding(test.bindings)
137+
bindings = build_variables(test.bindings)
131138
# Ideally we should use pytest parametrize instead of subtests, but
132139
# that would require refactoring other tests also.
133140
with self.subTest(test.name):
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# proto-file: ../../../proto/cel/expr/conformance/test/simple.proto
2+
# proto-message: cel.expr.conformance.test.SimpleTestFile
3+
4+
# Ideally these tests should be in the cel-spec conformance test suite.
5+
# Until they are added, we can use this to test for additional functionality
6+
# listed in the spec.
7+
8+
name: "string_ext_supplemental"
9+
description: "Supplemental tests for the strings extension library."
10+
section: {
11+
name: "format"
12+
test: {
13+
name: "bytes support for string with invalid utf-8 encoding"
14+
expr: '"%s".format([b"\\xF0abc\\x8C\\xF0xyz"])'
15+
value: {
16+
string_value: '\ufffdabc\ufffdxyz',
17+
}
18+
}
19+
test: {
20+
name: "bytes support for string with only invalid utf-8 sequences"
21+
expr: '"%s".format([b"\\xF0\\x8C\\xF0"])'
22+
value: {
23+
string_value: '\ufffd',
24+
}
25+
}
26+
}

0 commit comments

Comments
 (0)