Skip to content

Commit 3fc3918

Browse files
authored
Merge pull request #6 from jg-rp/update-cts
Update CTS and fix
2 parents af3c18a + 967e1f9 commit 3fc3918

File tree

9 files changed

+165
-13
lines changed

9 files changed

+165
-13
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Python JSONPath RFC 9535 Change Log
22

3+
## Version 0.1.3 (unreleased)
4+
5+
**Fixes**
6+
7+
- Fixed decoding of escape sequences in quoted name selectors and string literals. We now raise a `JSONPathSyntaxError` for invalid code points.
8+
- Fixed parsing of number literals with an exponent. We now allow 'e' to be upper case.
9+
- Fixed handling of trailing commas in bracketed segments. We now raise a `JSONPathSyntaxError` in such cases.
10+
- Fixed handling of invalid number literals. We now raise a syntax error for invalid leading zeros and extra negative signs.
11+
312
## Version 0.1.2
413

514
**Fixes**

jsonpath_rfc9535/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.1.2"
1+
__version__ = "0.1.3"

jsonpath_rfc9535/lex.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")
1919
RE_INDEX = re.compile(r"-?[0-9]+")
2020
RE_INT = re.compile(r"-?[0-9]+")
21-
RE_EXPONENT = re.compile(r"e[+-]?[0-9]+")
22-
RE_NEGATIVE_EXPONENT = re.compile(r"e-[0-9]+")
21+
RE_EXPONENT = re.compile(r"[eE][+-]?[0-9]+")
22+
RE_NEGATIVE_EXPONENT = re.compile(r"[eE]-[0-9]+")
2323
RE_FUNCTION_NAME = re.compile(r"[a-z][a-z_0-9]*")
2424
RE_AND = re.compile(r"&&")
2525
RE_OR = re.compile(r"\|\|")

jsonpath_rfc9535/parse.py

Lines changed: 142 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from __future__ import annotations
44

5-
import json
65
from typing import TYPE_CHECKING
76
from typing import Callable
87
from typing import Dict
@@ -312,6 +311,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto
312311
if stream.peek.type_ != TokenType.RBRACKET:
313312
stream.expect_peek(TokenType.COMMA)
314313
stream.next_token()
314+
stream.expect_peek_not(TokenType.RBRACKET, "unexpected trailing comma")
315315

316316
stream.next_token()
317317

@@ -362,11 +362,29 @@ def parse_string_literal(self, stream: TokenStream) -> Expression:
362362
)
363363

364364
def parse_integer_literal(self, stream: TokenStream) -> Expression:
    """Parse the current token as an integer literal.

    Args:
        stream: Token stream whose current token holds the literal text.

    Returns:
        An `IntegerLiteral` wrapping the parsed integer value.

    Raises:
        JSONPathSyntaxError: If the digits before any exponent have a
            leading zero (with or without a minus sign), or if the text
            can not be converted to a finite integer.
    """
    token = stream.current
    value = token.value

    # Only the digits before an exponent are subject to the leading zero
    # rule, and a minus sign must not hide them: "-01" is as invalid as
    # "01", while "0e1" is a well-formed number.
    unsigned = value[1:] if value.startswith("-") else value
    mantissa = unsigned.lower().partition("e")[0]
    if len(mantissa) > 1 and mantissa.startswith("0"):
        raise JSONPathSyntaxError("invalid integer literal", token=token)

    try:
        # Convert to float first to handle scientific notation.
        return IntegerLiteral(token, value=int(float(value)))
    except (ValueError, OverflowError) as err:
        # OverflowError: a huge exponent yields float infinity, which
        # int() refuses to convert.
        raise JSONPathSyntaxError("invalid integer literal", token=token) from err
367376

368377
def parse_float_literal(self, stream: TokenStream) -> Expression:
    """Parse the current token as a float literal.

    Args:
        stream: Token stream whose current token holds the literal text.

    Returns:
        A `FloatLiteral` wrapping the parsed float value.

    Raises:
        JSONPathSyntaxError: If the integer part of the literal has a
            leading zero (with or without a minus sign), or if the text
            can not be converted to a float.
    """
    token = stream.current
    value = token.value

    # Isolate the integer part: drop an optional minus sign, then anything
    # from the exponent or the decimal point onwards. This rejects "01.5"
    # and "-00.5" but accepts "0.5" and "0e-1".
    unsigned = value[1:] if value.startswith("-") else value
    int_part = unsigned.lower().partition("e")[0].partition(".")[0]
    if len(int_part) > 1 and int_part.startswith("0"):
        raise JSONPathSyntaxError("invalid float literal", token=token)

    try:
        return FloatLiteral(token, value=float(value))
    except ValueError as err:
        raise JSONPathSyntaxError("invalid float literal", token=token) from err
370388

371389
def parse_prefix_expression(self, stream: TokenStream) -> Expression:
372390
tok = stream.next_token()
@@ -514,12 +532,127 @@ def _decode_string_literal(self, token: Token) -> str:
514532
value = token.value.replace('"', '\\"').replace("\\'", "'")
515533
else:
516534
value = token.value
517-
try:
518-
rv = json.loads(f'"{value}"')
519-
assert isinstance(rv, str)
520-
return rv
521-
except json.JSONDecodeError as err:
522-
raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None
535+
536+
return self._unescape_string(value, token)
537+
538+
def _unescape_string(self, value: str, token: Token) -> str:
    """Return `value` with every backslash escape sequence decoded.

    Args:
        value: The raw text of a string literal or quoted name selector.
        token: The token `value` came from, used for error reporting.

    Returns:
        The decoded string.

    Raises:
        JSONPathSyntaxError: Propagated from the escape sequence decoder
            or from code point validation of an unescaped character.
    """
    pieces: List[str] = []
    end = len(value)
    pos = 0

    while pos < end:
        char = value[pos]
        if char != "\\":
            # Called for its validation only; the character is kept as is.
            self._string_from_codepoint(ord(char), token)
            pieces.append(char)
        else:
            # The decoder returns the index of the last character it used.
            decoded, pos = self._decode_escape_sequence(value, pos + 1, token)
            pieces.append(decoded)
        pos += 1

    return "".join(pieces)
553+
554+
def _decode_escape_sequence( # noqa: PLR0911
555+
self, value: str, index: int, token: Token
556+
) -> Tuple[str, int]:
557+
ch = value[index]
558+
if ch == '"':
559+
return '"', index
560+
if ch == "\\":
561+
return "\\", index
562+
if ch == "/":
563+
return "/", index
564+
if ch == "b":
565+
return "\x08", index
566+
if ch == "f":
567+
return "\x0c", index
568+
if ch == "n":
569+
return "\n", index
570+
if ch == "r":
571+
return "\r", index
572+
if ch == "t":
573+
return "\t", index
574+
if ch == "u":
575+
codepoint, index = self._decode_hex_char(value, index, token)
576+
return self._string_from_codepoint(codepoint, token), index
577+
578+
raise JSONPathSyntaxError(
579+
f"unknown escape sequence at index {token.index + index - 1}",
580+
token=token,
581+
)
582+
583+
def _decode_hex_char(self, value: str, index: int, token: Token) -> Tuple[int, int]:
    """Decode a `\\uXXXX` escape, combining a surrogate pair if present.

    Args:
        value: The string being unescaped.
        index: Position of the `u` that follows the backslash.
        token: The token `value` came from, used for error reporting.

    Returns:
        A `(codepoint, index)` tuple where `index` is the position of the
        last hex digit consumed.

    Raises:
        JSONPathSyntaxError: If there are too few characters left, if the
            escape is a lone low surrogate, or if a high surrogate is not
            followed by a `\\uXXXX` low surrogate.
    """
    size = len(value)

    if size <= index + 4:
        raise JSONPathSyntaxError(
            f"incomplete escape sequence at index {token.index + index - 1}",
            token=token,
        )

    # Position of the first hex digit, just past the 'u'.
    start = index + 1
    codepoint = self._parse_hex_digits(value[start : start + 4], token)

    if self._is_low_surrogate(codepoint):
        raise JSONPathSyntaxError(
            f"unexpected low surrogate at index {token.index + start - 1}",
            token=token,
        )

    if not self._is_high_surrogate(codepoint):
        # An ordinary code point; the last digit consumed is at start + 3.
        return (codepoint, start + 3)

    # A high surrogate must be followed immediately by "\uXXXX".
    if (
        start + 9 >= size
        or value[start + 4] != "\\"
        or value[start + 5] != "u"
    ):
        raise JSONPathSyntaxError(
            f"incomplete escape sequence at index {token.index + start - 2}",
            token=token,
        )

    trail = self._parse_hex_digits(value[start + 6 : start + 10], token)

    if not self._is_low_surrogate(trail):
        raise JSONPathSyntaxError(
            f"unexpected codepoint at index {token.index + start + 4}",
            token=token,
        )

    # Combine the low ten bits of each surrogate into one code point.
    high_bits = (codepoint & 0x03FF) << 10
    low_bits = trail & 0x03FF
    return (0x10000 + (high_bits | low_bits), start + 9)
628+
629+
def _parse_hex_digits(self, digits: str, token: Token) -> int:
630+
codepoint = 0
631+
for digit in digits.encode():
632+
codepoint <<= 4
633+
if digit >= 48 and digit <= 57:
634+
codepoint |= digit - 48
635+
elif digit >= 65 and digit <= 70:
636+
codepoint |= digit - 65 + 10
637+
elif digit >= 97 and digit <= 102:
638+
codepoint |= digit - 97 + 10
639+
else:
640+
raise JSONPathSyntaxError(
641+
"invalid \\uXXXX escape sequence",
642+
token=token,
643+
)
644+
return codepoint
645+
646+
def _string_from_codepoint(self, codepoint: int, token: Token) -> str:
647+
if codepoint <= 0x1F:
648+
raise JSONPathSyntaxError("invalid character", token=token)
649+
return chr(codepoint)
650+
651+
def _is_high_surrogate(self, codepoint: int) -> bool:
652+
return codepoint >= 0xD800 and codepoint <= 0xDBFF
653+
654+
def _is_low_surrogate(self, codepoint: int) -> bool:
655+
return codepoint >= 0xDC00 and codepoint <= 0xDFFF
523656

524657
def _raise_for_non_comparable_function(
525658
self, expr: Expression, token: Token

jsonpath_rfc9535/tokens.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,8 @@ def expect_peek(self, *typ: TokenType) -> None:
193193
f"expected {_typ}, found {self.peek.type_.name!r}",
194194
token=self.peek,
195195
)
196+
197+
def expect_peek_not(self, typ: TokenType, message: str) -> None:
    """Raise an exception if the next token is of type `typ`.

    Args:
        typ: The token type that must not come next.
        message: The error message to use if it does.

    Raises:
        JSONPathSyntaxError: If the type of the next token equals `typ`.
    """
    if self.peek.type_ == typ:
        raise JSONPathSyntaxError(message, token=self.peek)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ convention = "google"
179179
"scripts/__init__.py" = ["D104"]
180180
"tests/*" = ["D100", "D101", "D104", "D103"]
181181
"jsonpath_rfc9535/lex.py" = ["E741"]
182+
"jsonpath_rfc9535/parse.py" = ["PLR2004"]
182183
"jsonpath_rfc9535/utils/nondeterministic_descent.py" = [
183184
"D103",
184185
"D101",

tests/test_compliance.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import json
88
import operator
99
from dataclasses import dataclass
10+
from dataclasses import field
1011
from typing import Any
1112
from typing import Dict
1213
from typing import List
@@ -26,6 +27,7 @@ class Case:
2627
result: Any = None
2728
results: Optional[List[Any]] = None
2829
invalid_selector: Optional[bool] = None
30+
tags: List[str] = field(default_factory=list)
2931

3032

3133
SKIP: Dict[str, str] = {}

tests/test_cts_nondeterminism.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import json
88
import operator
99
from dataclasses import dataclass
10+
from dataclasses import field
1011
from typing import Any
1112
from typing import List
1213
from typing import Optional
@@ -26,6 +27,7 @@ class Case:
2627
result: Any = None
2728
results: Optional[List[Any]] = None
2829
invalid_selector: Optional[bool] = None
30+
tags: List[str] = field(default_factory=list)
2931

3032

3133
def cases() -> List[Case]:

0 commit comments

Comments
 (0)