Commit cb80da2

Merge pull request #10 from jg-rp/tidy

A general tidy up

2 parents 4da76fa + 5614806

7 files changed: +87 -109 lines changed


jsonpath_rfc9535/lex.py
Lines changed: 52 additions & 85 deletions

@@ -17,16 +17,11 @@
 RE_WHITESPACE = re.compile(r"[ \n\r\t]+")
 RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")
 RE_INDEX = re.compile(r"-?[0-9]+")
-RE_INT = re.compile(r"-?[0-9]+")
-RE_EXPONENT = re.compile(r"[eE][+-]?[0-9]+")
-RE_NEGATIVE_EXPONENT = re.compile(r"[eE]-[0-9]+")
+RE_INT = re.compile(r"-?[0-9]+(?:[eE]\+?[0-9]+)?")
+# RE_FLOAT includes numbers with a negative exponent and no decimal point.
+RE_FLOAT = re.compile(r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)")
 RE_FUNCTION_NAME = re.compile(r"[a-z][a-z_0-9]*")
-RE_AND = re.compile(r"&&")
-RE_OR = re.compile(r"\|\|")
-RE_TRUE = re.compile(r"true")
-RE_FALSE = re.compile(r"false")
-RE_NULL = re.compile(r"null")
-RE_ESCAPE = re.compile(r"\\[bfnrtu/]")
+ESCAPES = frozenset(["b", "f", "n", "r", "t", "u", "/", "\\"])


 class Lexer:
@@ -77,13 +72,13 @@ def emit(self, t: TokenType) -> None:

     def next(self) -> str:
         """Return the next character, or the empty string if no more characters."""
-        if self.pos >= len(self.query):
+        try:
+            c = self.query[self.pos]
+            self.pos += 1
+            return c
+        except IndexError:
             return ""

-        c = self.query[self.pos]
-        self.pos += 1
-        return c
-
     def ignore(self) -> None:
         """Ignore characters up to the pointer."""
         self.start = self.pos
@@ -100,18 +95,16 @@ def backup(self) -> None:

     def peek(self) -> str:
         """Return the next character without advancing the pointer."""
-        c = self.next()
-        if c:
-            self.backup()
-        return c
-
-    def accept(self, pattern: Pattern[str]) -> bool:
-        """Increment the pointer if the current character matches _pattern_."""
-        c = self.next()
-        if pattern.match(c):
+        try:
+            return self.query[self.pos]
+        except IndexError:
+            return ""
+
+    def accept(self, s: str) -> bool:
+        """Increment the pointer if the current position starts with _s_."""
+        if self.query.startswith(s, self.pos):
+            self.pos += len(s)
             return True
-        if c:
-            self.backup()
         return False

     def accept_match(self, pattern: Pattern[str]) -> bool:
@@ -140,7 +133,16 @@ def ignore_whitespace(self) -> bool:

     def error(self, msg: str) -> None:
         """Emit an error token."""
-        self.tokens.append(Token(TokenType.ERROR, msg, self.pos, self.query))
+        # better error messages.
+        self.tokens.append(
+            Token(
+                TokenType.ERROR,
+                self.query[self.start : self.pos],
+                self.start,
+                self.query,
+                msg,
+            )
+        )


 StateFn = Callable[[Lexer], Optional["StateFn"]]
@@ -150,7 +152,6 @@ def lex_root(l: Lexer) -> Optional[StateFn]:  # noqa: D103
     c = l.next()

     if c != "$":
-        l.backup()
         l.error(f"expected '$', found {c!r}")
         return None

@@ -180,9 +181,8 @@ def lex_segment(l: Lexer) -> Optional[StateFn]:  # noqa: D103, PLR0911
         l.emit(TokenType.LBRACKET)
         return lex_inside_bracketed_segment

-    # default
-    l.backup()
     if l.filter_depth:
+        l.backup()
         return lex_inside_filter

     l.error(f"expected '.', '..' or a bracketed selection, found {c!r}")
@@ -204,21 +204,21 @@ def lex_descendant_segment(l: Lexer) -> Optional[StateFn]:  # noqa: D103
         l.emit(TokenType.LBRACKET)
         return lex_inside_bracketed_segment

-    # default
     l.backup()

     if l.accept_match(RE_PROPERTY):
         l.emit(TokenType.PROPERTY)
         return lex_segment

+    l.next()
     l.error(f"unexpected descendant selection token {c!r}")
     return None


 def lex_shorthand_selector(l: Lexer) -> Optional[StateFn]:  # noqa: D103
     l.ignore()  # ignore dot

-    if l.ignore_whitespace():
+    if l.accept_match(RE_WHITESPACE):
         l.error("unexpected whitespace after dot")
         return None

@@ -318,11 +318,9 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]:  # noqa: D103, PLR0915, PL
             return lex_inside_bracketed_segment

         if c == "'":
-            # String literal
             return lex_single_quoted_string_inside_filter_expression

         if c == '"':
-            # String literal
             return lex_double_quoted_string_inside_filter_expression

         if c == "(":
@@ -388,61 +386,31 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]:  # noqa: D103, PLR0915, PL
             l.emit(TokenType.GT)
             continue

-        # default
         l.backup()

-        # numbers
-        if l.accept_match(RE_INT):
-            if l.peek() == ".":
-                # A float
-                l.next()
-                if not l.accept_match(RE_INT):
-                    l.error("a fractional digit is required after a decimal point")
-                    return None
-
-                l.accept_match(RE_EXPONENT)
-                l.emit(TokenType.FLOAT)
-                continue
-
-            # An int, or float if exponent is negative
-            if l.accept_match(RE_NEGATIVE_EXPONENT):
-                l.emit(TokenType.FLOAT)
-            else:
-                l.accept_match(RE_EXPONENT)
-                l.emit(TokenType.INT)
-            continue
-
-        if l.accept_match(RE_AND):
+        if l.accept("&&"):
             l.emit(TokenType.AND)
-            continue
-
-        if l.accept_match(RE_OR):
+        elif l.accept("||"):
             l.emit(TokenType.OR)
-            continue
-
-        if l.accept_match(RE_TRUE):
+        elif l.accept("true"):
             l.emit(TokenType.TRUE)
-            continue
-
-        if l.accept_match(RE_FALSE):
+        elif l.accept("false"):
             l.emit(TokenType.FALSE)
-            continue
-
-        if l.accept_match(RE_NULL):
+        elif l.accept("null"):
             l.emit(TokenType.NULL)
-            continue
-
-        # functions
-        if l.accept_match(RE_FUNCTION_NAME) and l.peek() == "(":
+        elif l.accept_match(RE_FLOAT):
+            l.emit(TokenType.FLOAT)
+        elif l.accept_match(RE_INT):
+            l.emit(TokenType.INT)
+        elif l.accept_match(RE_FUNCTION_NAME) and l.peek() == "(":
             # Keep track of parentheses for this function call.
             l.paren_stack.append(1)
             l.emit(TokenType.FUNCTION)
             l.next()
             l.ignore()  # ignore LPAREN
-            continue
-
-        l.error(f"unexpected filter selector token {c!r}")
-        return None
+        else:
+            l.error(f"unexpected filter selector token {c!r}")
+            return None


 def lex_string_factory(quote: str, state: StateFn) -> StateFn:
@@ -467,16 +435,15 @@ def _lex_string(l: Lexer) -> Optional[StateFn]:
             return state

         while True:
-            head = l.query[l.pos : l.pos + 2]
             c = l.next()

-            if head in ("\\\\", f"\\{quote}"):
-                l.next()
-                continue
-
-            if c == "\\" and not RE_ESCAPE.match(head):
-                l.error("invalid escape")
-                return None
+            if c == "\\":
+                peeked = l.peek()
+                if peeked in ESCAPES or peeked == quote:
+                    l.next()
+                else:
+                    l.error("invalid escape")
+                    return None

             if not c:
                 l.error(f"unclosed string starting at index {l.start}")
@@ -522,6 +489,6 @@ def tokenize(query: str) -> List[Token]:
     lexer.run()

     if tokens and tokens[-1].type_ == TokenType.ERROR:
-        raise JSONPathSyntaxError(tokens[-1].value, token=tokens[-1])
+        raise JSONPathSyntaxError(tokens[-1].message, token=tokens[-1])

     return tokens
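Editor's note on the number-literal change: classification now lives in the regexes rather than in lexer control flow, and the `elif` ordering in `lex_inside_filter` matters — `RE_FLOAT` must be tried before `RE_INT`, since `RE_INT` would match the integer prefix of a float. Also note that the `(:?` opening the committed `RE_FLOAT` pattern looks like a typo for the non-capturing `(?:`; it is harmless for well-formed input. A standalone sketch of the intended classification, using `(?:` and `fullmatch` in place of the lexer's position-anchored `match`:

```python
import re

# Patterns from the diff above, with RE_FLOAT's leading group written as (?:.
RE_INT = re.compile(r"-?[0-9]+(?:[eE]\+?[0-9]+)?")
RE_FLOAT = re.compile(r"(?:-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)")


def classify(literal: str) -> str:
    """Mimic the lexer's FLOAT-before-INT ordering, for whole literals."""
    if RE_FLOAT.fullmatch(literal):
        return "FLOAT"
    if RE_INT.fullmatch(literal):
        return "INT"
    return "ERROR"


for lit in ["42", "-7", "1e2", "1E+2", "1e-2", "1.5", "-0.25e3"]:
    print(f"{lit} -> {classify(lit)}")
# 1e2 and 1E+2 stay INT, while 1e-2 is a FLOAT: a negative exponent can
# produce a non-integer value even without a decimal point.
```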

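The string-escape rewrite drops the two-character `head` lookahead and `RE_ESCAPE` in favour of a single `peek()` against the `ESCAPES` set. A minimal standalone sketch of the new check (the helper name is invented; the logic mirrors the diff):

```python
ESCAPES = frozenset(["b", "f", "n", "r", "t", "u", "/", "\\"])


def is_valid_escape(s: str, backslash_pos: int, quote: str) -> bool:
    """True if the backslash at backslash_pos starts a legal escape sequence."""
    peeked = s[backslash_pos + 1 : backslash_pos + 2]  # "" at end of input
    return peeked in ESCAPES or peeked == quote


print(is_valid_escape(r"a\nb", 1, "'"))  # True  (\n)
print(is_valid_escape(r"a\'b", 1, "'"))  # True  (escaped quote)
print(is_valid_escape(r"a\qb", 1, "'"))  # False (invalid escape)
print(is_valid_escape("a\\", 1, "'"))    # False (nothing to escape)
```

Like the lexer loop, this checks only the first character after the backslash; the four hex digits of a `\u` escape are not inspected here.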
jsonpath_rfc9535/parse.py
Lines changed: 11 additions & 12 deletions

@@ -34,7 +34,7 @@
 from .segments import JSONPathChildSegment
 from .segments import JSONPathRecursiveDescentSegment
 from .segments import JSONPathSegment
-from .selectors import Filter
+from .selectors import FilterSelector
 from .selectors import IndexSelector
 from .selectors import JSONPathSelector
 from .selectors import NameSelector
@@ -113,9 +113,6 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
             TokenType.TRUE: self.parse_boolean,
         }

-        # TODO: can a function argument be a grouped expression?
-        # TODO: can a function argument contain a !?
-
         self.function_argument_map: Dict[
             TokenType, Callable[[TokenStream], Expression]
         ] = {
@@ -291,7 +288,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]:
                 )
             )
         elif stream.current.type_ == TokenType.FILTER:
-            selectors.append(self.parse_filter(stream))
+            selectors.append(self.parse_filter_selector(stream))
         elif stream.current.type_ == TokenType.EOF:
             raise JSONPathSyntaxError(
                 "unexpected end of query", token=stream.current
@@ -320,9 +317,9 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]:

         return selectors

-    def parse_filter(self, stream: TokenStream) -> Filter:
+    def parse_filter_selector(self, stream: TokenStream) -> FilterSelector:
         tok = stream.next_token()
-        expr = self.parse_filter_selector(stream)
+        expr = self.parse_filter_expression(stream)

         if isinstance(expr, FunctionExtension):
             func = self.env.function_extensions.get(expr.name)
@@ -342,7 +339,7 @@ def parse_filter(self, stream: TokenStream) -> Filter:
                 token=expr.token,
             )

-        return Filter(
+        return FilterSelector(
             env=self.env,
             token=tok,
             expression=FilterExpression(token=expr.token, expression=expr),
@@ -392,15 +389,17 @@ def parse_prefix_expression(self, stream: TokenStream) -> Expression:
         return PrefixExpression(
             tok,
             operator="!",
-            right=self.parse_filter_selector(stream, precedence=self.PRECEDENCE_PREFIX),
+            right=self.parse_filter_expression(
+                stream, precedence=self.PRECEDENCE_PREFIX
+            ),
         )

     def parse_infix_expression(
         self, stream: TokenStream, left: Expression
     ) -> Expression:
         tok = stream.next_token()
         precedence = self.PRECEDENCES.get(tok.type_, self.PRECEDENCE_LOWEST)
-        right = self.parse_filter_selector(stream, precedence)
+        right = self.parse_filter_expression(stream, precedence)
         operator = self.BINARY_OPERATORS[tok.type_]

         if operator in self.COMPARISON_OPERATORS:
@@ -425,7 +424,7 @@ def parse_infix_expression(

     def parse_grouped_expression(self, stream: TokenStream) -> Expression:
         stream.next_token()
-        expr = self.parse_filter_selector(stream)
+        expr = self.parse_filter_expression(stream)
         stream.next_token()

         while stream.current.type_ != TokenType.RPAREN:
@@ -497,7 +496,7 @@ def parse_function_extension(self, stream: TokenStream) -> Expression:
             ),
         )

-    def parse_filter_selector(
+    def parse_filter_expression(
         self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST
     ) -> Expression:
         try:
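The `parse_filter_selector` / `parse_filter_expression` split mirrors the grammar: the former consumes the filter token and wraps the result in a `FilterSelector`, while the latter is the precedence-climbing loop that builds the expression tree, re-entered by the prefix, infix, and grouped-expression parsers with the appropriate precedence. For readers unfamiliar with the pattern, here is a self-contained toy version of precedence climbing over pre-lexed tokens — illustrative only, not the library's actual code or signatures:

```python
from typing import List, Tuple, Union

Token = Tuple[str, str]  # (type, value)
Expr = Union[str, Tuple[str, "Expr", "Expr"]]

# Toy precedence table: comparisons bind tighter than &&, which binds tighter than ||.
PRECEDENCES = {"||": 1, "&&": 2, "==": 3, ">": 3}


def parse(tokens: List[Token], pos: int = 0, precedence: int = 0) -> Tuple[Expr, int]:
    left: Expr = tokens[pos][1]  # toy "prefix" step: operands are bare literals
    pos += 1
    # Keep consuming infix operators while they bind tighter than our caller's operator.
    while pos < len(tokens) and PRECEDENCES.get(tokens[pos][0], 0) > precedence:
        op = tokens[pos][0]
        right, pos = parse(tokens, pos + 1, PRECEDENCES[op])
        left = (op, left, right)
    return left, pos


# "@.a > 1 && @.b == 2", pre-lexed:
tokens = [("lit", "@.a"), (">", ">"), ("lit", "1"),
          ("&&", "&&"), ("lit", "@.b"), ("==", "=="), ("lit", "2")]
print(parse(tokens)[0])  # ('&&', ('>', '@.a', '1'), ('==', '@.b', '2'))
```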

jsonpath_rfc9535/query.py
Lines changed: 2 additions & 2 deletions

@@ -1,4 +1,4 @@
-"""A compiled JSONPath ready to be applied to a JSON-like value."""
+"""A compiled JSONPath expression ready to be applied to JSON-like data."""

 from __future__ import annotations

@@ -20,7 +20,7 @@


 class JSONPathQuery:
-    """A compiled JSONPath expression ready to be applied to a JSON-like value.
+    """A compiled JSONPath expression ready to be applied to JSON-like data.

     Arguments:
         env: The `JSONPathEnvironment` this query is bound to.

jsonpath_rfc9535/selectors.py
Lines changed: 2 additions & 2 deletions

@@ -213,7 +213,7 @@ def resolve(self, node: JSONPathNode) -> Iterable[JSONPathNode]:
             yield node.new_child(element, i)


-class Filter(JSONPathSelector):
+class FilterSelector(JSONPathSelector):
     """Filter array/list items or dict/object values with a filter expression."""

     __slots__ = ("expression",)
@@ -233,7 +233,7 @@ def __str__(self) -> str:

     def __eq__(self, __value: object) -> bool:
         return (
-            isinstance(__value, Filter)
+            isinstance(__value, FilterSelector)
             and self.expression == __value.expression
             and self.token == __value.token
         )
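For context, a filter selector is applied to every member of the value under test: array items and object member values for which the filter expression is truthy become result nodes. A toy stand-in for the behaviour `FilterSelector.resolve` implements, with a plain predicate playing the role of the compiled filter expression (names invented for the sketch):

```python
from typing import Any, Callable, Iterable, Tuple


def filter_resolve(
    value: Any, predicate: Callable[[Any], bool]
) -> Iterable[Tuple[Any, Any]]:
    """Yield (key-or-index, value) pairs for which predicate holds."""
    if isinstance(value, list):
        for i, element in enumerate(value):
            if predicate(element):
                yield i, element
    elif isinstance(value, dict):
        for key, val in value.items():
            if predicate(val):
                yield key, val


# Roughly what $[?@ > 4] would select from [1, 5, 9]:
print(list(filter_resolve([1, 5, 9], lambda v: v > 4)))  # [(1, 5), (2, 9)]
```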

jsonpath_rfc9535/tokens.py
Lines changed: 4 additions & 2 deletions

@@ -67,24 +67,26 @@ class Token:
         token derives.
     """

-    __slots__ = ("type_", "value", "index", "query")
+    __slots__ = ("type_", "value", "index", "query", "message")

     def __init__(
         self,
         type_: TokenType,
         value: str,
         index: int,
         query: str,
+        message: str | None = None,
     ) -> None:
         self.type_ = type_
         self.value = value
         self.index = index
         self.query = query
+        self.message = message

     def __repr__(self) -> str:  # pragma: no cover
         return (
             f"Token(type={self.type_.name!r}, value={self.value!r}, "
-            f"index={self.index}, query={self.query!r})"
+            f"index={self.index}, query={self.query!r}, message={self.message!r})"
         )

     def __eq__(self, other: object) -> bool:
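Together with the lex.py changes, error tokens now carry the offending slice of the query in `value`, its start index in `index`, and the human-readable description in the new `message` slot, which `tokenize()` raises with. A minimal stand-in showing the shape of the new error path (string token types and the sample values are invented for the sketch; the real class uses `TokenType` members):

```python
from typing import Optional


class Token:
    __slots__ = ("type_", "value", "index", "query", "message")

    def __init__(self, type_: str, value: str, index: int, query: str,
                 message: Optional[str] = None) -> None:
        self.type_ = type_
        self.value = value      # for ERROR tokens: the offending query slice
        self.index = index      # start index of that slice in the query
        self.query = query
        self.message = message  # error description, previously stored in value


query = "$.foo[?@.bar =! 42]"
tok = Token("ERROR", "!", 14, query, message="unexpected filter selector token '!'")
# tokenize() now raises JSONPathSyntaxError(tok.message, token=tok), so the
# exception message and the error's location are no longer conflated.
print(f"{tok.message} at index {tok.index}: {tok.query!r}")
```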
