Skip to content
Merged
4 changes: 4 additions & 0 deletions docs/directives.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Specifies a regular expression to identify and exclude inline (bracketed) commen

@@comments :: /\(\*((?:.|\n)*?)\*\)/

.. note::
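Prior to 5.12.1, the ``comments`` pattern implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case: if the pattern relies on multiline matching, include ``(?m)`` in the pattern explicitly.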

``@@eol_comments :: <regexp>``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -39,6 +41,8 @@ Specifies a regular expression to identify and exclude end-of-line comments befo

@@eol_comments :: /#([^\n]*?)$/

.. note::
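Prior to 5.12.1, the ``eol_comments`` pattern implicitly had the `(?m) <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ option defined. This is no longer the case: if the pattern relies on ``$`` matching at the end of each line, include ``(?m)`` in the pattern explicitly.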

``@@ignorecase :: <bool>``
~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
8 changes: 4 additions & 4 deletions docs/syntax.rst
Original file line number Diff line number Diff line change
Expand Up @@ -735,11 +735,11 @@ Comments
~~~~~~~~

Parsers will skip over comments specified as a regular expression using
the ``comments_re`` parameter:
the ``comments`` parameter:

.. code:: python

parser = MyParser(text, comments_re="\(\*.*?\*\)")
parser = MyParser(text, comments="\(\*.*?\*\)")

For more complex comment handling, you can override the
``Buffer.eat_comments()`` method.
Expand All @@ -751,8 +751,8 @@ comments separately:

parser = MyParser(
text,
comments_re="\(\*.*?\*\)",
eol_comments_re="#.*?$"
comments="\(\*.*?\*\)",
eol_comments="#.*?$"
)

Both patterns may also be specified within a grammar using the
Expand Down
2 changes: 1 addition & 1 deletion grammar/tatsu.ebnf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@@grammar :: TatSu
@@whitespace :: /\s+/
@@comments :: ?"(?sm)[(][*](?:.|\n)*?[*][)]"
@@eol_comments :: ?"#[^\n]*$"
@@eol_comments :: ?"(?m)#[^\n]*$"
@@parseinfo :: True
@@left_recursion :: False

Expand Down
8 changes: 4 additions & 4 deletions tatsu/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments='(?m)#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
Expand All @@ -55,8 +55,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
comments='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments='(?m)#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
Expand Down
2 changes: 1 addition & 1 deletion tatsu/buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def _scanre(self, pattern):
if isinstance(pattern, RETYPE):
cre = pattern
else:
cre = re.compile(pattern, re.MULTILINE)
cre = re.compile(pattern)
return cre.match(self.text, self.pos)

@property
Expand Down
6 changes: 3 additions & 3 deletions tatsu/codegen/objectmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ def _get_full_name(cls):
# Try to reference the class
try:
idents = name.split('.')
_cls = getattr(module, idents[0])
cls_ = getattr(module, idents[0])
for ident in idents[1:]:
_cls = getattr(_cls, ident)
cls_ = getattr(cls_, ident)

assert _cls == cls
assert cls_ == cls
except AttributeError as e:
raise CodegenError(
"Couldn't find base type, it has to be importable",
Expand Down
16 changes: 8 additions & 8 deletions tatsu/codegen/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ def render_fields(self, fields):
left_recursion = self.node.config.left_recursion
parseinfo = self.node.config.parseinfo
namechars = repr(self.node.config.namechars or '')
comments_re = repr(self.node.config.comments_re)
eol_comments_re = repr(self.node.config.eol_comments_re)
comments = repr(self.node.config.comments)
eol_comments = repr(self.node.config.eol_comments)

rules = '\n'.join(
[self.get_renderer(rule).render() for rule in self.node.rules],
Expand All @@ -488,8 +488,8 @@ def render_fields(self, fields):
parseinfo=parseinfo,
keywords=keywords,
namechars=namechars,
comments_re=comments_re,
eol_comments_re=eol_comments_re,
comments=comments,
eol_comments=eol_comments,
)

abstract_rule_template = """
Expand Down Expand Up @@ -535,8 +535,8 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
comments={comments},
eol_comments={eol_comments},
keywords=KEYWORDS,
start={start!r},
)
Expand All @@ -554,8 +554,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
comments={comments},
eol_comments={eol_comments},
left_recursion={left_recursion},
keywords=KEYWORDS,
start={start!r},
Expand Down
2 changes: 1 addition & 1 deletion tatsu/g2e/semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def camel2py(name):
return re.sub(
'([a-z0-9])([A-Z])',
r'([a-z0-9])([A-Z])',
lambda m: m.group(1) + '_' + m.group(2).lower(),
name,
)
Expand Down
2 changes: 1 addition & 1 deletion tatsu/grammars.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ def _to_str(self, lean=False):

if multi:
return '\n|\n'.join(indent(o) for o in options)
elif len(options) and len(single) > PEP8_LLEN:
elif options and len(single) > PEP8_LLEN:
return '| ' + '\n| '.join(o for o in options)
else:
return single
Expand Down
34 changes: 27 additions & 7 deletions tatsu/infos.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import copy
import dataclasses
import re
from collections.abc import Callable, Mapping
from collections.abc import Callable, MutableMapping
from itertools import starmap
from typing import Any, NamedTuple

Expand All @@ -30,8 +30,8 @@ class ParserConfig:
start_rule: str | None = None # FIXME
rule_name: str | None = None # Backward compatibility

comments_re: re.Pattern | None = None
eol_comments_re: re.Pattern | None = None
_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)
_eol_comments_re: re.Pattern | None = dataclasses.field(default=None, init=False, repr=False)

tokenizercls: type[Tokenizer] | None = None # FIXME
semantics: type | None = None
Expand Down Expand Up @@ -64,9 +64,17 @@ def __post_init__(self): # pylint: disable=W0235
if self.ignorecase:
self.keywords = [k.upper() for k in self.keywords]
if self.comments:
self.comments_re = re.compile(self.comments)
self._comments_re = re.compile(self.comments)
if self.eol_comments:
self.eol_comments_re = re.compile(self.eol_comments)
self._eol_comments_re = re.compile(self.eol_comments)

@property
def comments_re(self) -> re.Pattern | None:
    """Return the compiled comments pattern held in ``_comments_re``.

    The value is ``None`` unless ``__post_init__`` compiled a pattern,
    which it does only when ``comments`` is truthy. No setter is
    defined here, so the attribute is read-only through this property.
    """
    return self._comments_re

@property
def eol_comments_re(self) -> re.Pattern | None:
    """Return the compiled end-of-line comments pattern held in ``_eol_comments_re``.

    The value is ``None`` unless ``__post_init__`` compiled a pattern,
    which it does only when ``eol_comments`` is truthy. No setter is
    defined here, so the attribute is read-only through this property.
    """
    return self._eol_comments_re

@classmethod
def new(
Expand All @@ -84,7 +92,7 @@ def effective_rule_name(self):
# note: there are legacy reasons for this mess
return self.start_rule or self.rule_name or self.start

def _find_common(self, **settings: Any) -> Mapping[str, Any]:
def _find_common(self, **settings: Any) -> MutableMapping[str, Any]:
return {
name: value
for name, value in settings.items()
Expand All @@ -101,8 +109,20 @@ def replace_config(
else:
return self.replace(**vars(other))

# Non-init fields cannot be passed as arguments to `replace`; however,
# they are among the values returned by `vars` and `dataclasses.asdict`,
# so they must be filtered out.
# If the `ParserConfig` dataclass drops these fields, this filter can be removed.
def _filter_non_init_fields(self, settings: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
    """Remove entries for non-init dataclass fields from ``settings``.

    Fields declared with ``init=False`` cannot be passed to
    ``dataclasses.replace``, so any key in ``settings`` naming such a
    field is dropped.

    Args:
        settings: Candidate overrides keyed by field name. The mapping is
            modified in place.

    Returns:
        The same ``settings`` object, without any non-init field names.
    """
    for spec in dataclasses.fields(self):
        if not spec.init:
            # pop() with a default tolerates keys that are absent.
            settings.pop(spec.name, None)
    return settings

def replace(self, **settings: Any) -> ParserConfig:
overrides = self._find_common(**settings)
overrides = self._filter_non_init_fields(self._find_common(**settings))
result = dataclasses.replace(self, **overrides)
if 'grammar' in overrides:
result.name = result.grammar
Expand Down
4 changes: 2 additions & 2 deletions tatsu/ngcodegen/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,8 +323,8 @@ def _gen_init(self, grammar: grammars.Grammar):
ignorecase={grammar.config.ignorecase},
namechars={grammar.config.namechars!r},
parseinfo={grammar.config.parseinfo},
comments_re={grammar.config.comments_re!r},
eol_comments_re={grammar.config.eol_comments_re!r},
comments={grammar.config.comments!r},
eol_comments={grammar.config.eol_comments!r},
keywords=KEYWORDS,
start={start!r},
)
Expand Down
2 changes: 1 addition & 1 deletion tatsu/util/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
logger.addHandler(ch)


RETYPE = type(re.compile('.'))
RETYPE = re.Pattern


ESCAPE_SEQUENCE_RE = re.compile(
Expand Down
2 changes: 1 addition & 1 deletion tatsu/walkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def pythonize_match(m):

# walk__pythonic_name with double underscore after walk
pythonic_name = re.sub(
'[A-Z]+', pythonize_match, node_cls.__name__,
r'[A-Z]+', pythonize_match, node_cls.__name__,
)
if pythonic_name != cammelcase_name:
walker = getattr(cls, prefix + pythonic_name, None)
Expand Down
2 changes: 1 addition & 1 deletion test/grammar/pattern_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_patterns_with_newlines(self):

blankline
=
/^[^\\n]*\\n$/
/(?m)^[^\\n]*\\n$/
;
"""

Expand Down
2 changes: 1 addition & 1 deletion test/grammar/syntax_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def test_parse_hash():
start = '#' ;
"""

parser = compile(grammar, eol_comments_re='')
parser = compile(grammar, eol_comments='')
parser.parse('#', trace=True)


Expand Down
7 changes: 3 additions & 4 deletions test/parser_equivalence_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def test_none_whitespace():
output = parser.parse(input, parseinfo=False)
assert output == ('This is a', ' test')


def test_sep_join():
grammar = """
@@grammar::numbers
Expand All @@ -183,9 +184,7 @@ def test_sep_join():
= ~ ( "," )%{ digit }+
;

digit = /\d+/ ;
digit = /\\d+/ ;
"""
parser = generate_and_load_parser('W', grammar)
ast = parser.parse('1,2,3,4', nameguard=False)


parser.parse('1,2,3,4', nameguard=False)
Loading