gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and add a test to make sure we catch this error in the future. #134734

Merged: 3 commits, May 26, 2025
3 changes: 2 additions & 1 deletion Doc/reference/lexical_analysis.rst
@@ -489,8 +489,9 @@ String literals are described by the following lexical definitions:

.. productionlist:: python-grammar
stringliteral: [`stringprefix`](`shortstring` | `longstring`)
- stringprefix: "r" | "u" | "R" | "U" | "f" | "F"
+ stringprefix: "r" | "u" | "R" | "U" | "f" | "F" | "t" | "T"
  : | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
+ : | "tr" | "Tr" | "tR" | "TR" | "rt" | "rT" | "Rt" | "RT"
shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"'
longstring: "'''" `longstringitem`* "'''" | '"""' `longstringitem`* '"""'
shortstringitem: `shortstringchar` | `stringescapeseq`
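A quick way to exercise the new grammar (a sketch, assuming an interpreter with PEP 750 t-strings, i.e. CPython 3.14+): every order and case permutation of the "t" and "r" prefix characters should now compile.

import itertools

# Sketch: each order/case permutation of a "tr" prefix should compile
# on an interpreter that implements PEP 750 t-strings (3.14+).
for perm in itertools.permutations("tr"):
    for casing in itertools.product(*[(c, c.upper()) for c in perm]):
        prefix = "".join(casing)
        compile(f'{prefix}""', "<check>", "eval")  # SyntaxError if invalid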
56 changes: 56 additions & 0 deletions Lib/test/test_tokenize.py
@@ -1,6 +1,8 @@
import contextlib
import itertools
import os
import re
import string
import tempfile
import token
import tokenize
@@ -3238,5 +3240,59 @@ def test_exact_flag(self):
self.check_output(source, expect, flag)


class StringPrefixTest(unittest.TestCase):
def test_prefixes(self):
# Get the list of defined string prefixes. I don't see an
# obvious documented way of doing this, but probably the best
# thing is to split apart tokenize.StringPrefix.

# Make sure StringPrefix begins and ends in parens.
self.assertEqual(tokenize.StringPrefix[0], '(')
self.assertEqual(tokenize.StringPrefix[-1], ')')

# Then split apart everything else by '|'.
defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))

# Now compute the actual string prefixes, by exec-ing all
# valid prefix combinations, followed by an empty string.

# Try all prefix lengths until we find a length that has zero
# valid prefixes. This will miss the case where for example
# there are no valid 3 character prefixes, but there are valid
# 4 character prefixes. That seems extremely unlikely.

# Note that the empty prefix is being included, because length
# starts at 0. That's expected, since StringPrefix includes
# the empty prefix.

valid_prefixes = set()
for length in itertools.count():
num_at_this_length = 0
for prefix in (
"".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
):
for t in itertools.permutations(prefix):
for u in itertools.product(*[(c, c.upper()) for c in t]):
p = ''.join(u)
if p == "not":
# 'not' can never be a string prefix,
# because it's a valid expression: not ""
continue
try:
eval(f'{p}""')

# No syntax error, so p is a valid string
# prefix.

valid_prefixes.add(p)
num_at_this_length += 1
except SyntaxError:
pass
if num_at_this_length == 0:
break

self.assertEqual(defined_prefixes, valid_prefixes)


if __name__ == "__main__":
unittest.main()
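For reference, tokenize.StringPrefix is a parenthesized regex alternation over every valid prefix (including an empty branch for unprefixed strings), so it can also be matched directly. A minimal sketch:

import re
import tokenize

# StringPrefix renders as something like "(rT|tR|...|)": all valid
# prefixes plus an empty branch for the no-prefix case.
assert re.fullmatch(tokenize.StringPrefix, "rT")
assert re.fullmatch(tokenize.StringPrefix, "")        # empty prefix
assert not re.fullmatch(tokenize.StringPrefix, "tb")  # no binary t-strings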
2 changes: 1 addition & 1 deletion Lib/tokenize.py
@@ -86,7 +86,7 @@ def _all_string_prefixes():
# The valid string prefixes. Only contain the lower case versions,
# and don't contain any permutations (include 'fr', but not
# 'rf'). The various permutations will be generated.
- _valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
+ _valid_string_prefixes = ['b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr']
# if we add binary f-strings, add: ['fb', 'fbr']
result = {''}
for prefix in _valid_string_prefixes:
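For context, _all_string_prefixes() expands each lowercase entry into every order and case permutation, so listing only 't' and 'tr' above is enough to cover 'T', 'tR', 'Rt', and the rest. A standalone sketch of that expansion (not the exact CPython code):

import itertools

def all_string_prefixes(valid=('b', 'r', 'u', 'f', 't', 'br', 'fr', 'tr')):
    # Expand each lowercase prefix into every order and case permutation;
    # the empty prefix is always valid.
    result = {''}
    for prefix in valid:
        for perm in itertools.permutations(prefix):
            for casing in itertools.product(*[(c, c.upper()) for c in perm]):
                result.add(''.join(casing))
    return result

assert {'t', 'T', 'tR', 'Rt', 'TR'} <= all_string_prefixes()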