
Commit 1f932a2

ericvsmith authored and miss-islington committed
gh-134752: Improve speed of test_tokenize.StringPrefixTest.test_prefixes. (GH-134766)
(cherry picked from commit 579686d)
Co-authored-by: Eric V. Smith <ericvsmith@users.noreply.github.com>
1 parent 0f93b2f commit 1f932a2

File tree

1 file changed (+45, −27 lines)


Lib/test/test_tokenize.py

Lines changed: 45 additions & 27 deletions
@@ -3241,39 +3241,40 @@ def test_exact_flag(self):


 class StringPrefixTest(unittest.TestCase):
-    def test_prefixes(self):
-        # Get the list of defined string prefixes. I don't see an
-        # obvious documented way of doing this, but probably the best
-        # thing is to split apart tokenize.StringPrefix.
-
-        # Make sure StringPrefix begins and ends in parens.
-        self.assertEqual(tokenize.StringPrefix[0], '(')
-        self.assertEqual(tokenize.StringPrefix[-1], ')')
-
-        # Then split apart everything else by '|'.
-        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))
-
-        # Now compute the actual string prefixes, by exec-ing all
-        # valid prefix combinations, followed by an empty string.
-
-        # Try all prefix lengths until we find a length that has zero
-        # valid prefixes. This will miss the case where for example
-        # there are no valid 3 character prefixes, but there are valid
-        # 4 character prefixes. That seems extremely unlikely.
-
-        # Note that the empty prefix is being included, because length
-        # starts at 0. That's expected, since StringPrefix includes
-        # the empty prefix.
+    @staticmethod
+    def determine_valid_prefixes():
+        # Try all lengths until we find a length that has zero valid
+        # prefixes. This will miss the case where for example there
+        # are no valid 3 character prefixes, but there are valid 4
+        # character prefixes. That seems unlikely.
+
+        single_char_valid_prefixes = set()
+
+        # Find all of the single character string prefixes. Just get
+        # the lowercase version, we'll deal with combinations of upper
+        # and lower case later. I'm using this logic just in case
+        # some uppercase-only prefix is added.
+        for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
+            try:
+                eval(f'{letter}""')
+                single_char_valid_prefixes.add(letter.lower())
+            except SyntaxError:
+                pass

+        # This logic assumes that all combinations of valid prefixes only use
+        # the characters that are valid single character prefixes. That seems
+        # like a valid assumption, but if it ever changes this will need
+        # adjusting.
         valid_prefixes = set()
         for length in itertools.count():
             num_at_this_length = 0
             for prefix in (
-                "".join(l) for l in list(itertools.combinations(string.ascii_lowercase, length))
+                "".join(l)
+                for l in itertools.combinations(single_char_valid_prefixes, length)
             ):
                 for t in itertools.permutations(prefix):
                     for u in itertools.product(*[(c, c.upper()) for c in t]):
-                        p = ''.join(u)
+                        p = "".join(u)
                         if p == "not":
                             # 'not' can never be a string prefix,
                             # because it's a valid expression: not ""
@@ -3289,9 +3290,26 @@ def test_prefixes(self):
                         except SyntaxError:
                             pass
             if num_at_this_length == 0:
-                break
+                return valid_prefixes
+
+
+    def test_prefixes(self):
+        # Get the list of defined string prefixes. I don't see an
+        # obvious documented way of doing this, but probably the best
+        # thing is to split apart tokenize.StringPrefix.
+
+        # Make sure StringPrefix begins and ends in parens. We're
+        # assuming it's of the form "(a|b|ab)", if a, b, and cd are
+        # valid string prefixes.
+        self.assertEqual(tokenize.StringPrefix[0], '(')
+        self.assertEqual(tokenize.StringPrefix[-1], ')')
+
+        # Then split apart everything else by '|'.
+        defined_prefixes = set(tokenize.StringPrefix[1:-1].split('|'))

-        self.assertEqual(defined_prefixes, valid_prefixes)
+        # Now compute the actual allowed string prefixes and compare
+        # to what is defined in the tokenize module.
+        self.assertEqual(defined_prefixes, self.determine_valid_prefixes())


 if __name__ == "__main__":
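
For context, the speedup comes from shrinking the search space: the old test built candidate prefixes from all 26 lowercase ASCII letters at every length, while the new determine_valid_prefixes() helper first discovers which single characters are valid string prefixes and only combines those. Below is a minimal standalone sketch of that approach, not the test itself; the helper names single_char_prefixes and all_valid_prefixes are illustrative and do not appear in the patch.

import itertools
import string


def single_char_prefixes():
    # Find the single-character string prefixes by eval-ing, e.g., r"" or x"".
    valid = set()
    for letter in itertools.chain(string.ascii_lowercase, string.ascii_uppercase):
        try:
            eval(f'{letter}""')
            valid.add(letter.lower())
        except SyntaxError:
            pass
    return valid


def all_valid_prefixes():
    # Build longer candidates only from characters that are themselves
    # valid single-character prefixes, then try every ordering and casing.
    singles = single_char_prefixes()
    valid = set()
    for length in itertools.count():
        found_at_this_length = 0
        for combo in itertools.combinations(singles, length):
            for perm in itertools.permutations(combo):
                for cased in itertools.product(*[(c, c.upper()) for c in perm]):
                    p = "".join(cased)
                    if p == "not":
                        # "not" parses as an expression (not ""), never a prefix.
                        continue
                    try:
                        eval(f'{p}""')
                        valid.add(p)
                        found_at_this_length += 1
                    except SyntaxError:
                        pass
        if found_at_this_length == 0:
            # No valid prefixes at this length; stop searching.
            return valid


if __name__ == "__main__":
    print(sorted(all_valid_prefixes(), key=lambda s: (len(s), s)))

Run under a given interpreter, this prints the empty prefix plus whatever prefix combinations that version accepts; the exact set depends on the Python release. The test's assertion is that this computed set matches what tokenize.StringPrefix declares.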
