Skip to content

Commit 8372a49

Browse files
authored
Merge pull request #214 from bulasevich/GR-68324
[Backport] [Oracle GraalVM] [GR-68324] Backport to 23.1: TRegex: treat nested quantifiers as a bailout instead of a syntax error in OracleDBFlavor.
2 parents 88205d9 + f54edd5 commit 8372a49

File tree

4 files changed

+50
-58
lines changed

4 files changed

+50
-58
lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java

Lines changed: 42 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -126,30 +126,10 @@ public void generatedTests() {
126126
expectSyntaxError("x{4294967296}", "", "", getTRegexEncoding(), "x{4294967296}", 0, "invalid interval in regular expression");
127127
expectSyntaxError("x{4294967297}", "", "", getTRegexEncoding(), "x{4294967297}", 0, "invalid interval in regular expression");
128128
test("x??", "", "x", 0, true, 0, 0);
129-
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "x", 0, "nested quantifier in regular expression");
130-
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xx", 0, "nested quantifier in regular expression");
131-
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xxx", 0, "nested quantifier in regular expression");
132-
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
133-
expectSyntaxError("x{2}*", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
134-
expectSyntaxError("x{2}*?", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
135-
expectSyntaxError("x{2}*???", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
136129
test("\\A*x\\Z+", "", "x", 0, true, 0, 1);
137130
test("\\A*x\\Z+", "", "xx", 0, true, 1, 2);
138131
test("\\A+x\\Z+", "", "xx", 0, false);
139-
expectSyntaxError("x????", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
140-
expectSyntaxError("x????", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
141-
expectSyntaxError("x??????", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
142-
expectSyntaxError("x??????", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
143132
test("x{2}?", "", "xxxxx", 0, true, 0, 2);
144-
expectSyntaxError("x{2}??", "", "", getTRegexEncoding(), "xxxxx", 0, "nested quantifier in regular expression");
145-
expectSyntaxError("x{2}+", "", "", getTRegexEncoding(), "xxxxx", 0, "nested quantifier in regular expression");
146-
expectSyntaxError("x{2}*", "", "", getTRegexEncoding(), "xxxxx", 0, "nested quantifier in regular expression");
147-
expectSyntaxError("x???", "", "", getTRegexEncoding(), "x", 0, "nested quantifier in regular expression");
148-
expectSyntaxError("x{2}*??", "", "", getTRegexEncoding(), "xxxx", 0, "nested quantifier in regular expression");
149-
expectSyntaxError("x???", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
150-
expectSyntaxError("x???", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
151-
expectSyntaxError("x?????", "", "", getTRegexEncoding(), "x?", 0, "nested quantifier in regular expression");
152-
expectSyntaxError("x?????", "", "", getTRegexEncoding(), "xx?", 0, "nested quantifier in regular expression");
153133
test("(a{0,1})*b\\1", "", "aab", 0, true, 0, 3, 2, 2);
154134
test("(a{0,1})*b\\1", "", "aaba", 0, true, 0, 3, 2, 2);
155135
test("(a{0,1})*b\\1", "", "aabaa", 0, true, 0, 3, 2, 2);
@@ -988,7 +968,6 @@ public void generatedTests() {
988968
test("a(()|()|b|()|())*c", "", "abbc", 0, true, 0, 4, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1);
989969
test("a(()|()|()|b|())*c", "", "abbc", 0, true, 0, 4, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1);
990970
test("a(()|()|()|()|b)*c", "", "abbc", 0, true, 0, 4, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1);
991-
expectSyntaxError("a??+", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
992971
test("()??()??()??()??()??()??()??()??\\3\\5\\7", "", "a", 0, true, 0, 0, -1, -1, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1);
993972
test("()*", "", "a", 0, true, 0, 0, 0, 0);
994973
test("(a|)*", "", "a", 0, true, 0, 1, 1, 1);
@@ -1011,24 +990,11 @@ public void generatedTests() {
1011990
expectSyntaxError("[y-\\{][y-\\{]", "", "", getTRegexEncoding(), "I", 0, "invalid range in regular expression");
1012991
test("a?", "", "aaa", 0, true, 0, 1);
1013992
test("a??", "", "aaa", 0, true, 0, 0);
1014-
expectSyntaxError("a???", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1015993
test("a+?", "", "aaa", 0, true, 0, 1);
1016-
expectSyntaxError("a+??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1017-
expectSyntaxError("a??+", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1018-
expectSyntaxError("a?+", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1019-
expectSyntaxError("a?+?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1020-
expectSyntaxError("a?+??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1021-
expectSyntaxError("a?*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1022-
expectSyntaxError("(a?)*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1023994
test("((a?)*)??", "", "aaa", 0, true, 0, 0, -1, -1, -1, -1);
1024995
test("((a?)*?)?", "", "aaa", 0, true, 0, 0, 0, 0, -1, -1);
1025-
expectSyntaxError("a?*?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1026-
expectSyntaxError("a*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1027-
expectSyntaxError("a+*?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1028996
test("(a+)*?", "", "aaa", 0, true, 0, 0, -1, -1);
1029997
test("((a+)*)?", "", "aaa", 0, true, 0, 3, 0, 3, 0, 3);
1030-
expectSyntaxError("a+*??", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1031-
expectSyntaxError("a++?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1032998
expectSyntaxError("[[.\\a.]]", "", "", getTRegexEncoding(), ".", 0, "invalid collation class in regular expression");
1033999
test("[[...]]", "", ".", 0, true, 0, 1);
10341000
test("[[...]]", "", "[", 0, false);
@@ -1043,12 +1009,6 @@ public void generatedTests() {
10431009
test("[[...]a]a", "", "a", 0, false);
10441010
test("[[...]a]?a", "", "a", 0, true, 0, 1);
10451011
test("[[...]a]|a", "", "a", 0, true, 0, 1);
1046-
expectSyntaxError("a++?", "", "", getTRegexEncoding(), "aaa", 0, "nested quantifier in regular expression");
1047-
expectSyntaxError("\\D|++?", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1048-
expectSyntaxError("\\D|++?^", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1049-
expectSyntaxError("\\S|\\D|++?^(3)", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1050-
expectSyntaxError("\\S|\\D|++?^((3)|[R-_\\(/])t[[:alnum:]]c", "", "", getTRegexEncoding(), "9", 0, "nested quantifier in regular expression");
1051-
expectSyntaxError("(\\d)|5+*?|[[:lower:]][[=l=]]^%", "", "", getTRegexEncoding(), "\u0169\u2113%", 0, "nested quantifier in regular expression");
10521012
test("[[===]]", "", "=", 0, true, 0, 1);
10531013
expectSyntaxError("[[=\\==]]", "", "", getTRegexEncoding(), "=", 0, "invalid equivalence class in regular expression");
10541014
expectSyntaxError("[[=\\==]]", "", "", getTRegexEncoding(), "\\", 0, "invalid equivalence class in regular expression");
@@ -1093,23 +1053,12 @@ public void generatedTests() {
10931053
test("\\[[b-b]", "", "[b-b]", 0, true, 0, 2);
10941054
test("\\[c-b]", "", "[c-b]", 0, true, 0, 5);
10951055
expectSyntaxError("\\[[c-b]", "", "", getTRegexEncoding(), "[c-b]", 0, "invalid range in regular expression");
1096-
expectSyntaxError("()?*", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
1097-
expectSyntaxError("()?*|", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
1098-
expectSyntaxError("()?*||", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
1099-
expectSyntaxError("()?*||a", "", "", getTRegexEncoding(), "b", 0, "nested quantifier in regular expression");
1100-
expectSyntaxError("()?*||^a\\Zb", "", "", getTRegexEncoding(), "c", 0, "nested quantifier in regular expression");
11011056
test("ac??bc?", "", "abc", 0, true, 0, 3);
11021057
test("ac??bc?", "", "acbc", 0, true, 0, 4);
11031058
test("a?", "", "a", 0, true, 0, 1);
11041059
test("a??", "", "a", 0, true, 0, 0);
1105-
expectSyntaxError("a???", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1106-
expectSyntaxError("(a)???", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
11071060
test("(a?)??", "", "a", 0, true, 0, 0, -1, -1);
11081061
test("(a??)?", "", "a", 0, true, 0, 0, 0, 0);
1109-
expectSyntaxError("(a???)", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1110-
expectSyntaxError("a{0,1}??", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1111-
expectSyntaxError("a??{0,1}", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
1112-
expectSyntaxError("a{0,1}?{0,1}", "", "", getTRegexEncoding(), "a", 0, "nested quantifier in regular expression");
11131062
test("(a{0,1})*", "", "aaaaaa", 0, true, 0, 6, 6, 6);
11141063
test("(a{0,2})*", "", "aaaaaa", 0, true, 0, 6, 6, 6);
11151064
test("(a{1,2})*", "", "aaaaaa", 0, true, 0, 6, 4, 6);
@@ -1402,4 +1351,46 @@ public void gr52933() {
14021351
test("\\z{1,6}a", "", "a", 0, false);
14031352
test("\\z{1,7}a", "", "a", 0, false);
14041353
}
1354+
1355+
@Test
1356+
public void testNestedQuantifierBailout() {
1357+
expectUnsupported("()?*");
1358+
expectUnsupported("()?*|");
1359+
expectUnsupported("()?*||");
1360+
expectUnsupported("()?*||a");
1361+
expectUnsupported("(a)???");
1362+
expectUnsupported("(a?)*??");
1363+
expectUnsupported("(a???)");
1364+
expectUnsupported("a*??");
1365+
expectUnsupported("a+*?");
1366+
expectUnsupported("a+*??");
1367+
expectUnsupported("a++?");
1368+
expectUnsupported("a+??");
1369+
expectUnsupported("a?*?");
1370+
expectUnsupported("a?*??");
1371+
expectUnsupported("a?+");
1372+
expectUnsupported("a?+?");
1373+
expectUnsupported("a?+??");
1374+
expectUnsupported("a??+");
1375+
expectUnsupported("a???");
1376+
expectUnsupported("a??{0,1}");
1377+
expectUnsupported("a{0,1}??");
1378+
expectUnsupported("a{0,1}?{0,1}");
1379+
expectUnsupported("()?*||^a\\Zb");
1380+
expectUnsupported("\\D|++?");
1381+
expectUnsupported("\\D|++?^");
1382+
expectUnsupported("(\\d)|5+*?|[[:lower:]][[=l=]]^%");
1383+
expectUnsupported("\\S|\\D|++?^(3)");
1384+
expectUnsupported("\\S|\\D|++?^((3)|[R-_\\(/])t[[:alnum:]]c");
1385+
expectUnsupported("x???");
1386+
expectUnsupported("x????");
1387+
expectUnsupported("x?????");
1388+
expectUnsupported("x??????");
1389+
expectUnsupported("x{2}*");
1390+
expectUnsupported("x{2}*?");
1391+
expectUnsupported("x{2}*??");
1392+
expectUnsupported("x{2}*???");
1393+
expectUnsupported("x{2}+");
1394+
expectUnsupported("x{2}??");
1395+
}
14051396
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/RegexTestBase.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -215,8 +215,8 @@ private static void validateResult(String pattern, String flags, String options,
215215
// print(pattern, input, fromIndex, result, groupCount, captureGroupBoundsAndLastGroup);
216216
}
217217

218-
void expectUnsupported(String pattern, String flags) {
219-
expectUnsupported(pattern, flags, "");
218+
void expectUnsupported(String pattern) {
219+
expectUnsupported(pattern, "", "");
220220
}
221221

222222
void expectUnsupported(String pattern, String flags, String options) {

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/RubyTests.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -417,9 +417,9 @@ public void nonRecursiveSubexpressionCalls() {
417417

418418
@Test
419419
public void recursiveSubexpressionCalls() {
420-
expectUnsupported("(a\\g<1>?)(b\\g<2>?)", "");
421-
expectUnsupported("(?<a>a\\g<b>?)(?<b>b\\g<a>?)", "");
422-
expectUnsupported("a\\g<0>?", "");
420+
expectUnsupported("(a\\g<1>?)(b\\g<2>?)");
421+
expectUnsupported("(?<a>a\\g<b>?)(?<b>b\\g<a>?)");
422+
expectUnsupported("a\\g<0>?");
423423
}
424424

425425
@Test
@@ -535,7 +535,7 @@ public void gr39214() {
535535

536536
@Test
537537
public void gr41489() {
538-
expectUnsupported("\\((?>[^)(]+|\\g<0>)*\\)", "");
538+
expectUnsupported("\\((?>[^)(]+|\\g<0>)*\\)");
539539
}
540540

541541
@Test

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/flavors/OracleDBRegexParser.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@
4747
import com.oracle.truffle.regex.RegexLanguage;
4848
import com.oracle.truffle.regex.RegexSource;
4949
import com.oracle.truffle.regex.RegexSyntaxException;
50+
import com.oracle.truffle.regex.UnsupportedRegexException;
5051
import com.oracle.truffle.regex.charset.ClassSetContents;
5152
import com.oracle.truffle.regex.charset.CodePointSet;
5253
import com.oracle.truffle.regex.charset.CodePointSetAccumulator;
@@ -168,7 +169,7 @@ public RegexAST parse() throws RegexSyntaxException {
168169
break;
169170
case quantifier:
170171
if (prevKind == Token.Kind.quantifier) {
171-
throw syntaxError(OracleDBErrorMessages.NESTED_QUANTIFIER);
172+
throw new UnsupportedRegexException(OracleDBErrorMessages.NESTED_QUANTIFIER);
172173
}
173174
if (astBuilder.getCurTerm() == null || prevKind == Token.Kind.captureGroupBegin) {
174175
// quantifiers without target are ignored

0 commit comments

Comments
 (0)