Skip to content

Commit 20d8ca8

Browse files
maskri17 authored and copybara-github committed
Aligning replace function in Regex ext with Google SQL
PiperOrigin-RevId: 773880708
1 parent 6d92e81 commit 20d8ca8

File tree

3 files changed

+122
-43
lines changed

3 files changed

+122
-43
lines changed

extensions/src/main/java/dev/cel/extensions/CelRegexExtensions.java

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ enum Function {
7878
String pattern = (String) args[1];
7979
String replaceStr = (String) args[2];
8080
long count = (long) args[3];
81-
return CelRegexExtensions.replace(target, pattern, replaceStr, count);
81+
return CelRegexExtensions.replaceN(target, pattern, replaceStr, count);
8282
}))),
8383
EXTRACT(
8484
CelFunctionDecl.newFunctionDeclaration(
@@ -153,18 +153,20 @@ private static Pattern compileRegexPattern(String regex) {
153153
}
154154

155155
private static String replace(String target, String regex, String replaceStr) {
156-
Pattern pattern = compileRegexPattern(regex);
157-
Matcher matcher = pattern.matcher(target);
158-
return matcher.replaceAll(replaceStr);
156+
return replaceN(target, regex, replaceStr, -1);
159157
}
160158

161-
private static String replace(String target, String regex, String replaceStr, long replaceCount) {
162-
Pattern pattern = compileRegexPattern(regex);
163-
159+
private static String replaceN(
160+
String target, String regex, String replaceStr, long replaceCount) {
164161
if (replaceCount == 0) {
165162
return target;
166163
}
164+
// For all negative replaceCount, do a replaceAll
165+
if (replaceCount < 0) {
166+
replaceCount = -1;
167+
}
167168

169+
Pattern pattern = compileRegexPattern(regex);
168170
Matcher matcher = pattern.matcher(target);
169171
StringBuffer sb = new StringBuffer();
170172
int counter = 0;
@@ -173,14 +175,59 @@ private static String replace(String target, String regex, String replaceStr, lo
173175
if (replaceCount != -1 && counter >= replaceCount) {
174176
break;
175177
}
176-
matcher.appendReplacement(sb, replaceStr);
178+
179+
String processedReplacement = replaceStrValidator(matcher, replaceStr);
180+
matcher.appendReplacement(sb, Matcher.quoteReplacement(processedReplacement));
177181
counter++;
178182
}
179183
matcher.appendTail(sb);
180184

181185
return sb.toString();
182186
}
183187

188+
private static String replaceStrValidator(Matcher matcher, String replacement) {
189+
StringBuilder sb = new StringBuilder();
190+
for (int i = 0; i < replacement.length(); i++) {
191+
char c = replacement.charAt(i);
192+
193+
if (c != '\\') {
194+
sb.append(c);
195+
continue;
196+
}
197+
198+
if (i + 1 >= replacement.length()) {
199+
throw new IllegalArgumentException("Invalid replacement string: \\ not allowed at end");
200+
}
201+
202+
char nextChar = replacement.charAt(++i);
203+
204+
if (Character.isDigit(nextChar)) {
205+
int groupNum = Character.digit(nextChar, 10);
206+
int groupCount = matcher.groupCount();
207+
208+
if (groupNum > groupCount) {
209+
throw new IllegalArgumentException(
210+
"Replacement string references group "
211+
+ groupNum
212+
+ " but regex has only "
213+
+ groupCount
214+
+ " group(s)");
215+
}
216+
217+
String groupValue = matcher.group(groupNum);
218+
if (groupValue != null) {
219+
sb.append(groupValue);
220+
}
221+
} else if (nextChar == '\\') {
222+
sb.append('\\');
223+
} else {
224+
throw new IllegalArgumentException(
225+
"Invalid replacement string: \\ must be followed by a digit");
226+
}
227+
}
228+
return sb.toString();
229+
}
230+
184231
private static Optional<String> extract(String target, String regex) {
185232
Pattern pattern = compileRegexPattern(regex);
186233
Matcher matcher = pattern.matcher(target);
@@ -215,11 +262,12 @@ private static ImmutableList<String> extractAll(String target, String regex) {
215262
while (matcher.find()) {
216263
if (hasOneGroup) {
217264
String group = matcher.group(1);
218-
// Add the captured group's content only if it's not null (e.g. optional group didn't match)
265+
// Add the captured group's content only if it's not null
219266
if (group != null) {
220267
builder.add(group);
221268
}
222-
} else { // No capturing groups (matcher.groupCount() == 0)
269+
} else {
270+
// No capturing groups
223271
builder.add(matcher.group(0));
224272
}
225273
}

extensions/src/main/java/dev/cel/extensions/README.md

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -717,12 +717,15 @@ chance for collision.
717717

718718
### Replace
719719

720-
The `regex.replace` function replaces all occurrences of a regex pattern in a
721-
string with a replacement string. Optionally, you can limit the number of
722-
replacements by providing a count argument. Both numeric ($N) and named
723-
(${name}) capture group references are supported in the replacement string, with
724-
validation for correctness. An error will be thrown for invalid regex or replace
725-
string.
720+
The `regex.replace` function replaces all non-overlapping matches of a regex
721+
pattern in the target string with a replacement string. Optionally, you can
722+
limit the number of replacements by providing a count argument. When the count
723+
is a negative number, the function acts as replace all. Only numeric (\N)
724+
capture group references are supported in the replacement string, with
725+
validation for correctness. Backslash-escaped digits (\1 to \9) within the
726+
replacement argument can be used to insert text matching the corresponding
727+
parenthesized group in the regexp pattern. An error will be thrown for invalid
728+
regex or replace string.
726729

727730
```
728731
regex.replace(target: string, pattern: string, replacement: string) -> string
@@ -732,14 +735,16 @@ regex.replace(target: string, pattern: string, replacement: string, count: int)
732735
Examples:
733736

734737
```
738+
regex.replace('hello world hello', 'hello', 'hi') == 'hi world hi'
735739
regex.replace('banana', 'a', 'x', 0) == 'banana'
736740
regex.replace('banana', 'a', 'x', 1) == 'bxnana'
737741
regex.replace('banana', 'a', 'x', 2) == 'bxnxna'
738-
regex.replace('foo bar', '(fo)o (ba)r', '$2 $1') == 'ba fo'
742+
regex.replace('banana', 'a', 'x', -12) == 'bxnxnx'
743+
regex.replace('foo bar', '(fo)o (ba)r', '\\2 \\1') == 'ba fo'
739744
740745
regex.replace('test', '(.)', '\\2') \\ Runtime Error invalid replace string
741746
regex.replace('foo bar', '(', '$2 $1') \\ Runtime Error invalid regex string
742-
regex.replace('id=123', 'id=(?P<value>\\\\d+)', 'value: ${values}') \\ Runtime Error invalid replace string
747+
regex.replace('id=123', 'id=(?P<value>\\\\d+)', 'value: \\values') \\ Runtime Error invalid replace string
743748
744749
```
745750

extensions/src/test/java/dev/cel/extensions/CelRegexExtensionsTest.java

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,31 @@ public final class CelRegexExtensionsTest {
3939
CelRuntimeFactory.standardCelRuntimeBuilder().addLibraries(CelExtensions.regex()).build();
4040

4141
@Test
42-
@TestParameters("{target: 'foo bar', regex: '(fo)o (ba)r', replaceStr: '$2 $1', res: 'ba fo'}")
42+
@TestParameters("{target: 'abc', regex: '^', replaceStr: 'start_', res: 'start_abc'}")
43+
@TestParameters("{target: 'abc', regex: '$', replaceStr: '_end', res: 'abc_end'}")
44+
@TestParameters("{target: 'a-b', regex: '\\\\b', replaceStr: '|', res: '|a|-|b|'}")
45+
@TestParameters(
46+
"{target: 'foo bar', regex: '(fo)o (ba)r', replaceStr: '\\\\2 \\\\1', res: 'ba fo'}")
47+
@TestParameters("{target: 'foo bar', regex: 'foo', replaceStr: '\\\\\\\\', res: '\\ bar'}")
4348
@TestParameters("{target: 'banana', regex: 'ana', replaceStr: 'x', res: 'bxna'}")
44-
@TestParameters("{target: 'abc', regex: 'b(.)', replaceStr: 'x$1', res: 'axc'}")
49+
@TestParameters("{target: 'abc', regex: 'b(.)', replaceStr: 'x\\\\1', res: 'axc'}")
4550
@TestParameters(
4651
"{target: 'hello world hello', regex: 'hello', replaceStr: 'hi', res: 'hi world hi'}")
52+
@TestParameters("{target: 'ac', regex: 'a(b)?c', replaceStr: '[\\\\1]', res: '[]'}")
4753
@TestParameters("{target: 'apple pie', regex: 'p', replaceStr: 'X', res: 'aXXle Xie'}")
4854
@TestParameters(
4955
"{target: 'remove all spaces', regex: '\\\\s', replaceStr: '', res: 'removeallspaces'}")
5056
@TestParameters("{target: 'digit:99919291992', regex: '\\\\d+', replaceStr: '3', res: 'digit:3'}")
5157
@TestParameters(
52-
"{target: 'foo bar baz', regex: '\\\\w+', replaceStr: '($0)', res: '(foo) (bar) (baz)'}")
58+
"{target: 'foo bar baz', regex: '\\\\w+', replaceStr: '(\\\\0)', res: '(foo) (bar) (baz)'}")
5359
@TestParameters("{target: '', regex: 'a', replaceStr: 'b', res: ''}")
5460
@TestParameters(
5561
"{target: 'User: Alice, Age: 30', regex: 'User: (?P<name>\\\\w+), Age: (?P<age>\\\\d+)',"
56-
+ " replaceStr: '${name} is ${age} years old', res: 'Alice is 30 years old'}")
62+
+ " replaceStr: '${name} is ${age} years old', res: '${name} is ${age} years old'}")
5763
@TestParameters(
58-
"{target: 'abc', regex: '(?P<letter>b)', replaceStr: '[${letter}]', res: 'a[b]c'}")
64+
"{target: 'User: Alice, Age: 30', regex: 'User: (?P<name>\\\\w+), Age: (?P<age>\\\\d+)',"
65+
+ " replaceStr: '\\\\1 is \\\\2 years old', res: 'Alice is 30 years old'}")
66+
@TestParameters("{target: 'hello ☃', regex: '☃', replaceStr: '❄', res: 'hello ❄'}")
5967
public void replaceAll_success(String target, String regex, String replaceStr, String res)
6068
throws Exception {
6169
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
@@ -70,8 +78,8 @@ public void replaceAll_success(String target, String regex, String replaceStr, S
7078
public void replace_nested_success() throws Exception {
7179
String expr =
7280
"regex.replace("
73-
+ " regex.replace('%(foo) %(bar) %2','%\\\\((\\\\w+)\\\\)','\\\\${$1}'),"
74-
+ " '%(\\\\d+)', '\\\\$$1')";
81+
+ " regex.replace('%(foo) %(bar) %2','%\\\\((\\\\w+)\\\\)','${\\\\1}'),"
82+
+ " '%(\\\\d+)', '$\\\\1')";
7583
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile(expr).getAst());
7684

7785
Object result = program.eval();
@@ -85,19 +93,18 @@ public void replace_nested_success() throws Exception {
8593
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: 2, res: 'bxnxna'}")
8694
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: 100, res: 'bxnxnx'}")
8795
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -1, res: 'bxnxnx'}")
88-
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -100, res: 'banana'}")
96+
@TestParameters("{t: 'banana', re: 'a', rep: 'x', i: -100, res: 'bxnxnx'}")
8997
@TestParameters(
90-
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '$2-$1', i: 1,"
98+
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '\\\\2-\\\\1', i: 1,"
9199
+ " res: 'dog-cat dog-cat cat-dog dog-cat'}")
92100
@TestParameters(
93-
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '$2-$1', i: 2, res: 'dog-cat"
94-
+ " dog-cat dog-cat dog-cat'}")
101+
"{t: 'cat-dog dog-cat cat-dog dog-cat', re: '(cat)-(dog)', rep: '\\\\2-\\\\1', i: 2, res:"
102+
+ " 'dog-cat dog-cat dog-cat dog-cat'}")
95103
@TestParameters("{t: 'a.b.c', re: '\\\\.', rep: '-', i: 1, res: 'a-b.c'}")
96104
@TestParameters("{t: 'a.b.c', re: '\\\\.', rep: '-', i: -1, res: 'a-b-c'}")
97105
public void replaceCount_success(String t, String re, String rep, long i, String res)
98106
throws Exception {
99107
String expr = String.format("regex.replace('%s', '%s', '%s', %d)", t, re, rep, i);
100-
System.out.println("expr: " + expr);
101108
CelRuntime.Program program = RUNTIME.createProgram(COMPILER.compile(expr).getAst());
102109

103110
Object result = program.eval();
@@ -108,7 +115,7 @@ public void replaceCount_success(String t, String re, String rep, long i, String
108115
@Test
109116
@TestParameters("{target: 'foo bar', regex: '(', replaceStr: '$2 $1'}")
110117
@TestParameters("{target: 'foo bar', regex: '[a-z', replaceStr: '$2 $1'}")
111-
public void replace_invalid_regex(String target, String regex, String replaceStr)
118+
public void replace_invalidRegex_throwsException(String target, String regex, String replaceStr)
112119
throws Exception {
113120
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
114121
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
@@ -121,32 +128,48 @@ public void replace_invalid_regex(String target, String regex, String replaceStr
121128
}
122129

123130
@Test
124-
@TestParameters("{target: 'test', regex: '(.)', replaceStr: '$2'}")
125-
public void replace_invalid_captureGroup(String target, String regex, String replaceStr)
126-
throws Exception {
127-
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
131+
public void replace_invalidCaptureGroupReplaceStr_throwsException() throws Exception {
132+
String expr = "regex.replace('test', '(.)', '\\\\2')";
128133
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
129134

130135
CelEvaluationException e =
131136
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
132137

133-
assertThat(e).hasCauseThat().isInstanceOf(IndexOutOfBoundsException.class);
134-
assertThat(e).hasCauseThat().hasMessageThat().contains("n > number of groups");
138+
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
139+
assertThat(e)
140+
.hasCauseThat()
141+
.hasMessageThat()
142+
.contains("Replacement string references group 2 but regex has only 1 group(s)");
135143
}
136144

137145
@Test
138-
@TestParameters(
139-
"{target: 'id=123', regex: 'id=(?P<value>\\\\d+)', replaceStr: 'value: ${values}'}")
140-
public void replace_invalid_replaceStr(String target, String regex, String replaceStr)
141-
throws Exception {
142-
String expr = String.format("regex.replace('%s', '%s', '%s')", target, regex, replaceStr);
146+
public void replace_trailingBackslashReplaceStr_throwsException() throws Exception {
147+
String expr = "regex.replace('id=123', 'id=(?P<value>\\\\d+)', '\\\\')";
148+
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
149+
150+
CelEvaluationException e =
151+
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
152+
153+
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
154+
assertThat(e)
155+
.hasCauseThat()
156+
.hasMessageThat()
157+
.contains("Invalid replacement string: \\ not allowed at end");
158+
}
159+
160+
@Test
161+
public void replace_invalidGroupReferenceReplaceStr_throwsException() throws Exception {
162+
String expr = "regex.replace('id=123', 'id=(?P<value>\\\\d+)', '\\\\a')";
143163
CelAbstractSyntaxTree ast = COMPILER.compile(expr).getAst();
144164

145165
CelEvaluationException e =
146166
assertThrows(CelEvaluationException.class, () -> RUNTIME.createProgram(ast).eval());
147167

148168
assertThat(e).hasCauseThat().isInstanceOf(IllegalArgumentException.class);
149-
assertThat(e).hasCauseThat().hasMessageThat().contains("group 'values' not found");
169+
assertThat(e)
170+
.hasCauseThat()
171+
.hasMessageThat()
172+
.contains("Invalid replacement string: \\ must be followed by a digit");
150173
}
151174

152175
@Test
@@ -208,6 +231,9 @@ private enum ExtractAllTestCase {
208231
NO_MATCH("regex.extractAll('id:123, id:456', 'assa')", ImmutableList.of()),
209232
NO_CAPTURE_GROUP(
210233
"regex.extractAll('id:123, id:456', 'id:\\\\d+')", ImmutableList.of("id:123", "id:456")),
234+
CAPTURE_GROUP(
235+
"regex.extractAll('key=\"\", key=\"val\"', 'key=\"([^\"]*)\"')",
236+
ImmutableList.of("", "val")),
211237
SINGLE_NAMED_GROUP(
212238
"regex.extractAll('testuser@testdomain', '(?P<username>.*)@')",
213239
ImmutableList.of("testuser")),

0 commit comments

Comments
 (0)