Skip to content

Commit 6d92e81

Browse files
maskri17copybara-github
authored andcommitted
Updating Regex ext to align with Google SQL and Adding Regex documentation
PiperOrigin-RevId: 772600803
1 parent 168588b commit 6d92e81

File tree

4 files changed

+200
-140
lines changed

4 files changed

+200
-140
lines changed

extensions/src/main/java/dev/cel/extensions/CelRegexExtensions.java

Lines changed: 39 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
package dev.cel.extensions;
1616

1717
import com.google.common.collect.ImmutableList;
18-
import com.google.common.collect.ImmutableMap;
1918
import com.google.common.collect.ImmutableSet;
2019
import com.google.errorprone.annotations.Immutable;
2120
import com.google.re2j.Matcher;
@@ -25,7 +24,6 @@
2524
import dev.cel.common.CelFunctionDecl;
2625
import dev.cel.common.CelOverloadDecl;
2726
import dev.cel.common.types.ListType;
28-
import dev.cel.common.types.MapType;
2927
import dev.cel.common.types.OptionalType;
3028
import dev.cel.common.types.SimpleType;
3129
import dev.cel.compiler.CelCompilerLibrary;
@@ -40,9 +38,8 @@
4038
final class CelRegexExtensions implements CelCompilerLibrary, CelRuntimeLibrary {
4139

4240
private static final String REGEX_REPLACE_FUNCTION = "regex.replace";
43-
private static final String REGEX_CAPTURE_FUNCTION = "regex.capture";
44-
private static final String REGEX_CAPTUREALL_FUNCTION = "regex.captureAll";
45-
private static final String REGEX_CAPTUREALLNAMED_FUNCTION = "regex.captureAllNamed";
41+
private static final String REGEX_EXTRACT_FUNCTION = "regex.extract";
42+
private static final String REGEX_EXTRACT_ALL_FUNCTION = "regex.extractAll";
4643

4744
enum Function {
4845
REPLACE(
@@ -83,52 +80,36 @@ enum Function {
8380
long count = (long) args[3];
8481
return CelRegexExtensions.replace(target, pattern, replaceStr, count);
8582
}))),
86-
CAPTURE(
83+
EXTRACT(
8784
CelFunctionDecl.newFunctionDeclaration(
88-
REGEX_CAPTURE_FUNCTION,
85+
REGEX_EXTRACT_FUNCTION,
8986
CelOverloadDecl.newGlobalOverload(
90-
"regex_capture_string_string",
87+
"regex_extract_string_string",
9188
"Returns the first substring that matches the regex.",
9289
OptionalType.create(SimpleType.STRING),
9390
SimpleType.STRING,
9491
SimpleType.STRING)),
9592
ImmutableSet.of(
9693
CelFunctionBinding.from(
97-
"regex_capture_string_string",
94+
"regex_extract_string_string",
9895
String.class,
9996
String.class,
100-
CelRegexExtensions::captureFirstMatch))),
101-
CAPTUREALL(
97+
CelRegexExtensions::extract))),
98+
EXTRACTALL(
10299
CelFunctionDecl.newFunctionDeclaration(
103-
REGEX_CAPTUREALL_FUNCTION,
100+
REGEX_EXTRACT_ALL_FUNCTION,
104101
CelOverloadDecl.newGlobalOverload(
105-
"regex_captureAll_string_string",
106-
"Returns an arrat of all substrings that match the regex.",
102+
"regex_extractAll_string_string",
103+
"Returns an array of all substrings that match the regex.",
107104
ListType.create(SimpleType.STRING),
108105
SimpleType.STRING,
109106
SimpleType.STRING)),
110107
ImmutableSet.of(
111108
CelFunctionBinding.from(
112-
"regex_captureAll_string_string",
109+
"regex_extractAll_string_string",
113110
String.class,
114111
String.class,
115-
CelRegexExtensions::captureAllMatches))),
116-
CAPTUREALLNAMED(
117-
CelFunctionDecl.newFunctionDeclaration(
118-
REGEX_CAPTUREALLNAMED_FUNCTION,
119-
CelOverloadDecl.newGlobalOverload(
120-
"regex_captureAllNamed_string_string",
121-
"Returns a map of all named captured groups as <named_group_name, captured_string>."
122-
+ " Ignores the unnamed capture groups.",
123-
MapType.create(SimpleType.STRING, SimpleType.STRING),
124-
SimpleType.STRING,
125-
SimpleType.STRING)),
126-
ImmutableSet.of(
127-
CelFunctionBinding.from(
128-
"regex_captureAllNamed_string_string",
129-
String.class,
130-
String.class,
131-
CelRegexExtensions::captureAllNamedGroups)));
112+
CelRegexExtensions::extractAll)));
132113

133114
private final CelFunctionDecl functionDecl;
134115
private final ImmutableSet<CelFunctionBinding> functionBindings;
@@ -200,67 +181,49 @@ private static String replace(String target, String regex, String replaceStr, lo
200181
return sb.toString();
201182
}
202183

203-
private static Optional<String> captureFirstMatch(String target, String regex) {
184+
private static Optional<String> extract(String target, String regex) {
204185
Pattern pattern = compileRegexPattern(regex);
205186
Matcher matcher = pattern.matcher(target);
206187

207-
if (matcher.find()) {
208-
// If there are capture groups, return the first one.
209-
if (matcher.groupCount() > 0) {
210-
return Optional.ofNullable(matcher.group(1));
211-
} else {
212-
// If there are no capture groups, return the entire match.
213-
return Optional.of(matcher.group(0));
214-
}
188+
if (!matcher.find()) {
189+
return Optional.empty();
215190
}
216191

217-
return Optional.empty();
218-
}
219-
220-
private static ImmutableList<String> captureAllMatches(String target, String regex) {
221-
Pattern pattern = compileRegexPattern(regex);
222-
223-
Matcher matcher = pattern.matcher(target);
224-
ImmutableList.Builder<String> builder = ImmutableList.builder();
225-
226-
while (matcher.find()) {
227-
// If there are capture groups, return all of them. Otherwise, return the entire match.
228-
if (matcher.groupCount() > 0) {
229-
// Add all the capture groups to the result list.
230-
for (int i = 1; i <= matcher.groupCount(); i++) {
231-
String group = matcher.group(i);
232-
if (group != null) {
233-
builder.add(group);
234-
}
235-
}
236-
} else {
237-
builder.add(matcher.group(0));
238-
}
192+
int groupCount = matcher.groupCount();
193+
if (groupCount > 1) {
194+
throw new IllegalArgumentException(
195+
"Regular expression has more than one capturing group: " + regex);
239196
}
240197

241-
return builder.build();
198+
String result = (groupCount == 1) ? matcher.group(1) : matcher.group(0);
199+
200+
return Optional.ofNullable(result);
242201
}
243202

244-
private static ImmutableMap<String, String> captureAllNamedGroups(String target, String regex) {
245-
ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
203+
private static ImmutableList<String> extractAll(String target, String regex) {
246204
Pattern pattern = compileRegexPattern(regex);
205+
Matcher matcher = pattern.matcher(target);
247206

248-
Set<String> groupNames = pattern.namedGroups().keySet();
249-
if (groupNames.isEmpty()) {
250-
return builder.buildOrThrow();
207+
if (matcher.groupCount() > 1) {
208+
throw new IllegalArgumentException(
209+
"Regular expression has more than one capturing group: " + regex);
251210
}
252211

253-
Matcher matcher = pattern.matcher(target);
212+
ImmutableList.Builder<String> builder = ImmutableList.builder();
213+
boolean hasOneGroup = matcher.groupCount() == 1;
254214

255215
while (matcher.find()) {
256-
257-
for (String groupName : groupNames) {
258-
String capturedValue = matcher.group(groupName);
259-
if (capturedValue != null) {
260-
builder.put(groupName, capturedValue);
216+
if (hasOneGroup) {
217+
String group = matcher.group(1);
218+
// Add the captured group's content only if it's not null (e.g. optional group didn't match)
219+
if (group != null) {
220+
builder.add(group);
261221
}
222+
} else { // No capturing groups (matcher.groupCount() == 0)
223+
builder.add(matcher.group(0));
262224
}
263225
}
264-
return builder.buildOrThrow();
226+
227+
return builder.build();
265228
}
266229
}

extensions/src/main/java/dev/cel/extensions/README.md

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,4 +703,81 @@ Examples:
703703
```
704704
lists.range(5) -> [0, 1, 2, 3, 4]
705705
lists.range(0) -> []
706+
```
707+
708+
## Regex
709+
710+
Regex introduces support for regular expressions in CEL.
711+
712+
This library provides namespaced helper functions for extracting matches and
713+
replacing strings using regex patterns. Note, all
714+
functions use the 'regex' namespace. If you are currently using a variable named
715+
'regex', the functions will likely work just as intended; however, there is some
716+
chance for collision.
717+
718+
### Replace
719+
720+
The `regex.replace` function replaces all occurrences of a regex pattern in a
721+
string with a replacement string. Optionally, you can limit the number of
722+
replacements by providing a count argument. Both numeric ($N) and named
723+
(${name}) capture group references are supported in the replacement string, with
724+
validation for correctness. An error will be thrown for invalid regex or replace
725+
string.
726+
727+
```
728+
regex.replace(target: string, pattern: string, replacement: string) -> string
729+
regex.replace(target: string, pattern: string, replacement: string, count: int) -> string
730+
```
731+
732+
Examples:
733+
734+
```
735+
regex.replace('banana', 'a', 'x', 0) == 'banana'
736+
regex.replace('banana', 'a', 'x', 1) == 'bxnana'
737+
regex.replace('banana', 'a', 'x', 2) == 'bxnxna'
738+
regex.replace('foo bar', '(fo)o (ba)r', '$2 $1') == 'ba fo'
739+
740+
regex.replace('test', '(.)', '$2') // Runtime Error invalid replace string
741+
regex.replace('foo bar', '(', '$2 $1') // Runtime Error invalid regex string
742+
regex.replace('id=123', 'id=(?P<value>\\d+)', 'value: ${values}') // Runtime Error invalid replace string
743+
744+
```
745+
746+
### Extract
747+
748+
The `regex.extract` function returns the first match of a regex pattern in a
749+
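string. If the pattern contains exactly one capture group, the text captured by that group is returned instead of the whole match. If no match is found, it returns an optional none value. An error will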
750+
be thrown for invalid regex or for multiple capture groups.
751+
752+
```
753+
regex.extract(target: string, pattern: string) -> optional<string>
754+
```
755+
756+
Examples:
757+
758+
```
759+
regex.extract('hello world', 'hello(.*)') == optional.of(' world')
760+
regex.extract('item-A, item-B', 'item-(\\w+)') == optional.of('A')
761+
regex.extract('HELLO', 'hello') == optional.none()
762+
763+
regex.extract('testuser@testdomain', '(.*)@([^.]*)') // Runtime Error multiple capture groups
764+
```
765+
766+
### Extract All
767+
768+
The `regex.extractAll` function returns a list of all matches of a regex
769+
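pattern in a target string. If the pattern contains exactly one capture group, the list holds the text captured by that group for each match (matches in which the group did not participate are skipped). If no matches are found, it returns an empty list.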
770+
An error will be thrown for invalid regex or for multiple capture groups.
771+
772+
```
773+
regex.extractAll(target: string, pattern: string) -> list<string>
774+
```
775+
776+
Examples:
777+
778+
```
779+
regex.extractAll('id:123, id:456', 'id:\\d+') == ['id:123', 'id:456']
780+
regex.extractAll('id:123, id:456', 'assa') == []
781+
782+
regex.extractAll('testuser@testdomain', '(.*)@([^.]*)') // Runtime Error multiple capture groups
706783
```

extensions/src/test/java/dev/cel/extensions/CelExtensionsTest.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,7 @@ public void getAllFunctionNames() {
180180
"flatten",
181181
"lists.range",
182182
"regex.replace",
183-
"regex.capture",
184-
"regex.captureAll",
185-
"regex.captureAllNamed");
183+
"regex.extract",
184+
"regex.extractAll");
186185
}
187186
}

0 commit comments

Comments
 (0)