Skip to content

Commit b189242

Browse files
authored
refactor: try_parse_re for repetition logic (#79)
* refactor: try_parse_re for repetition logic * fix: clippy
1 parent 6d752e7 commit b189242

File tree

8 files changed

+117
-120
lines changed

8 files changed

+117
-120
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,4 @@ jobs:
7676
override: true
7777
- uses: Swatinem/rust-cache@v2
7878
- run: rustup component add clippy
79-
- run: cargo clippy -- -D warnings
79+
- run: cargo clippy --all-targets -- -D warnings

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "promql-parser"
33
readme = "README.md"
44
description = "Parse PromQL query into AST"
55
repository = "https://github.com/GreptimeTeam/promql-parser"
6-
version = "0.3.0"
6+
version = "0.3.1"
77
edition = "2021"
88
authors = ["The GreptimeDB Project Developers"]
99
keywords = ["prometheus", "promql", "parser"]

src/label/matcher.rs

Lines changed: 82 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use std::hash::{Hash, Hasher};
1717

1818
use regex::Regex;
1919

20-
use crate::parser::token::{TokenId, T_EQL, T_EQL_REGEX, T_NEQ, T_NEQ_REGEX};
20+
use crate::parser::token::{token_display, TokenId, T_EQL, T_EQL_REGEX, T_NEQ, T_NEQ_REGEX};
2121
use crate::util::join_vector;
2222

2323
#[derive(Debug, Clone)]
@@ -95,80 +95,19 @@ impl Matcher {
9595
// in Go the following is valid: `aaa{bbb}ccc`
9696
// in Rust {bbb} is seen as an invalid repeat and must be ecaped \{bbb}
9797
// This escapes the opening { if its not followed by valid repeat pattern (e.g. 4,6).
98-
fn convert_re(re: &str) -> String {
99-
// (true, string) if its a valid repeat pattern (e.g. 1,2 or 2,)
100-
fn is_repeat(chars: &mut std::str::Chars<'_>) -> (bool, String) {
101-
let mut buf = String::new();
102-
let mut comma = false;
103-
for c in chars.by_ref() {
104-
buf.push(c);
105-
106-
if c == ',' {
107-
// two commas or {, are both invalid
108-
if comma || buf == "," {
109-
return (false, buf);
110-
} else {
111-
comma = true;
112-
}
113-
} else if c.is_ascii_digit() {
114-
continue;
115-
} else if c == '}' {
116-
if buf == "}" {
117-
return (false, buf);
118-
} else {
119-
return (true, buf);
120-
}
121-
} else {
122-
return (false, buf);
123-
}
124-
}
125-
(false, buf)
126-
}
127-
128-
let mut result = String::new();
129-
let mut chars = re.chars();
130-
131-
while let Some(c) = chars.next() {
132-
if c != '{' {
133-
result.push(c);
134-
}
135-
136-
// if escaping, just push the next char as well
137-
if c == '\\' {
138-
if let Some(c) = chars.next() {
139-
result.push(c);
140-
}
141-
} else if c == '{' {
142-
match is_repeat(&mut chars) {
143-
(true, s) => {
144-
result.push('{');
145-
result.push_str(&s);
146-
}
147-
(false, s) => {
148-
result.push_str(r"\{");
149-
result.push_str(&s);
150-
}
151-
}
152-
}
153-
}
154-
result
98+
fn try_parse_re(re: &str) -> Result<Regex, String> {
99+
Regex::new(re)
100+
.or_else(|_| Regex::new(&try_escape_for_repeat_re(re)))
101+
.map_err(|_| format!("illegal regex for {re}",))
155102
}
156103

157104
pub fn new_matcher(id: TokenId, name: String, value: String) -> Result<Matcher, String> {
158105
let op = match id {
159106
T_EQL => Ok(MatchOp::Equal),
160107
T_NEQ => Ok(MatchOp::NotEqual),
161-
T_EQL_REGEX => {
162-
let value = Matcher::convert_re(&value);
163-
let re = Regex::new(&value).map_err(|_| format!("illegal regex for {}", &value))?;
164-
Ok(MatchOp::Re(re))
165-
}
166-
T_NEQ_REGEX => {
167-
let value = Matcher::convert_re(&value);
168-
let re = Regex::new(&value).map_err(|_| format!("illegal regex for {}", &value))?;
169-
Ok(MatchOp::NotRe(re))
170-
}
171-
_ => Err(format!("invalid match op {id}")),
108+
T_EQL_REGEX => Ok(MatchOp::Re(Matcher::try_parse_re(&value)?)),
109+
T_NEQ_REGEX => Ok(MatchOp::NotRe(Matcher::try_parse_re(&value)?)),
110+
_ => Err(format!("invalid match op {}", token_display(id))),
172111
};
173112

174113
op.map(|op| Matcher { op, name, value })
@@ -181,6 +120,64 @@ impl fmt::Display for Matcher {
181120
}
182121
}
183122

123+
// Go and Rust handle the repeat pattern differently
124+
// in Go the following is valid: `aaa{bbb}ccc`
125+
// in Rust {bbb} is seen as an invalid repeat and must be ecaped \{bbb}
126+
// This escapes the opening { if its not followed by valid repeat pattern (e.g. 4,6).
127+
fn try_escape_for_repeat_re(re: &str) -> String {
128+
fn is_repeat(chars: &mut std::str::Chars<'_>) -> (bool, String) {
129+
let mut buf = String::new();
130+
let mut comma_seen = false;
131+
for c in chars.by_ref() {
132+
buf.push(c);
133+
match c {
134+
',' if comma_seen => {
135+
return (false, buf); // ,, is invalid
136+
}
137+
',' if buf == "," => {
138+
return (false, buf); // {, is invalid
139+
}
140+
',' if !comma_seen => comma_seen = true,
141+
'}' if buf == "}" => {
142+
return (false, buf); // {} is invalid
143+
}
144+
'}' => {
145+
return (true, buf);
146+
}
147+
_ if c.is_ascii_digit() => continue,
148+
_ => {
149+
return (false, buf); // false if visit non-digit char
150+
}
151+
}
152+
}
153+
(false, buf) // not ended with }
154+
}
155+
156+
let mut result = String::with_capacity(re.len() + 1);
157+
let mut chars = re.chars();
158+
159+
while let Some(c) = chars.next() {
160+
match c {
161+
'\\' => {
162+
if let Some(cc) = chars.next() {
163+
result.push(c);
164+
result.push(cc);
165+
}
166+
}
167+
'{' => {
168+
let (is, s) = is_repeat(&mut chars);
169+
if !is {
170+
result.push('\\');
171+
}
172+
result.push(c);
173+
result.push_str(&s);
174+
}
175+
_ => result.push(c),
176+
}
177+
}
178+
result
179+
}
180+
184181
#[derive(Debug, Clone, PartialEq, Eq)]
185182
pub struct Matchers {
186183
pub matchers: Vec<Matcher>,
@@ -260,7 +257,7 @@ mod tests {
260257
fn test_new_matcher() {
261258
assert_eq!(
262259
Matcher::new_matcher(token::T_ADD, "".into(), "".into()),
263-
Err(format!("invalid match op {}", token::T_ADD))
260+
Err(format!("invalid match op {}", token_display(token::T_ADD)))
264261
)
265262
}
266263

@@ -386,7 +383,7 @@ mod tests {
386383
#[test]
387384
fn test_matcher_re() {
388385
let value = "api/v1/.*";
389-
let re = Regex::new(&value).unwrap();
386+
let re = Regex::new(value).unwrap();
390387
let op = MatchOp::Re(re);
391388
let matcher = Matcher::new(op, "name", value);
392389
assert!(matcher.is_match("api/v1/query"));
@@ -533,20 +530,19 @@ mod tests {
533530

534531
#[test]
535532
fn test_convert_re() {
536-
let convert = |s: &str| Matcher::convert_re(s);
537-
assert_eq!(convert("abc{}"), r#"abc\{}"#);
538-
assert_eq!(convert("abc{def}"), r#"abc\{def}"#);
539-
assert_eq!(convert("abc{def"), r#"abc\{def"#);
540-
assert_eq!(convert("abc{1}"), "abc{1}");
541-
assert_eq!(convert("abc{1,}"), "abc{1,}");
542-
assert_eq!(convert("abc{1,2}"), "abc{1,2}");
543-
assert_eq!(convert("abc{,2}"), r#"abc\{,2}"#);
544-
assert_eq!(convert("abc{{1,2}}"), r#"abc\{{1,2}}"#);
545-
assert_eq!(convert(r#"abc\{abc"#), r#"abc\{abc"#);
546-
assert_eq!(convert("abc{1a}"), r#"abc\{1a}"#);
547-
assert_eq!(convert("abc{1,a}"), r#"abc\{1,a}"#);
548-
assert_eq!(convert("abc{1,2a}"), r#"abc\{1,2a}"#);
549-
assert_eq!(convert("abc{1,2,3}"), r#"abc\{1,2,3}"#);
550-
assert_eq!(convert("abc{1,,2}"), r#"abc\{1,,2}"#);
533+
assert_eq!(try_escape_for_repeat_re("abc{}"), r"abc\{}");
534+
assert_eq!(try_escape_for_repeat_re("abc{def}"), r"abc\{def}");
535+
assert_eq!(try_escape_for_repeat_re("abc{def"), r"abc\{def");
536+
assert_eq!(try_escape_for_repeat_re("abc{1}"), "abc{1}");
537+
assert_eq!(try_escape_for_repeat_re("abc{1,}"), "abc{1,}");
538+
assert_eq!(try_escape_for_repeat_re("abc{1,2}"), "abc{1,2}");
539+
assert_eq!(try_escape_for_repeat_re("abc{,2}"), r"abc\{,2}");
540+
assert_eq!(try_escape_for_repeat_re("abc{{1,2}}"), r"abc\{{1,2}}");
541+
assert_eq!(try_escape_for_repeat_re(r"abc\{abc"), r"abc\{abc");
542+
assert_eq!(try_escape_for_repeat_re("abc{1a}"), r"abc\{1a}");
543+
assert_eq!(try_escape_for_repeat_re("abc{1,a}"), r"abc\{1,a}");
544+
assert_eq!(try_escape_for_repeat_re("abc{1,2a}"), r"abc\{1,2a}");
545+
assert_eq!(try_escape_for_repeat_re("abc{1,2,3}"), r"abc\{1,2,3}");
546+
assert_eq!(try_escape_for_repeat_re("abc{1,,2}"), r"abc\{1,,2}");
551547
}
552548
}

src/parser/ast.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1907,7 +1907,7 @@ task:errors:rate10s{job="s"}))"#,
19071907
];
19081908

19091909
for (input, expect) in cases {
1910-
let expr = crate::parser::parse(&input);
1910+
let expr = crate::parser::parse(input);
19111911
assert_eq!(expect, expr.unwrap().pretty(0, 10));
19121912
}
19131913
}
@@ -1978,7 +1978,7 @@ task:errors:rate10s{job="s"}))"#,
19781978
];
19791979

19801980
for (input, expect) in cases {
1981-
let expr = crate::parser::parse(&input);
1981+
let expr = crate::parser::parse(input);
19821982
assert_eq!(expect, expr.unwrap().pretty(0, 10));
19831983
}
19841984
}
@@ -2059,7 +2059,7 @@ task:errors:rate10s{job="s"}))"#,
20592059
];
20602060

20612061
for (input, expect) in cases {
2062-
let expr = crate::parser::parse(&input);
2062+
let expr = crate::parser::parse(input);
20632063
assert_eq!(expect, expr.unwrap().pretty(0, 10));
20642064
}
20652065
}
@@ -2202,7 +2202,7 @@ task:errors:rate10s{job="s"}))"#,
22022202
];
22032203

22042204
for (input, expect) in cases {
2205-
let expr = crate::parser::parse(&input);
2205+
let expr = crate::parser::parse(input);
22062206
assert_eq!(expect, expr.unwrap().pretty(0, 10));
22072207
}
22082208
}
@@ -2261,7 +2261,7 @@ task:errors:rate10s{job="s"}))"#,
22612261
];
22622262

22632263
for (input, expect) in cases {
2264-
let expr = crate::parser::parse(&input);
2264+
let expr = crate::parser::parse(input);
22652265
assert_eq!(expect, expr.unwrap().pretty(0, 10));
22662266
}
22672267
}
@@ -2403,7 +2403,7 @@ or
24032403
];
24042404

24052405
for (input, expect) in cases {
2406-
let expr = crate::parser::parse(&input);
2406+
let expr = crate::parser::parse(input);
24072407
assert_eq!(expect, expr.unwrap().pretty(0, 10));
24082408
}
24092409
}
@@ -2417,7 +2417,7 @@ or
24172417
];
24182418

24192419
for (input, expect) in cases {
2420-
let expr = crate::parser::parse(&input);
2420+
let expr = crate::parser::parse(input);
24212421
assert_eq!(expect, expr.unwrap().pretty(0, 10));
24222422
}
24232423
}
@@ -2437,7 +2437,7 @@ or
24372437
];
24382438

24392439
for (input, expect) in cases {
2440-
assert_eq!(expect, crate::parser::parse(&input).unwrap().prettify());
2440+
assert_eq!(expect, crate::parser::parse(input).unwrap().prettify());
24412441
}
24422442
}
24432443
}

src/parser/lex.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -703,25 +703,26 @@ mod tests {
703703
/// - MatchTuple.2 is the Err info if the input is invalid PromQL query
704704
type MatchTuple = (&'static str, Vec<LexemeTuple>, Option<&'static str>);
705705

706+
type Case = (
707+
&'static str,
708+
Vec<Result<LexemeType, String>>,
709+
Vec<Result<LexemeType, String>>,
710+
);
711+
706712
fn assert_matches(v: Vec<MatchTuple>) {
707-
let cases: Vec<(
708-
&str,
709-
Vec<Result<LexemeType, String>>,
710-
Vec<Result<LexemeType, String>>,
711-
)> = v
713+
let cases: Vec<Case> = v
712714
.into_iter()
713715
.map(|(input, lexemes, err)| {
714716
let mut expected: Vec<Result<LexemeType, String>> = lexemes
715717
.into_iter()
716718
.map(|(token_id, start, len)| Ok(LexemeType::new(token_id, start, len)))
717719
.collect();
718720

719-
if err.is_some() {
720-
expected.push(Err(err.unwrap().to_string()));
721+
if let Some(s) = err {
722+
expected.push(Err(s.to_string()));
721723
}
722724

723725
let actual: Vec<Result<LexemeType, String>> = Lexer::new(input)
724-
.into_iter()
725726
// in lex test cases, we don't compare the EOF token
726727
.filter(|r| !matches!(r, Ok(l) if l.tok_id() == T_EOF))
727728
.collect();

0 commit comments

Comments
 (0)