@@ -17,7 +17,7 @@ use std::hash::{Hash, Hasher};
17
17
18
18
use regex:: Regex ;
19
19
20
- use crate :: parser:: token:: { TokenId , T_EQL , T_EQL_REGEX , T_NEQ , T_NEQ_REGEX } ;
20
+ use crate :: parser:: token:: { token_display , TokenId , T_EQL , T_EQL_REGEX , T_NEQ , T_NEQ_REGEX } ;
21
21
use crate :: util:: join_vector;
22
22
23
23
#[ derive( Debug , Clone ) ]
@@ -95,80 +95,19 @@ impl Matcher {
95
95
// in Go the following is valid: `aaa{bbb}ccc`
96
96
// in Rust {bbb} is seen as an invalid repeat and must be escaped \{bbb}
97
97
// This escapes the opening { if it's not followed by a valid repeat pattern (e.g. 4,6).
98
- fn convert_re ( re : & str ) -> String {
99
- // (true, string) if its a valid repeat pattern (e.g. 1,2 or 2,)
100
- fn is_repeat ( chars : & mut std:: str:: Chars < ' _ > ) -> ( bool , String ) {
101
- let mut buf = String :: new ( ) ;
102
- let mut comma = false ;
103
- for c in chars. by_ref ( ) {
104
- buf. push ( c) ;
105
-
106
- if c == ',' {
107
- // two commas or {, are both invalid
108
- if comma || buf == "," {
109
- return ( false , buf) ;
110
- } else {
111
- comma = true ;
112
- }
113
- } else if c. is_ascii_digit ( ) {
114
- continue ;
115
- } else if c == '}' {
116
- if buf == "}" {
117
- return ( false , buf) ;
118
- } else {
119
- return ( true , buf) ;
120
- }
121
- } else {
122
- return ( false , buf) ;
123
- }
124
- }
125
- ( false , buf)
126
- }
127
-
128
- let mut result = String :: new ( ) ;
129
- let mut chars = re. chars ( ) ;
130
-
131
- while let Some ( c) = chars. next ( ) {
132
- if c != '{' {
133
- result. push ( c) ;
134
- }
135
-
136
- // if escaping, just push the next char as well
137
- if c == '\\' {
138
- if let Some ( c) = chars. next ( ) {
139
- result. push ( c) ;
140
- }
141
- } else if c == '{' {
142
- match is_repeat ( & mut chars) {
143
- ( true , s) => {
144
- result. push ( '{' ) ;
145
- result. push_str ( & s) ;
146
- }
147
- ( false , s) => {
148
- result. push_str ( r"\{" ) ;
149
- result. push_str ( & s) ;
150
- }
151
- }
152
- }
153
- }
154
- result
98
+ fn try_parse_re ( re : & str ) -> Result < Regex , String > {
99
+ Regex :: new ( re)
100
+ . or_else ( |_| Regex :: new ( & try_escape_for_repeat_re ( re) ) )
101
+ . map_err ( |_| format ! ( "illegal regex for {re}" , ) )
155
102
}
156
103
157
104
pub fn new_matcher ( id : TokenId , name : String , value : String ) -> Result < Matcher , String > {
158
105
let op = match id {
159
106
T_EQL => Ok ( MatchOp :: Equal ) ,
160
107
T_NEQ => Ok ( MatchOp :: NotEqual ) ,
161
- T_EQL_REGEX => {
162
- let value = Matcher :: convert_re ( & value) ;
163
- let re = Regex :: new ( & value) . map_err ( |_| format ! ( "illegal regex for {}" , & value) ) ?;
164
- Ok ( MatchOp :: Re ( re) )
165
- }
166
- T_NEQ_REGEX => {
167
- let value = Matcher :: convert_re ( & value) ;
168
- let re = Regex :: new ( & value) . map_err ( |_| format ! ( "illegal regex for {}" , & value) ) ?;
169
- Ok ( MatchOp :: NotRe ( re) )
170
- }
171
- _ => Err ( format ! ( "invalid match op {id}" ) ) ,
108
+ T_EQL_REGEX => Ok ( MatchOp :: Re ( Matcher :: try_parse_re ( & value) ?) ) ,
109
+ T_NEQ_REGEX => Ok ( MatchOp :: NotRe ( Matcher :: try_parse_re ( & value) ?) ) ,
110
+ _ => Err ( format ! ( "invalid match op {}" , token_display( id) ) ) ,
172
111
} ;
173
112
174
113
op. map ( |op| Matcher { op, name, value } )
@@ -181,6 +120,64 @@ impl fmt::Display for Matcher {
181
120
}
182
121
}
183
122
123
/// Escapes Go-style literal braces so the pattern can be retried with the Rust regex engine.
///
/// Go and Rust handle the repeat pattern differently: in Go `aaa{bbb}ccc` is valid,
/// while in Rust `{bbb}` is seen as an invalid repeat and must be escaped as `\{bbb}`.
/// This function escapes an opening `{` whenever it is not followed by a valid repeat
/// body, i.e. ASCII digits with at most one comma that is not in first position,
/// terminated by `}` (e.g. `{4}`, `{4,}`, `{4,6}`).
///
/// Existing backslash escapes are copied through untouched, including a dangling
/// backslash at the very end of the input. Dropping it would turn an invalid pattern
/// into a different, valid one.
fn try_escape_for_repeat_re(re: &str) -> String {
    /// Consumes the characters following an opening `{` up to and including the first
    /// character that decides whether this is a repeat body.
    ///
    /// Returns `(true, consumed)` for a valid repeat body and `(false, consumed)`
    /// otherwise; `consumed` always holds exactly the characters taken from `chars`.
    fn is_repeat(chars: &mut std::str::Chars<'_>) -> (bool, String) {
        let mut buf = String::new();
        let mut comma_seen = false;
        for c in chars.by_ref() {
            buf.push(c);
            match c {
                // `,,` and `{,` are both invalid
                ',' if comma_seen || buf == "," => return (false, buf),
                ',' => comma_seen = true,
                // `{}` is invalid, anything else that reached `}` is a repeat
                '}' => return (buf != "}", buf),
                _ if c.is_ascii_digit() => {}
                // any other character cannot be part of a repeat body
                _ => return (false, buf),
            }
        }
        // input ended before a closing `}`
        (false, buf)
    }

    let mut result = String::with_capacity(re.len() + 1);
    let mut chars = re.chars();

    // `while let` rather than `for`: the iterator is advanced inside the loop body.
    while let Some(c) = chars.next() {
        match c {
            '\\' => {
                // Always keep the backslash itself, even when it is the last character.
                result.push(c);
                // The escaped character is copied verbatim and never inspected.
                if let Some(escaped) = chars.next() {
                    result.push(escaped);
                }
            }
            '{' => {
                let (valid_repeat, body) = is_repeat(&mut chars);
                if !valid_repeat {
                    result.push('\\');
                }
                result.push(c);
                result.push_str(&body);
            }
            _ => result.push(c),
        }
    }
    result
}
180
+
184
181
#[ derive( Debug , Clone , PartialEq , Eq ) ]
185
182
pub struct Matchers {
186
183
pub matchers : Vec < Matcher > ,
@@ -260,7 +257,7 @@ mod tests {
260
257
fn test_new_matcher ( ) {
261
258
assert_eq ! (
262
259
Matcher :: new_matcher( token:: T_ADD , "" . into( ) , "" . into( ) ) ,
263
- Err ( format!( "invalid match op {}" , token:: T_ADD ) )
260
+ Err ( format!( "invalid match op {}" , token_display ( token:: T_ADD ) ) )
264
261
)
265
262
}
266
263
@@ -386,7 +383,7 @@ mod tests {
386
383
#[ test]
387
384
fn test_matcher_re ( ) {
388
385
let value = "api/v1/.*" ;
389
- let re = Regex :: new ( & value) . unwrap ( ) ;
386
+ let re = Regex :: new ( value) . unwrap ( ) ;
390
387
let op = MatchOp :: Re ( re) ;
391
388
let matcher = Matcher :: new ( op, "name" , value) ;
392
389
assert ! ( matcher. is_match( "api/v1/query" ) ) ;
@@ -533,20 +530,19 @@ mod tests {
533
530
534
531
#[ test]
535
532
fn test_convert_re ( ) {
536
- let convert = |s : & str | Matcher :: convert_re ( s) ;
537
- assert_eq ! ( convert( "abc{}" ) , r#"abc\{}"# ) ;
538
- assert_eq ! ( convert( "abc{def}" ) , r#"abc\{def}"# ) ;
539
- assert_eq ! ( convert( "abc{def" ) , r#"abc\{def"# ) ;
540
- assert_eq ! ( convert( "abc{1}" ) , "abc{1}" ) ;
541
- assert_eq ! ( convert( "abc{1,}" ) , "abc{1,}" ) ;
542
- assert_eq ! ( convert( "abc{1,2}" ) , "abc{1,2}" ) ;
543
- assert_eq ! ( convert( "abc{,2}" ) , r#"abc\{,2}"# ) ;
544
- assert_eq ! ( convert( "abc{{1,2}}" ) , r#"abc\{{1,2}}"# ) ;
545
- assert_eq ! ( convert( r#"abc\{abc"# ) , r#"abc\{abc"# ) ;
546
- assert_eq ! ( convert( "abc{1a}" ) , r#"abc\{1a}"# ) ;
547
- assert_eq ! ( convert( "abc{1,a}" ) , r#"abc\{1,a}"# ) ;
548
- assert_eq ! ( convert( "abc{1,2a}" ) , r#"abc\{1,2a}"# ) ;
549
- assert_eq ! ( convert( "abc{1,2,3}" ) , r#"abc\{1,2,3}"# ) ;
550
- assert_eq ! ( convert( "abc{1,,2}" ) , r#"abc\{1,,2}"# ) ;
533
+ assert_eq ! ( try_escape_for_repeat_re( "abc{}" ) , r"abc\{}" ) ;
534
+ assert_eq ! ( try_escape_for_repeat_re( "abc{def}" ) , r"abc\{def}" ) ;
535
+ assert_eq ! ( try_escape_for_repeat_re( "abc{def" ) , r"abc\{def" ) ;
536
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1}" ) , "abc{1}" ) ;
537
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1,}" ) , "abc{1,}" ) ;
538
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1,2}" ) , "abc{1,2}" ) ;
539
+ assert_eq ! ( try_escape_for_repeat_re( "abc{,2}" ) , r"abc\{,2}" ) ;
540
+ assert_eq ! ( try_escape_for_repeat_re( "abc{{1,2}}" ) , r"abc\{{1,2}}" ) ;
541
+ assert_eq ! ( try_escape_for_repeat_re( r"abc\{abc" ) , r"abc\{abc" ) ;
542
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1a}" ) , r"abc\{1a}" ) ;
543
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1,a}" ) , r"abc\{1,a}" ) ;
544
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1,2a}" ) , r"abc\{1,2a}" ) ;
545
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1,2,3}" ) , r"abc\{1,2,3}" ) ;
546
+ assert_eq ! ( try_escape_for_repeat_re( "abc{1,,2}" ) , r"abc\{1,,2}" ) ;
551
547
}
552
548
}
0 commit comments