Skip to content

Commit 96de8f7

Browse files
author
Hana Dusíková
committed
fix for #77
- added support for \1...\9 backreferences - a backslash followed by a character that has no special meaning is now a hard error
1 parent 274202f commit 96de8f7

File tree

5 files changed

+21
-8
lines changed

5 files changed

+21
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ ctre::match<"REGEX">(subject); // C++20
1818
* Matching
1919
* Searching
2020
* Capturing content (named captures are supported too)
21-
* Back-Reference (only \g{N} syntax)
21+
* Back-Reference (\g{N} syntax, and \1...\9 syntax too)
2222

2323
The library is implementing most of the PCRE syntax with a few exceptions:
2424

include/ctre/pcre.gram

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ equal_sign={=}
2222
exclamation_mark={!}
2323

2424
escape_alphanum={b,c,h,i,j,k,l,m,o,q,v,y,z,A,B,C,E,F,G,H,I,J,K,L,M,O,Q,U,V,X,Y,Z,1,2,3,4,5,6,7,8,9}
25+
escape_backreference={1,2,3,4,5,6,7,8,9}
2526
hexdec={0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f,A,B,C,D,E,F}
2627
octal={0,1,2,3,4,5,6,7}
2728
nonspecial_characters={a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,0,1,2,3,4,5,6,7,8,9,\,,:,!,=,_}
@@ -110,7 +111,7 @@ range->minus,backslash,<backslash_range>,[make_range]|minus,nonspecial_character
110111

111112
backslash_set-><special>,[push_character]
112113
special->dot|sopen|sclose|copen|cclose|open|close|backslash|star|plus|questionmark|pipe|caret|dolar|minus
113-
backslash_set->escape_alphanum,[push_character]
114+
backslash_set->escape_backreference,[create_number],[make_back_reference]
114115

115116
hexdec_repeat->hexdec,[push_hexdec],<hexdec_repeat>|epsilon
116117

include/ctre/pcre.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,13 @@ struct pcre {
156156
static constexpr auto rule(backslash, ctll::term<'W'>) -> ctll::push<ctll::anything, class_nonword>;
157157
static constexpr auto rule(backslash, ctll::term<'s'>) -> ctll::push<ctll::anything, class_space>;
158158
static constexpr auto rule(backslash, ctll::term<'w'>) -> ctll::push<ctll::anything, class_word>;
159+
static constexpr auto rule(backslash, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, create_number, make_back_reference>;
159160
static constexpr auto rule(backslash, ctll::term<'g'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, m>;
160161
static constexpr auto rule(backslash, ctll::term<'p'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property>;
161162
static constexpr auto rule(backslash, ctll::term<'P'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property_negative>;
162163
static constexpr auto rule(backslash, ctll::term<'u'>) -> ctll::push<ctll::anything, k>;
163164
static constexpr auto rule(backslash, ctll::term<'x'>) -> ctll::push<ctll::anything, l>;
164-
static constexpr auto rule(backslash, ctll::set<'$','\x28','\x29','*','+','-','.','?','A','B','C','E','F','G','H','I','J','K','L','M','O','Q','U','V','X','Y','Z','[','\\',']','^','b','c','h','i','j','k','l','m','o','q','v','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character>;
165+
static constexpr auto rule(backslash, ctll::set<'$','\x28','\x29','*','+','-','.','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push<ctll::anything, push_character>;
165166
static constexpr auto rule(backslash, ctll::term<'a'>) -> ctll::push<ctll::anything, push_character_alarm>;
166167
static constexpr auto rule(backslash, ctll::term<'e'>) -> ctll::push<ctll::anything, push_character_escape>;
167168
static constexpr auto rule(backslash, ctll::term<'f'>) -> ctll::push<ctll::anything, push_character_formfeed>;
@@ -244,12 +245,13 @@ struct pcre {
244245
static constexpr auto rule(e, ctll::term<'W'>) -> ctll::push<ctll::anything, class_nonword>;
245246
static constexpr auto rule(e, ctll::term<'s'>) -> ctll::push<ctll::anything, class_space>;
246247
static constexpr auto rule(e, ctll::term<'w'>) -> ctll::push<ctll::anything, class_word>;
248+
static constexpr auto rule(e, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, create_number, make_back_reference>;
247249
static constexpr auto rule(e, ctll::term<'p'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property>;
248250
static constexpr auto rule(e, ctll::term<'P'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property_negative>;
249251
static constexpr auto rule(e, ctll::term<'u'>) -> ctll::push<ctll::anything, k, range>;
250252
static constexpr auto rule(e, ctll::term<'x'>) -> ctll::push<ctll::anything, l, range>;
251253
static constexpr auto rule(e, ctll::term<'-'>) -> ctll::push<ctll::anything, p>;
252-
static constexpr auto rule(e, ctll::set<'$','\x28','\x29','*','+','.','?','A','B','C','E','F','G','H','I','J','K','L','M','O','Q','U','V','X','Y','Z','[','\\',']','^','b','c','h','i','j','k','l','m','o','q','v','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character>;
254+
static constexpr auto rule(e, ctll::set<'$','\x28','\x29','*','+','.','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push<ctll::anything, push_character>;
253255
static constexpr auto rule(e, ctll::term<'a'>) -> ctll::push<ctll::anything, push_character_alarm, range>;
254256
static constexpr auto rule(e, ctll::term<'e'>) -> ctll::push<ctll::anything, push_character_escape, range>;
255257
static constexpr auto rule(e, ctll::term<'f'>) -> ctll::push<ctll::anything, push_character_formfeed, range>;
@@ -265,11 +267,12 @@ struct pcre {
265267
static constexpr auto rule(f, ctll::term<'W'>) -> ctll::push<ctll::anything, class_nonword>;
266268
static constexpr auto rule(f, ctll::term<'s'>) -> ctll::push<ctll::anything, class_space>;
267269
static constexpr auto rule(f, ctll::term<'w'>) -> ctll::push<ctll::anything, class_word>;
270+
static constexpr auto rule(f, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, create_number, make_back_reference>;
268271
static constexpr auto rule(f, ctll::term<'p'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property>;
269272
static constexpr auto rule(f, ctll::term<'P'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property_negative>;
270273
static constexpr auto rule(f, ctll::term<'u'>) -> ctll::push<ctll::anything, k, range>;
271274
static constexpr auto rule(f, ctll::term<'x'>) -> ctll::push<ctll::anything, l, range>;
272-
static constexpr auto rule(f, ctll::set<'$','\x28','\x29','*','+','.','?','A','B','C','E','F','G','H','I','J','K','L','M','O','Q','U','V','X','Y','Z','[','\\',']','^','b','c','h','i','j','k','l','m','o','q','v','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character>;
275+
static constexpr auto rule(f, ctll::set<'$','\x28','\x29','*','+','.','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push<ctll::anything, push_character>;
273276
static constexpr auto rule(f, ctll::term<'a'>) -> ctll::push<ctll::anything, push_character_alarm, range>;
274277
static constexpr auto rule(f, ctll::term<'e'>) -> ctll::push<ctll::anything, push_character_escape, range>;
275278
static constexpr auto rule(f, ctll::term<'f'>) -> ctll::push<ctll::anything, push_character_formfeed, range>;

single-header/ctre.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,12 +1011,13 @@ struct pcre {
10111011
static constexpr auto rule(backslash, ctll::term<'W'>) -> ctll::push<ctll::anything, class_nonword>;
10121012
static constexpr auto rule(backslash, ctll::term<'s'>) -> ctll::push<ctll::anything, class_space>;
10131013
static constexpr auto rule(backslash, ctll::term<'w'>) -> ctll::push<ctll::anything, class_word>;
1014+
static constexpr auto rule(backslash, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, create_number, make_back_reference>;
10141015
static constexpr auto rule(backslash, ctll::term<'g'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, m>;
10151016
static constexpr auto rule(backslash, ctll::term<'p'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property>;
10161017
static constexpr auto rule(backslash, ctll::term<'P'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property_negative>;
10171018
static constexpr auto rule(backslash, ctll::term<'u'>) -> ctll::push<ctll::anything, k>;
10181019
static constexpr auto rule(backslash, ctll::term<'x'>) -> ctll::push<ctll::anything, l>;
1019-
static constexpr auto rule(backslash, ctll::set<'$','\x28','\x29','*','+','-','.','?','A','B','C','E','F','G','H','I','J','K','L','M','O','Q','U','V','X','Y','Z','[','\\',']','^','b','c','h','i','j','k','l','m','o','q','v','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character>;
1020+
static constexpr auto rule(backslash, ctll::set<'$','\x28','\x29','*','+','-','.','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push<ctll::anything, push_character>;
10201021
static constexpr auto rule(backslash, ctll::term<'a'>) -> ctll::push<ctll::anything, push_character_alarm>;
10211022
static constexpr auto rule(backslash, ctll::term<'e'>) -> ctll::push<ctll::anything, push_character_escape>;
10221023
static constexpr auto rule(backslash, ctll::term<'f'>) -> ctll::push<ctll::anything, push_character_formfeed>;
@@ -1099,12 +1100,13 @@ struct pcre {
10991100
static constexpr auto rule(e, ctll::term<'W'>) -> ctll::push<ctll::anything, class_nonword>;
11001101
static constexpr auto rule(e, ctll::term<'s'>) -> ctll::push<ctll::anything, class_space>;
11011102
static constexpr auto rule(e, ctll::term<'w'>) -> ctll::push<ctll::anything, class_word>;
1103+
static constexpr auto rule(e, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, create_number, make_back_reference>;
11021104
static constexpr auto rule(e, ctll::term<'p'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property>;
11031105
static constexpr auto rule(e, ctll::term<'P'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property_negative>;
11041106
static constexpr auto rule(e, ctll::term<'u'>) -> ctll::push<ctll::anything, k, range>;
11051107
static constexpr auto rule(e, ctll::term<'x'>) -> ctll::push<ctll::anything, l, range>;
11061108
static constexpr auto rule(e, ctll::term<'-'>) -> ctll::push<ctll::anything, p>;
1107-
static constexpr auto rule(e, ctll::set<'$','\x28','\x29','*','+','.','?','A','B','C','E','F','G','H','I','J','K','L','M','O','Q','U','V','X','Y','Z','[','\\',']','^','b','c','h','i','j','k','l','m','o','q','v','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character>;
1109+
static constexpr auto rule(e, ctll::set<'$','\x28','\x29','*','+','.','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push<ctll::anything, push_character>;
11081110
static constexpr auto rule(e, ctll::term<'a'>) -> ctll::push<ctll::anything, push_character_alarm, range>;
11091111
static constexpr auto rule(e, ctll::term<'e'>) -> ctll::push<ctll::anything, push_character_escape, range>;
11101112
static constexpr auto rule(e, ctll::term<'f'>) -> ctll::push<ctll::anything, push_character_formfeed, range>;
@@ -1120,11 +1122,12 @@ struct pcre {
11201122
static constexpr auto rule(f, ctll::term<'W'>) -> ctll::push<ctll::anything, class_nonword>;
11211123
static constexpr auto rule(f, ctll::term<'s'>) -> ctll::push<ctll::anything, class_space>;
11221124
static constexpr auto rule(f, ctll::term<'w'>) -> ctll::push<ctll::anything, class_word>;
1125+
static constexpr auto rule(f, ctll::set<'1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, create_number, make_back_reference>;
11231126
static constexpr auto rule(f, ctll::term<'p'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property>;
11241127
static constexpr auto rule(f, ctll::term<'P'>) -> ctll::push<ctll::anything, ctll::term<'\x7B'>, property_name, ctll::term<'\x7D'>, make_property_negative>;
11251128
static constexpr auto rule(f, ctll::term<'u'>) -> ctll::push<ctll::anything, k, range>;
11261129
static constexpr auto rule(f, ctll::term<'x'>) -> ctll::push<ctll::anything, l, range>;
1127-
static constexpr auto rule(f, ctll::set<'$','\x28','\x29','*','+','.','?','A','B','C','E','F','G','H','I','J','K','L','M','O','Q','U','V','X','Y','Z','[','\\',']','^','b','c','h','i','j','k','l','m','o','q','v','y','z','\x7B','|','\x7D','1','2','3','4','5','6','7','8','9'>) -> ctll::push<ctll::anything, push_character>;
1130+
static constexpr auto rule(f, ctll::set<'$','\x28','\x29','*','+','.','?','[','\\',']','^','\x7B','|','\x7D'>) -> ctll::push<ctll::anything, push_character>;
11281131
static constexpr auto rule(f, ctll::term<'a'>) -> ctll::push<ctll::anything, push_character_alarm, range>;
11291132
static constexpr auto rule(f, ctll::term<'e'>) -> ctll::push<ctll::anything, push_character_escape, range>;
11301133
static constexpr auto rule(f, ctll::term<'f'>) -> ctll::push<ctll::anything, push_character_formfeed, range>;

tests/parsing.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,9 @@ static_assert(CTRE_TEST("(?=test)"));
153153
static_assert(CTRE_TEST("^(?=.*(.)\\g{1}+.*)[a-z]+"));
154154
static_assert(CTRE_TEST("^(?=.*(a)\\g{1}.*)$"));
155155

156+
static_assert(!CTRE_TEST("\\b"));
157+
static_assert(!CTRE_TEST("\\A"));
158+
static_assert(!CTRE_TEST("[\\A]"));
159+
static_assert(CTRE_TEST("(.*)\\1"));
160+
161+

0 commit comments

Comments
 (0)