Skip to content

Commit a454654

Browse files
committed
S_scan_ident: Add a check-only option
There is a bug here in which this function is called from S_intuit_more just to see if there is an identifier in the string it is looking at. But that call can have "subtle implications on parsing" (according to the long-standing comments in it). We need a way to call scan_ident without side-effects. This commit adds that capability. The next will use it.
1 parent 9b263a2 commit a454654

File tree

1 file changed

+55
-14
lines changed

1 file changed

+55
-14
lines changed

toke.c

Lines changed: 55 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10768,13 +10768,17 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR
1076810768
* It returns a pointer into the input buffer pointing to just after all the
1076910769
* bytes this function consumed; or croaks if an invalid identifier is found.
1077010770
*
10771-
* XXX: This function has subtle implications on parsing, and
10772-
* changing how it behaves can cause a variable to change from
10773-
* being a run time rv2sv call or a compile time binding to a
10774-
* specific variable name.
10771+
* XXX: This function normally has subtle implications on parsing, and
10772+
* changing how it behaves can cause a variable to change from being a run
10773+
* time rv2sv call or a compile time binding to a specific variable name.
1077510774
*
10776-
* Use the CHECK_UNARY flag to cause this to look for ambiguities with unary
10777-
* operators.
10775+
* However, it can be called with the CHECK_ONLY flag which keeps it from
10776+
* making any changes besides populating the memory 'dest' points to. If the
10777+
* identifier is illegal, it returns NULL instead of croaking.
10778+
*
10779+
* And use the CHECK_UNARY flag to cause this to look for ambiguities with
10780+
* unary operators. This is silently overriden if CHECK_ONLY is also
10781+
* specified.
1077810782
*/
1077910783
STATIC char *
1078010784
S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
@@ -10791,22 +10795,29 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1079110795
char * const e = dest_end - 3; /* two-character token, ending NUL */
1079210796
bool is_utf8 = cBOOL(UTF);
1079310797
line_t orig_copline = 0, tmp_copline = 0;
10794-
const bool chk_unary = (flags & CHECK_UNARY);
1079510798

10799+
/* Leave the flag in its position, so can pass this on without needing to
10800+
* anything extra */
10801+
const U32 check_only = flags & CHECK_ONLY;
10802+
10803+
const bool chk_unary = ! check_only && (flags & CHECK_UNARY);
1079610804

1079710805
if (isSPACE(*s) || !*s)
1079810806
s = skipspace(s);
1079910807

1080010808
/* See if it is a "normal" identifier */
1080110809
s = parse_ident(s, PL_bufend, &d, e, is_utf8,
10802-
(ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT));
10803-
d = dest;
10810+
(ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT | check_only));
10811+
if (s == NULL) {
10812+
return NULL;
10813+
}
1080410814

10815+
d = dest;
1080510816
if (*d) {
1080610817

1080710818
/* Here parse_ident() found a digit variable or an identifier
1080810819
(anything valid as a bareword), so job done and return. */
10809-
if (PL_lex_state != LEX_NORMAL)
10820+
if (! check_only && PL_lex_state != LEX_NORMAL)
1081010821
PL_lex_state = LEX_INTERPENDMAYBE;
1081110822
return s;
1081210823
}
@@ -10860,7 +10871,6 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1086010871
* Because all ASCII characters have the same representation whether
1086110872
* encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
1086210873
* '{' without knowing if is UTF-8 or not. */
10863-
1086410874
STRLEN advance = 1;
1086510875
if ( s < PL_bufend
1086610876
&& ( isGRAPH_A(*s)
@@ -10883,7 +10893,10 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1088310893
if (isDIGIT(*d)) {
1088410894
assert(bracket != NO_BRACE);
1088510895
s = parse_ident(s - 1, PL_bufend, &d, e, is_utf8,
10886-
STOP_AT_FIRST_NON_DIGIT);
10896+
STOP_AT_FIRST_NON_DIGIT | check_only);
10897+
if (s == NULL) {
10898+
return NULL;
10899+
}
1088710900

1088810901
/* The code below is expecting d to point to the final digit */
1088910902
d--;
@@ -10913,7 +10926,8 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1091310926
*/
1091410927

1091510928
if (bracket == NO_BRACE) {
10916-
if ( PL_lex_state == LEX_INTERPNORMAL
10929+
if ( ! check_only
10930+
&& PL_lex_state == LEX_INTERPNORMAL
1091710931
&& ! PL_lex_brackets
1091810932
&& ! intuit_more(s, PL_bufend, FROM_IDENT, NULL, 0))
1091910933
PL_lex_state = LEX_INTERPEND;
@@ -10939,7 +10953,10 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1093910953
* */
1094010954
d += advance;
1094110955
s = parse_ident(s, PL_bufend, &d, e, is_utf8,
10942-
(ALLOW_PACKAGE|CHECK_DOLLAR)|IDCONT_first_OK);
10956+
( ALLOW_PACKAGE
10957+
| CHECK_DOLLAR
10958+
| IDCONT_first_OK
10959+
| check_only));
1094310960
}
1094410961
else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
1094510962

@@ -10955,12 +10972,23 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1095510972
IDCONT_first_OK);
1095610973
}
1095710974

10975+
if (s == NULL) { /* Can't be NULL unless is check_only */
10976+
return NULL;
10977+
}
10978+
1095810979
tmp_copline = CopLINE(PL_curcop);
1095910980
if (s < PL_bufend && isSPACE(*s)) {
1096010981
s = skipspace(s);
1096110982
}
1096210983

1096310984
if (*s == '[' || (*s == '{' && strNE(dest, "sub"))) {
10985+
10986+
/* In this branch, 's' is not changed further. If only
10987+
* checking validity, return now before any state changes */
10988+
if (check_only) {
10989+
return s;
10990+
}
10991+
1096410992
/* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
1096510993

1096610994
if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
@@ -11004,6 +11032,12 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1100411032
* restore the state such that the next thing to process is the
1100511033
* opening '{' and let the parser handle it */
1100611034
s = SvPVX(PL_linestr) + bracket;
11035+
11036+
/* The final change to 's' has just been made. If only validity
11037+
* checking, return before making any state changes */
11038+
if (check_only) {
11039+
return s;
11040+
}
1100711041
CopLINE_set(PL_curcop, orig_copline);
1100811042
PL_parser->herelines = herelines;
1100911043
*dest = '\0';
@@ -11013,6 +11047,13 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1101311047
if (skip)
1101411048
s = skipspace(s);
1101511049
s++;
11050+
11051+
/* The final change to 's' has just been made. If only validity
11052+
* checking, return before making any state changes */
11053+
if (check_only) {
11054+
return s;
11055+
}
11056+
1101611057
if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
1101711058
PL_lex_state = LEX_INTERPEND;
1101811059
PL_expect = XREF;

0 commit comments

Comments
 (0)