@@ -10768,13 +10768,17 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR
1076810768 * It returns a pointer into the input buffer pointing to just after all the
1076910769 * bytes this function consumed; or croaks if an invalid identifier is found.
1077010770 *
10771- * XXX: This function has subtle implications on parsing, and
10772- * changing how it behaves can cause a variable to change from
10773- * being a run time rv2sv call or a compile time binding to a
10774- * specific variable name.
10771+ * XXX: This function normally has subtle implications on parsing, and
10772+ * changing how it behaves can cause a variable to change from being a run
10773+ * time rv2sv call or a compile time binding to a specific variable name.
1077510774 *
10776- * Use the CHECK_UNARY flag to cause this to look for ambiguities with unary
10777- * operators.
10775+ * However, it can be called with the CHECK_ONLY flag which keeps it from
10776+ * making any changes besides populating the memory 'dest' points to. If the
10777+ * identifier is illegal, it returns NULL instead of croaking.
10778+ *
10779+ * And use the CHECK_UNARY flag to cause this to look for ambiguities with
10780+ * unary operators. This is silently overriden if CHECK_ONLY is also
10781+ * specified.
1077810782 */
1077910783STATIC char *
1078010784S_scan_ident (pTHX_ char * s , char * dest , char * dest_end , U32 flags )
@@ -10791,22 +10795,29 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1079110795 char * const e = dest_end - 3 ; /* two-character token, ending NUL */
1079210796 bool is_utf8 = cBOOL (UTF );
1079310797 line_t orig_copline = 0 , tmp_copline = 0 ;
10794- const bool chk_unary = (flags & CHECK_UNARY );
1079510798
10799+ /* Leave the flag in its position, so can pass this on without needing to
10800+ * anything extra */
10801+ const U32 check_only = flags & CHECK_ONLY ;
10802+
10803+ const bool chk_unary = ! check_only && (flags & CHECK_UNARY );
1079610804
1079710805 if (isSPACE (* s ) || !* s )
1079810806 s = skipspace (s );
1079910807
1080010808 /* See if it is a "normal" identifier */
1080110809 s = parse_ident (s , PL_bufend , & d , e , is_utf8 ,
10802- (ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT ));
10803- d = dest ;
10810+ (ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT | check_only ));
10811+ if (s == NULL ) {
10812+ return NULL ;
10813+ }
1080410814
10815+ d = dest ;
1080510816 if (* d ) {
1080610817
1080710818 /* Here parse_ident() found a digit variable or an identifier
1080810819 (anything valid as a bareword), so job done and return. */
10809- if (PL_lex_state != LEX_NORMAL )
10820+ if (! check_only && PL_lex_state != LEX_NORMAL )
1081010821 PL_lex_state = LEX_INTERPENDMAYBE ;
1081110822 return s ;
1081210823 }
@@ -10860,7 +10871,6 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1086010871 * Because all ASCII characters have the same representation whether
1086110872 * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
1086210873 * '{' without knowing if is UTF-8 or not. */
10863-
1086410874 STRLEN advance = 1 ;
1086510875 if ( s < PL_bufend
1086610876 && ( isGRAPH_A (* s )
@@ -10883,7 +10893,10 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1088310893 if (isDIGIT (* d )) {
1088410894 assert (bracket != NO_BRACE );
1088510895 s = parse_ident (s - 1 , PL_bufend , & d , e , is_utf8 ,
10886- STOP_AT_FIRST_NON_DIGIT );
10896+ STOP_AT_FIRST_NON_DIGIT | check_only );
10897+ if (s == NULL ) {
10898+ return NULL ;
10899+ }
1088710900
1088810901 /* The code below is expecting d to point to the final digit */
1088910902 d -- ;
@@ -10913,7 +10926,8 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1091310926 */
1091410927
1091510928 if (bracket == NO_BRACE ) {
10916- if ( PL_lex_state == LEX_INTERPNORMAL
10929+ if ( ! check_only
10930+ && PL_lex_state == LEX_INTERPNORMAL
1091710931 && ! PL_lex_brackets
1091810932 && ! intuit_more (s , PL_bufend , FROM_IDENT , NULL , 0 ))
1091910933 PL_lex_state = LEX_INTERPEND ;
@@ -10939,7 +10953,10 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1093910953 * */
1094010954 d += advance ;
1094110955 s = parse_ident (s , PL_bufend , & d , e , is_utf8 ,
10942- (ALLOW_PACKAGE |CHECK_DOLLAR )|IDCONT_first_OK );
10956+ ( ALLOW_PACKAGE
10957+ | CHECK_DOLLAR
10958+ | IDCONT_first_OK
10959+ | check_only ));
1094310960 }
1094410961 else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
1094510962
@@ -10955,12 +10972,23 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1095510972 IDCONT_first_OK );
1095610973 }
1095710974
10975+ if (s == NULL ) { /* Can't be NULL unless is check_only */
10976+ return NULL ;
10977+ }
10978+
1095810979 tmp_copline = CopLINE (PL_curcop );
1095910980 if (s < PL_bufend && isSPACE (* s )) {
1096010981 s = skipspace (s );
1096110982 }
1096210983
1096310984 if (* s == '[' || (* s == '{' && strNE (dest , "sub" ))) {
10985+
10986+ /* In this branch, 's' is not changed further. If only
10987+ * checking validity, return now before any state changes */
10988+ if (check_only ) {
10989+ return s ;
10990+ }
10991+
1096410992 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation. */
1096510993
1096610994 if (ckWARN (WARN_AMBIGUOUS ) && keyword (dest , d - dest , 0 )) {
@@ -11004,6 +11032,12 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1100411032 * restore the state such that the next thing to process is the
1100511033 * opening '{' and let the parser handle it */
1100611034 s = SvPVX (PL_linestr ) + bracket ;
11035+
11036+ /* The final change to 's' has just been made. If only validity
11037+ * checking, return before making any state changes */
11038+ if (check_only ) {
11039+ return s ;
11040+ }
1100711041 CopLINE_set (PL_curcop , orig_copline );
1100811042 PL_parser -> herelines = herelines ;
1100911043 * dest = '\0' ;
@@ -11013,6 +11047,13 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
1101311047 if (skip )
1101411048 s = skipspace (s );
1101511049 s ++ ;
11050+
11051+ /* The final change to 's' has just been made. If only validity
11052+ * checking, return before making any state changes */
11053+ if (check_only ) {
11054+ return s ;
11055+ }
11056+
1101611057 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets ) {
1101711058 PL_lex_state = LEX_INTERPEND ;
1101811059 PL_expect = XREF ;
0 commit comments