From 1113d80aa0c175276df8f6d83058b56f577452dd Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Wed, 11 Jun 2025 14:20:07 -0700 Subject: [PATCH] Fix recursive root-accessible grammar check This fixes a bug in how the `@root` grammar validation was done. It was not properly handling recursive rules because when walking the tree it would find itself, and thus mark it as "used". What we really want is to only walk starting from the root set, and don't remove recursive definitions from the root set. --- mdbook-spec/src/grammar.rs | 34 ++++++++++++++++++++++++++++------ src/attributes.md | 2 +- src/comments.md | 2 +- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/mdbook-spec/src/grammar.rs b/mdbook-spec/src/grammar.rs index cc5e8a24d..98d3cd5d9 100644 --- a/mdbook-spec/src/grammar.rs +++ b/mdbook-spec/src/grammar.rs @@ -195,10 +195,31 @@ fn check_undefined_nt(grammar: &Grammar, diag: &mut Diagnostics) { /// This is intended to help catch any unexpected misspellings, orphaned /// productions, or general mistakes. fn check_unexpected_roots(grammar: &Grammar, diag: &mut Diagnostics) { + // `set` starts with every production name. let mut set: HashSet<_> = grammar.name_order.iter().map(|s| s.as_str()).collect(); - grammar.visit_nt(&mut |nt| { - set.remove(nt); - }); + fn remove(set: &mut HashSet<&str>, grammar: &Grammar, prod: &Production, root_name: &str) { + prod.expression.visit_nt(&mut |nt| { + // Leave the root name in the set if we find it recursively. + if nt == root_name { + return; + } + if !set.remove(nt) { + return; + } + if let Some(nt_prod) = grammar.productions.get(nt) { + remove(set, grammar, nt_prod, root_name); + } + }); + } + // Walk the productions starting from the root nodes, and remove every + // non-terminal from `set`. What's left must be the set of roots. + grammar + .productions + .values() + .filter(|prod| prod.is_root) + .for_each(|root| { + remove(&mut set, grammar, root, &root.name); + }); let expected: HashSet<_> = grammar .productions .values() @@ -210,17 +231,18 @@ fn check_unexpected_roots(grammar: &Grammar, diag: &mut Diagnostics) { if !new.is_empty() { warn_or_err!( diag, - "New grammar production detected that is not used in any other\n\ + "New grammar production detected that is not used in any root-accessible\n\ production. If this is expected, mark the production with\n\ `@root`. If not, make sure it is spelled correctly and used in\n\ - another production.\n\ + another root-accessible production.\n\ \n\ The new names are: {new:?}\n" ); } else if !removed.is_empty() { warn_or_err!( diag, - "Old grammar production root seems to have been removed.\n\ + "Old grammar production root seems to have been removed\n\ + (it is used in some other production that is root-accessible).\n\ If this is expected, remove `@root` from the production.\n\ \n\ The removed names are: {removed:?}\n" diff --git a/src/attributes.md b/src/attributes.md index c316be2cc..c39ce4d65 100644 --- a/src/attributes.md +++ b/src/attributes.md @@ -115,7 +115,7 @@ attributes]. It has the following grammar: r[attributes.meta.syntax] ```grammar,attributes -MetaItem -> +@root MetaItem -> SimplePath | SimplePath `=` Expression | SimplePath `(` MetaSeq? `)` diff --git a/src/comments.md b/src/comments.md index 8ffa56551..8027b44d7 100644 --- a/src/comments.md +++ b/src/comments.md @@ -30,7 +30,7 @@ OUTER_BLOCK_DOC -> ( BlockCommentOrDoc | ~[`*/` CR] )* `*/` -BlockCommentOrDoc -> +@root BlockCommentOrDoc -> BLOCK_COMMENT | OUTER_BLOCK_DOC | INNER_BLOCK_DOC