From a5ced00241cbc5416f1cf6084759a0003c285706 Mon Sep 17 00:00:00 2001 From: Tsung-Han Yu <14802181+johan456789@users.noreply.github.com> Date: Sun, 27 Apr 2025 01:14:04 +0800 Subject: [PATCH 1/4] add symbol information support --- extension.js | 43 +++++++++++++++++++++++++++++++++++++++++++ package.json | 6 +++++- syntaxes/lark.json | 2 +- 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 extension.js diff --git a/extension.js b/extension.js new file mode 100644 index 0000000..2a807c6 --- /dev/null +++ b/extension.js @@ -0,0 +1,43 @@ +const vscode = require('vscode'); + +function activate(context) { + const selector = { language: 'lark', scheme: 'file' }; + const provider = new LarkSymbolProvider(); + context.subscriptions.push( + vscode.languages.registerDocumentSymbolProvider(selector, provider) + ); +} + +class LarkSymbolProvider { + provideDocumentSymbols(document) { + const symbols = []; + const termRe = /^\s*(?:[?!])?([A-Z0-9_]+)(?:\.\d+)?\s*:/; // optional ?/! prefix, uppercase identifiers, optional .n suffix + const ruleRe = /^\s*(?:[?!])?([a-z0-9_]+)(?:\.\d+)?\s*:/; // optional ?/! prefix, lowercase identifiers, optional .n suffix + + for (let i = 0; i < document.lineCount; i++) { + const text = document.lineAt(i).text; // text content of the current line + + const termMatch = termRe.exec(text); + if (termMatch) { + const name = termMatch[1]; // terminal name + const kind = vscode.SymbolKind.Constant; + const range = new vscode.Range(i, 0, i, text.length); + symbols.push(new vscode.DocumentSymbol(name, '', kind, range, range)); + continue; + } + + const ruleMatch = ruleRe.exec(text); + if (ruleMatch) { + const name = ruleMatch[1]; // rule name + const kind = vscode.SymbolKind.Function; + const range = new vscode.Range(i, 0, i, text.length); + symbols.push(new vscode.DocumentSymbol(name, '', kind, range, range)); + } + } + return symbols; + } +} + +function deactivate() { } + +module.exports = { activate, deactivate }; diff --git a/package.json b/package.json index 93086f9..3b68813 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "vscode-lark", - "version": "0.1.0", + "version": "0.2.0", "publisher": "dirk-thomas", "engines": { "vscode": "^1.2.0" @@ -19,6 +19,10 @@ "bugs": { "url": "https://github.com/dirk-thomas/vscode-lark/issues" }, + "main": "./extension.js", + "activationEvents": [ + "onLanguage:lark" + ], "contributes": { "languages": [ { diff --git a/syntaxes/lark.json b/syntaxes/lark.json index 3672310..e4498b9 100644 --- a/syntaxes/lark.json +++ b/syntaxes/lark.json @@ -78,4 +78,4 @@ } ], "scopeName": "source.lark" -} +} \ No newline at end of file From 049f0dbdae605137362b8e1986b9df0115f50d6b Mon Sep 17 00:00:00 2001 From: Tsung-Han Yu <14802181+johan456789@users.noreply.github.com> Date: Mon, 28 Apr 2025 03:45:37 +0800 Subject: [PATCH 2/4] show warning for unused rules and terminals --- extension.js | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++- package.json | 2 +- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/extension.js b/extension.js index 2a807c6..799f682 100644 --- a/extension.js +++ b/extension.js @@ -3,8 +3,24 @@ const vscode = require('vscode'); function activate(context) { const selector = { language: 'lark', scheme: 'file' }; const provider = new LarkSymbolProvider(); + context.subscriptions.push(vscode.languages.registerDocumentSymbolProvider(selector, provider)); + // Diagnostics for unused grammar symbols + const diagnosticCollection = vscode.languages.createDiagnosticCollection('lark-unused'); + context.subscriptions.push(diagnosticCollection); + // Validate currently open Lark document + if (vscode.window.activeTextEditor) { + const doc = vscode.window.activeTextEditor.document; + if (doc.languageId === 'lark') validateTextDocument(doc, diagnosticCollection); + } + // Re-validate on change, open, close + context.subscriptions.push( + vscode.workspace.onDidChangeTextDocument(e => validateTextDocument(e.document, diagnosticCollection)) + ); + context.subscriptions.push( + vscode.workspace.onDidOpenTextDocument(doc => validateTextDocument(doc, diagnosticCollection)) + ); context.subscriptions.push( - vscode.languages.registerDocumentSymbolProvider(selector, provider) + vscode.workspace.onDidCloseTextDocument(doc => diagnosticCollection.delete(doc.uri)) ); } @@ -38,6 +54,51 @@ class LarkSymbolProvider { } } +// Check for unused grammar symbols and report warnings +function validateTextDocument(document, diagnosticCollection) { + if (document.languageId !== 'lark') return; + const diagnostics = []; + const termRe = /^\s*(?:[?!])?([A-Z0-9_]+)(?:\.\d+)?\s*:/; + const ruleRe = /^\s*(?:[?!])?([a-z0-9_]+)(?:\.\d+)?\s*:/; + const defs = {}; + // Collect definitions + for (let i = 0; i < document.lineCount; i++) { + const text = document.lineAt(i).text; + let match = termRe.exec(text); + if (match) { + defs[match[1]] = { line: i, used: false }; + continue; + } + match = ruleRe.exec(text); + if (match) { + defs[match[1]] = { line: i, used: false }; + } + } + // Search for references + for (let i = 0; i < document.lineCount; i++) { + const text = document.lineAt(i).text; + for (const name in defs) { + if (defs[name].used || i === defs[name].line) continue; + if (new RegExp(`\\b${name}\\b`).test(text)) { + defs[name].used = true; + } + } + } + // Create diagnostics for unused + for (const name in defs) { + if (!defs[name].used && name !== 'start') { + const lineNum = defs[name].line; + const line = document.lineAt(lineNum); + const startChar = line.text.indexOf(name); + if (startChar >= 0) { + const range = new vscode.Range(lineNum, startChar, lineNum, startChar + name.length); + diagnostics.push(new vscode.Diagnostic(range, `Unused grammar symbol '${name}'`, vscode.DiagnosticSeverity.Warning)); + } + } + } + diagnosticCollection.set(document.uri, diagnostics); +} + function deactivate() { } module.exports = { activate, deactivate }; diff --git a/package.json b/package.json index 3b68813..d670d62 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "vscode-lark", - "version": "0.2.0", + "version": "0.2.1", "publisher": "dirk-thomas", "engines": { "vscode": "^1.2.0" From a4f0e08c4767355e42da2f9e0ecd185d7ccbc54e Mon Sep 17 00:00:00 2001 From: Tsung-Han Yu <14802181+johan456789@users.noreply.github.com> Date: Tue, 29 Apr 2025 23:36:50 +0800 Subject: [PATCH 3/4] show errors for undefined rules and terminals --- extension.js | 132 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 100 insertions(+), 32 deletions(-) diff --git a/extension.js b/extension.js index 799f682..cba7c94 100644 --- a/extension.js +++ b/extension.js @@ -1,11 +1,17 @@ const vscode = require('vscode'); +// regex patterns +const SYMBOL_PREFIX = '^\\s*(?:[?!])?'; +const SYMBOL_SUFFIX = '(?:\\.\\d+)?'; +const TERM_BODY = '([A-Z_][A-Z_0-9]*)'; +const RULE_BODY = '([a-z_][a-z_0-9]*)'; + function activate(context) { const selector = { language: 'lark', scheme: 'file' }; const provider = new LarkSymbolProvider(); context.subscriptions.push(vscode.languages.registerDocumentSymbolProvider(selector, provider)); - // Diagnostics for unused grammar symbols - const diagnosticCollection = vscode.languages.createDiagnosticCollection('lark-unused'); + // Diagnostics for unused or undefined symbols + const diagnosticCollection = vscode.languages.createDiagnosticCollection('lark-diagnostics'); context.subscriptions.push(diagnosticCollection); // Validate currently open Lark document if (vscode.window.activeTextEditor) { @@ -27,53 +33,57 @@ function activate(context) { class LarkSymbolProvider { provideDocumentSymbols(document) { const symbols = []; - const termRe = /^\s*(?:[?!])?([A-Z0-9_]+)(?:\.\d+)?\s*:/; // optional ?/! prefix, uppercase identifiers, optional .n suffix - const ruleRe = /^\s*(?:[?!])?([a-z0-9_]+)(?:\.\d+)?\s*:/; // optional ?/! prefix, lowercase identifiers, optional .n suffix + const termRe = new RegExp(`${SYMBOL_PREFIX}${TERM_BODY}${SYMBOL_SUFFIX}\\s*`); // optional ?/! prefix, uppercase identifiers, optional .n suffix + const ruleRe = new RegExp(`${SYMBOL_PREFIX}${RULE_BODY}${SYMBOL_SUFFIX}\\s*`); // optional ?/! prefix, lowercase identifiers, optional .n suffix for (let i = 0; i < document.lineCount; i++) { const text = document.lineAt(i).text; // text content of the current line - const termMatch = termRe.exec(text); - if (termMatch) { - const name = termMatch[1]; // terminal name - const kind = vscode.SymbolKind.Constant; + function pushSymbol(match, kind) { + if (!match) return false; + const name = match[1]; const range = new vscode.Range(i, 0, i, text.length); symbols.push(new vscode.DocumentSymbol(name, '', kind, range, range)); - continue; + return true; } - const ruleMatch = ruleRe.exec(text); - if (ruleMatch) { - const name = ruleMatch[1]; // rule name - const kind = vscode.SymbolKind.Function; - const range = new vscode.Range(i, 0, i, text.length); - symbols.push(new vscode.DocumentSymbol(name, '', kind, range, range)); - } + if (pushSymbol(termRe.exec(text), vscode.SymbolKind.Constant)) continue; + pushSymbol(ruleRe.exec(text), vscode.SymbolKind.Function); } return symbols; } } // Check for unused grammar symbols and report warnings -function validateTextDocument(document, diagnosticCollection) { +async function validateTextDocument(document, diagnosticCollection) { if (document.languageId !== 'lark') return; - const diagnostics = []; - const termRe = /^\s*(?:[?!])?([A-Z0-9_]+)(?:\.\d+)?\s*:/; - const ruleRe = /^\s*(?:[?!])?([a-z0-9_]+)(?:\.\d+)?\s*:/; - const defs = {}; - // Collect definitions + + // Collect definitions via DocumentSymbolProvider + const symbols = await vscode.commands.executeCommand('vscode.executeDocumentSymbolProvider', document.uri) || []; + // Add imported terminals to symbols list + const importRe = /^\s*%import[^A-Z0-9_]*([A-Z0-9_]+)/; for (let i = 0; i < document.lineCount; i++) { - const text = document.lineAt(i).text; - let match = termRe.exec(text); - if (match) { - defs[match[1]] = { line: i, used: false }; - continue; - } - match = ruleRe.exec(text); - if (match) { - defs[match[1]] = { line: i, used: false }; + const lineText = document.lineAt(i).text; + const importMatch = importRe.exec(lineText); + if (importMatch) { + const name = importMatch[1]; + const kind = vscode.SymbolKind.Constant; + const range = new vscode.Range(i, 0, i, lineText.length); + symbols.push(new vscode.DocumentSymbol(name, '', kind, range, range)); } } + /** + * Mapping of symbol names to their definition lines and usage status. + * + * Example: { 'MyTerminal': { line: 5, used: true } } + */ + const defs = {}; + (function flatten(list) { + for (const sym of list) { + defs[sym.name] = { line: sym.range.start.line, used: false }; + if (sym.children && sym.children.length) flatten(sym.children); + } + })(symbols); // Search for references for (let i = 0; i < document.lineCount; i++) { const text = document.lineAt(i).text; @@ -84,7 +94,9 @@ function validateTextDocument(document, diagnosticCollection) { } } } - // Create diagnostics for unused + + // Detect unused symbols and report warnings + const diagnostics = []; for (const name in defs) { if (!defs[name].used && name !== 'start') { const lineNum = defs[name].line; @@ -96,6 +108,62 @@ function validateTextDocument(document, diagnosticCollection) { } } } + + // Detect undefined symbols and report errors + const termRe = new RegExp(`${SYMBOL_PREFIX}${TERM_BODY}\\s*:`); + const ruleRe = new RegExp(`${SYMBOL_PREFIX}${RULE_BODY}\\s*:`); + for (let i = 0; i < document.lineCount; i++) { + const text = document.lineAt(i).text; + // skip directive lines and comments + if (text.trim().startsWith('%')) continue; + if (text.trim().startsWith('//')) continue; + + let searchText = text; + const termDefMatch = termRe.exec(text); + const ruleDefMatch = ruleRe.exec(text); + let offset = 0; + if (termDefMatch || ruleDefMatch) { + const colonIndex = text.indexOf(':'); + if (colonIndex >= 0) { + offset = colonIndex + 1; + searchText = text.slice(offset); + } + } + // strip literal strings in quotes + searchText = searchText.replace(/"[^"]*"/g, (match) => ' '.repeat(match.length)); + // strip comments and aliases + searchText = searchText.split(/\/\/|->/)[0]; + // strip regex + searchText = searchText.replace(/(\/(?:\\.|[^\/\\])*\/[gimsuy]*)/g, (match) => ' '.repeat(match.length)); + + /** + * Search for occurrences of a given regex pattern in the current line's text content, + * and report errors for any undefined symbols found. + * + * @param {RegExp} usageRe - Regex pattern to search for. It should have one capture group + * that matches the symbol name. + * @param {string} type - Whether the symbol is a terminal or rule. + */ + function checkUsages(usageRe, type) { + let usageMatch; + while ((usageMatch = usageRe.exec(searchText)) !== null) { + const name = usageMatch[1]; + if (type === 'rule' && name === 'start') { + continue; + } + if (!defs.hasOwnProperty(name)) { + const realStart = offset + usageMatch.index; + const range = new vscode.Range(i, realStart, i, realStart + name.length); + const message = type === 'terminal' + ? `Undefined terminal '${name}'` + : `Undefined rule '${name}'`; + diagnostics.push(new vscode.Diagnostic(range, message, vscode.DiagnosticSeverity.Error)); + } + } + } + checkUsages(new RegExp(`\\b${TERM_BODY}${SYMBOL_SUFFIX}\\b`, 'g'), 'terminal'); + checkUsages(new RegExp(`\\b${RULE_BODY}${SYMBOL_SUFFIX}\\b`, 'g'), 'rule'); + } diagnosticCollection.set(document.uri, diagnostics); } From 1f4bf12be1270c8a0bcd393e1c3b4246e34c2195 Mon Sep 17 00:00:00 2001 From: Tsung-Han Yu <14802181+johan456789@users.noreply.github.com> Date: Wed, 30 Apr 2025 03:24:31 +0800 Subject: [PATCH 4/4] support showing error for invalid rule/terminal name --- extension.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/extension.js b/extension.js index cba7c94..8805aa1 100644 --- a/extension.js +++ b/extension.js @@ -110,8 +110,8 @@ async function validateTextDocument(document, diagnosticCollection) { } // Detect undefined symbols and report errors - const termRe = new RegExp(`${SYMBOL_PREFIX}${TERM_BODY}\\s*:`); - const ruleRe = new RegExp(`${SYMBOL_PREFIX}${RULE_BODY}\\s*:`); + const termRe = new RegExp(`${SYMBOL_PREFIX}${TERM_BODY}${SYMBOL_SUFFIX}\\s*:`); + const ruleRe = new RegExp(`${SYMBOL_PREFIX}${RULE_BODY}${SYMBOL_SUFFIX}\\s*:`); for (let i = 0; i < document.lineCount; i++) { const text = document.lineAt(i).text; // skip directive lines and comments @@ -121,6 +121,7 @@ async function validateTextDocument(document, diagnosticCollection) { let searchText = text; const termDefMatch = termRe.exec(text); const ruleDefMatch = ruleRe.exec(text); + const defHeadMatch = /^\s*([^:\s]+)\s*:/.exec(text); // match text before ':' let offset = 0; if (termDefMatch || ruleDefMatch) { const colonIndex = text.indexOf(':'); @@ -128,7 +129,15 @@ async function validateTextDocument(document, diagnosticCollection) { offset = colonIndex + 1; searchText = text.slice(offset); } + } else if (defHeadMatch) { + // Report error for invalid definition head before ':' + const head = defHeadMatch[1]; + const start = text.indexOf(head); + const end = start + head.length; + const range = new vscode.Range(i, start, i, end); + diagnostics.push(new vscode.Diagnostic(range, `Invalid definition name '${head}'`, vscode.DiagnosticSeverity.Error)); } + // strip literal strings in quotes searchText = searchText.replace(/"[^"]*"/g, (match) => ' '.repeat(match.length)); // strip comments and aliases