Skip to content

add symbol information support #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 181 additions & 0 deletions extension.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
const vscode = require('vscode');

// Regex source fragments shared by the symbol provider and the validator.
// They are plain strings (not RegExp objects) so that callers can splice them
// together with `new RegExp(...)` and add their own anchors and flags.

// Start of line, optional indentation, optional `?` or `!` modifier.
const SYMBOL_PREFIX = String.raw`^\s*(?:[?!])?`;
// Optional `.N` numeric suffix that may follow a symbol name.
const SYMBOL_SUFFIX = String.raw`(?:\.\d+)?`;
// Terminal name: upper-case letters, digits and underscores (captured).
const TERM_BODY = String.raw`([A-Z_][A-Z_0-9]*)`;
// Rule name: lower-case letters, digits and underscores (captured).
const RULE_BODY = String.raw`([a-z_][a-z_0-9]*)`;

/**
 * Extension entry point: registers the Lark document symbol provider and wires
 * up diagnostics for Lark grammar files.
 *
 * Every disposable created here is pushed onto `context.subscriptions`.
 *
 * @param {vscode.ExtensionContext} context - Extension context supplied by VS Code.
 */
function activate(context) {
  const selector = { language: 'lark', scheme: 'file' };
  context.subscriptions.push(
    vscode.languages.registerDocumentSymbolProvider(selector, new LarkSymbolProvider())
  );

  // Diagnostics for unused or undefined symbols
  const diagnosticCollection = vscode.languages.createDiagnosticCollection('lark-diagnostics');
  context.subscriptions.push(diagnosticCollection);

  // Validation looks definitions up through the symbol provider, which is only
  // registered for `selector`. Validating a document outside that selector
  // (untitled buffer, git diff side, ...) would find no definitions and flag
  // every reference as undefined, so such documents are skipped.
  const validate = (doc) => {
    if (!vscode.languages.match(selector, doc)) return;
    // validateTextDocument is async; surface failures instead of leaving a
    // floating promise with an unhandled rejection.
    Promise.resolve(validateTextDocument(doc, diagnosticCollection)).catch((err) => {
      console.error('lark: failed to validate document', err);
    });
  };

  // Validate every Lark document that is already open, not only the one in the
  // active editor: their open events fired before this extension was activated.
  for (const doc of vscode.workspace.textDocuments) {
    validate(doc);
  }

  // Re-validate on change and open; drop diagnostics on close
  context.subscriptions.push(
    vscode.workspace.onDidChangeTextDocument((e) => validate(e.document))
  );
  context.subscriptions.push(
    vscode.workspace.onDidOpenTextDocument((doc) => validate(doc))
  );
  context.subscriptions.push(
    vscode.workspace.onDidCloseTextDocument((doc) => diagnosticCollection.delete(doc.uri))
  );
}

/**
 * Document symbol provider for Lark grammars.
 *
 * Reports one symbol per definition line: terminals (upper-case names) as
 * `SymbolKind.Constant` and rules (lower-case names) as `SymbolKind.Function`.
 */
class LarkSymbolProvider {
  /**
   * Scan the document line by line for terminal and rule definitions.
   *
   * @param {vscode.TextDocument} document - Document to scan.
   * @returns {vscode.DocumentSymbol[]} One symbol per definition; both the
   *   range and the selection range cover the whole definition line.
   */
  provideDocumentSymbols(document) {
    // A definition is `[?!]NAME[.N] :`. The trailing colon is required:
    // without it the terminal pattern matches the leading `_` of a rule such
    // as `_expr:` (yielding a bogus terminal named `_`), and any body line
    // that merely starts with an identifier is taken for a definition.
    const termRe = new RegExp(`${SYMBOL_PREFIX}${TERM_BODY}${SYMBOL_SUFFIX}\\s*:`);
    const ruleRe = new RegExp(`${SYMBOL_PREFIX}${RULE_BODY}${SYMBOL_SUFFIX}\\s*:`);

    const symbols = [];
    for (let i = 0; i < document.lineCount; i++) {
      const text = document.lineAt(i).text;

      // Terminals are tried first; a line is at most one definition.
      let kind = vscode.SymbolKind.Constant;
      let match = termRe.exec(text);
      if (!match) {
        kind = vscode.SymbolKind.Function;
        match = ruleRe.exec(text);
      }
      if (!match) continue;

      const range = new vscode.Range(i, 0, i, text.length);
      symbols.push(new vscode.DocumentSymbol(match[1], '', kind, range, range));
    }
    return symbols;
  }
}

// Check for unused grammar symbols and report warnings
async function validateTextDocument(document, diagnosticCollection) {
if (document.languageId !== 'lark') return;

// Collect definitions via DocumentSymbolProvider
const symbols = await vscode.commands.executeCommand('vscode.executeDocumentSymbolProvider', document.uri) || [];
// Add imported terminals to symbols list
const importRe = /^\s*%import[^A-Z0-9_]*([A-Z0-9_]+)/;
for (let i = 0; i < document.lineCount; i++) {
const lineText = document.lineAt(i).text;
const importMatch = importRe.exec(lineText);
if (importMatch) {
const name = importMatch[1];
const kind = vscode.SymbolKind.Constant;
const range = new vscode.Range(i, 0, i, lineText.length);
symbols.push(new vscode.DocumentSymbol(name, '', kind, range, range));
}
}
/**
* Mapping of symbol names to their definition lines and usage status.
*
* Example: { 'MyTerminal': { line: 5, used: true } }
*/
const defs = {};
(function flatten(list) {
for (const sym of list) {
defs[sym.name] = { line: sym.range.start.line, used: false };
if (sym.children && sym.children.length) flatten(sym.children);
}
})(symbols);
// Search for references
for (let i = 0; i < document.lineCount; i++) {
const text = document.lineAt(i).text;
for (const name in defs) {
if (defs[name].used || i === defs[name].line) continue;
if (new RegExp(`\\b${name}\\b`).test(text)) {
defs[name].used = true;
}
}
}

// Detect unused symbols and report warnings
const diagnostics = [];
for (const name in defs) {
if (!defs[name].used && name !== 'start') {
const lineNum = defs[name].line;
const line = document.lineAt(lineNum);
const startChar = line.text.indexOf(name);
if (startChar >= 0) {
const range = new vscode.Range(lineNum, startChar, lineNum, startChar + name.length);
diagnostics.push(new vscode.Diagnostic(range, `Unused grammar symbol '${name}'`, vscode.DiagnosticSeverity.Warning));
}
}
}

// Detect undefined symbols and report errors
const termRe = new RegExp(`${SYMBOL_PREFIX}${TERM_BODY}${SYMBOL_SUFFIX}\\s*:`);
const ruleRe = new RegExp(`${SYMBOL_PREFIX}${RULE_BODY}${SYMBOL_SUFFIX}\\s*:`);
for (let i = 0; i < document.lineCount; i++) {
const text = document.lineAt(i).text;
// skip directive lines and comments
if (text.trim().startsWith('%')) continue;
if (text.trim().startsWith('//')) continue;

let searchText = text;
const termDefMatch = termRe.exec(text);
const ruleDefMatch = ruleRe.exec(text);
const defHeadMatch = /^\s*([^:\s]+)\s*:/.exec(text); // match text before ':'
let offset = 0;
if (termDefMatch || ruleDefMatch) {
const colonIndex = text.indexOf(':');
if (colonIndex >= 0) {
offset = colonIndex + 1;
searchText = text.slice(offset);
}
} else if (defHeadMatch) {
// Report error for invalid definition head before ':'
const head = defHeadMatch[1];
const start = text.indexOf(head);
const end = start + head.length;
const range = new vscode.Range(i, start, i, end);
diagnostics.push(new vscode.Diagnostic(range, `Invalid definition name '${head}'`, vscode.DiagnosticSeverity.Error));
}

// strip literal strings in quotes
searchText = searchText.replace(/"[^"]*"/g, (match) => ' '.repeat(match.length));
// strip comments and aliases
searchText = searchText.split(/\/\/|->/)[0];
// strip regex
searchText = searchText.replace(/(\/(?:\\.|[^\/\\])*\/[gimsuy]*)/g, (match) => ' '.repeat(match.length));

/**
* Search for occurrences of a given regex pattern in the current line's text content,
* and report errors for any undefined symbols found.
*
* @param {RegExp} usageRe - Regex pattern to search for. It should have one capture group
* that matches the symbol name.
* @param {string} type - Whether the symbol is a terminal or rule.
*/
function checkUsages(usageRe, type) {
let usageMatch;
while ((usageMatch = usageRe.exec(searchText)) !== null) {
const name = usageMatch[1];
if (type === 'rule' && name === 'start') {
continue;
}
if (!defs.hasOwnProperty(name)) {
const realStart = offset + usageMatch.index;
const range = new vscode.Range(i, realStart, i, realStart + name.length);
const message = type === 'terminal'
? `Undefined terminal '${name}'`
: `Undefined rule '${name}'`;
diagnostics.push(new vscode.Diagnostic(range, message, vscode.DiagnosticSeverity.Error));
}
}
}
checkUsages(new RegExp(`\\b${TERM_BODY}${SYMBOL_SUFFIX}\\b`, 'g'), 'terminal');
checkUsages(new RegExp(`\\b${RULE_BODY}${SYMBOL_SUFFIX}\\b`, 'g'), 'rule');
}
diagnosticCollection.set(document.uri, diagnostics);
}

/**
 * Deactivation hook exported alongside `activate`.
 *
 * Intentionally a no-op: this function performs no teardown of its own.
 */
function deactivate() {
  return undefined;
}

// CommonJS export of the two extension lifecycle functions defined above.
module.exports = { activate, deactivate };
6 changes: 5 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vscode-lark",
"version": "0.1.0",
"version": "0.2.1",
"publisher": "dirk-thomas",
"engines": {
"vscode": "^1.2.0"
Expand All @@ -19,6 +19,10 @@
"bugs": {
"url": "https://github.com/dirk-thomas/vscode-lark/issues"
},
"main": "./extension.js",
"activationEvents": [
"onLanguage:lark"
],
"contributes": {
"languages": [
{
Expand Down
2 changes: 1 addition & 1 deletion syntaxes/lark.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@
}
],
"scopeName": "source.lark"
}
}