diff --git a/package-lock.json b/package-lock.json index 6527d4ca..6021b280 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@microsoft/powerquery-parser", - "version": "0.15.11", + "version": "0.16.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@microsoft/powerquery-parser", - "version": "0.15.11", + "version": "0.16.0", "license": "MIT", "dependencies": { "grapheme-splitter": "^1.0.4", diff --git a/package.json b/package.json index 5e69fd8a..53f4a668 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@microsoft/powerquery-parser", - "version": "0.15.11", + "version": "0.16.0", "description": "A parser for the Power Query/M formula language.", "author": "Microsoft", "license": "MIT", diff --git a/src/powerquery-parser/language/identifierUtils.ts b/src/powerquery-parser/language/identifierUtils.ts new file mode 100644 index 00000000..92154e01 --- /dev/null +++ b/src/powerquery-parser/language/identifierUtils.ts @@ -0,0 +1,163 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import { Assert, Pattern, StringUtils } from "../common"; + +export enum IdentifierKind { + Generalized = "Generalized", + Invalid = "Invalid", + Quote = "Quote", + QuoteRequired = "QuoteRequired", + Regular = "Regular", +} + +// Assuming the text is a quoted identifier, finds the quotes that enclose the identifier. +// Otherwise returns undefined. +export function findQuotedIdentifierQuotes(text: string, index: number): StringUtils.FoundQuotes | undefined { + if (text[index] !== "#") { + return undefined; + } + + return StringUtils.findQuotes(text, index + 1); +} + +// Determines what kind of identifier the text is. +// It's possible that the text is a partially completed identifier, +// which is why we have the `allowTrailingPeriod` parameter. 
+export function getIdentifierKind(text: string, allowTrailingPeriod: boolean): IdentifierKind { + if (isRegularIdentifier(text, allowTrailingPeriod)) { + return IdentifierKind.Regular; + } else if (isQuotedIdentifier(text)) { + return isRegularIdentifier(text.slice(2, -1), false) ? IdentifierKind.Quote : IdentifierKind.QuoteRequired; + } else if (isGeneralizedIdentifier(text)) { + return IdentifierKind.Generalized; + } else { + return IdentifierKind.Invalid; + } +} + +// Assuming the text is an identifier, returns the length of the identifier (undefined if nothing was matched). +export function getIdentifierLength(text: string, index: number, allowTrailingPeriod: boolean): number | undefined { + const startingIndex: number = index; + const textLength: number = text.length; + + let state: IdentifierRegexpState = IdentifierRegexpState.Start; + let matchLength: number | undefined; + + while (state !== IdentifierRegexpState.Done) { + if (index === textLength) { + return index !== startingIndex ? index - startingIndex : undefined; + } + + switch (state) { + case IdentifierRegexpState.Start: + matchLength = StringUtils.regexMatchLength(Pattern.IdentifierStartCharacter, text, index); + + if (matchLength === undefined) { + state = IdentifierRegexpState.Done; + } else { + state = IdentifierRegexpState.RegularIdentifier; + index += matchLength; + } + + break; + + case IdentifierRegexpState.RegularIdentifier: + // Don't consider `..` or `...` part of an identifier. + if (allowTrailingPeriod && text[index] === "." && text[index + 1] !== ".") { + index += 1; + } + + matchLength = StringUtils.regexMatchLength(Pattern.IdentifierPartCharacters, text, index); + + if (matchLength === undefined) { + state = IdentifierRegexpState.Done; + } else { + index += matchLength; + + // Don't consider `..` or `...` part of an identifier. + if (allowTrailingPeriod && text[index] === "." && text[index + 1] !== ".") { + index += 1; + } + } + + break; + + default: + throw Assert.isNever(state); + } + } + + return index !== startingIndex ? 
index - startingIndex : undefined; +} + +// Assuming the text is a generalized identifier, returns the length of the identifier. +export function getGeneralizedIdentifierLength(text: string, index: number): number | undefined { + const startingIndex: number = index; + const textLength: number = text.length; + + let continueMatching: boolean = true; + + while (continueMatching) { + const currentChr: string = text[index]; + + if (currentChr === " ") { + index += 1; + } else if (currentChr === ".") { + if (text[index - 1] === ".") { + continueMatching = false; + break; + } + + index += 1; + } else { + const matchLength: number | undefined = StringUtils.regexMatchLength( + Pattern.IdentifierPartCharacters, + text, + index, + ); + + if (matchLength === undefined) { + continueMatching = false; + break; + } + + index += matchLength; + } + + if (index >= textLength) { + continueMatching = false; + } + } + + return index !== startingIndex ? index - startingIndex : undefined; +} + +export function isGeneralizedIdentifier(text: string): boolean { + return getGeneralizedIdentifierLength(text, 0) === text.length; +} + +export function isRegularIdentifier(text: string, allowTrailingPeriod: boolean): boolean { + return getIdentifierLength(text, 0, allowTrailingPeriod) === text.length; +} + +export function isQuotedIdentifier(text: string): boolean { + return findQuotedIdentifierQuotes(text, 0) !== undefined; +} + +// Removes the quotes from a quoted identifier if possible. +export function normalizeIdentifier(text: string): string { + if (isQuotedIdentifier(text)) { + const stripped: string = text.slice(2, -1); + + return isRegularIdentifier(stripped, false) ? 
stripped : text; + } else { + return text; + } +} + +const enum IdentifierRegexpState { + Done = "Done", + RegularIdentifier = "RegularIdentifier", + Start = "Start", +} diff --git a/src/powerquery-parser/language/index.ts b/src/powerquery-parser/language/index.ts index d9cde535..ccf37cba 100644 --- a/src/powerquery-parser/language/index.ts +++ b/src/powerquery-parser/language/index.ts @@ -2,6 +2,7 @@ // Licensed under the MIT license. import * as Comment from "./comment"; +export * as IdentifierUtils from "./identifierUtils"; export * as TextUtils from "./textUtils"; import * as Token from "./token"; diff --git a/src/powerquery-parser/language/textUtils.ts b/src/powerquery-parser/language/textUtils.ts index 058e3483..8d83998f 100644 --- a/src/powerquery-parser/language/textUtils.ts +++ b/src/powerquery-parser/language/textUtils.ts @@ -1,16 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -import { Assert, Pattern, StringUtils } from "../common"; - -export enum IdentifierKind { - Generalized = "Generalized", - Invalid = "Invalid", - Quote = "Quote", - QuoteRequired = "QuoteRequired", - Regular = "Regular", -} - export function escape(text: string): string { let result: string = text; @@ -21,143 +11,6 @@ export function escape(text: string): string { return result; } -export function identifierKind(text: string, allowTrailingPeriod: boolean): IdentifierKind { - if (isRegularIdentifier(text, allowTrailingPeriod)) { - return IdentifierKind.Regular; - } else if (isQuotedIdentifier(text)) { - return isRegularIdentifier(text.slice(2, -1), false) ? 
IdentifierKind.Quote : IdentifierKind.QuoteRequired; - } else if (isGeneralizedIdentifier(text)) { - return IdentifierKind.Generalized; - } else { - return IdentifierKind.Invalid; - } -} - -export function isGeneralizedIdentifier(text: string): boolean { - return generalizedIdentifierLength(text, 0) === text.length; -} - -export function isRegularIdentifier(text: string, allowTrailingPeriod: boolean): boolean { - return identifierLength(text, 0, allowTrailingPeriod) === text.length; -} - -export function isQuotedIdentifier(text: string): boolean { - return quotedIdentifier(text, 0) !== undefined; -} - -export function identifierLength(text: string, index: number, allowTrailingPeriod: boolean): number | undefined { - const startingIndex: number = index; - const textLength: number = text.length; - - let state: IdentifierRegexpState = IdentifierRegexpState.Start; - let matchLength: number | undefined; - - while (state !== IdentifierRegexpState.Done) { - if (index === textLength) { - return index - startingIndex; - } - - switch (state) { - case IdentifierRegexpState.Start: - matchLength = StringUtils.regexMatchLength(Pattern.IdentifierStartCharacter, text, index); - - if (matchLength === undefined) { - state = IdentifierRegexpState.Done; - } else { - state = IdentifierRegexpState.RegularIdentifier; - index += matchLength; - } - - break; - - case IdentifierRegexpState.RegularIdentifier: - // Don't consider `..` or `...` part of an identifier. - if (allowTrailingPeriod && text[index] === "." && text[index + 1] !== ".") { - index += 1; - } - - matchLength = StringUtils.regexMatchLength(Pattern.IdentifierPartCharacters, text, index); - - if (matchLength === undefined) { - state = IdentifierRegexpState.Done; - } else { - index += matchLength; - - // Don't consider `..` or `...` part of an identifier. - if (allowTrailingPeriod && text[index] === "." 
&& text[index + 1] !== ".") { - index += 1; - } - } - - break; - - default: - throw Assert.isNever(state); - } - } - - return index !== startingIndex ? index - startingIndex : undefined; -} - -export function generalizedIdentifierLength(text: string, index: number): number | undefined { - const startingIndex: number = index; - const textLength: number = text.length; - - let continueMatching: boolean = true; - - while (continueMatching) { - const currentChr: string = text[index]; - - if (currentChr === " ") { - index += 1; - } else if (currentChr === ".") { - if (text[index - 1] === ".") { - continueMatching = false; - break; - } - - index += 1; - } else { - const matchLength: number | undefined = StringUtils.regexMatchLength( - Pattern.IdentifierPartCharacters, - text, - index, - ); - - if (matchLength === undefined) { - continueMatching = false; - break; - } - - index += matchLength; - } - - if (index >= textLength) { - continueMatching = false; - } - } - - return index !== startingIndex ? index - startingIndex : undefined; -} - -export function quotedIdentifier(text: string, index: number): StringUtils.FoundQuotes | undefined { - if (text[index] !== "#") { - return undefined; - } - - return StringUtils.findQuotes(text, index + 1); -} - -export function normalizeIdentifier(text: string): string { - if (isQuotedIdentifier(text)) { - const stripped: string = text.slice(2, -1); - - return isRegularIdentifier(stripped, false) ? 
stripped : text; - } else { - return text; - } -} - export function unescape(text: string): string { let result: string = text; @@ -168,12 +21,6 @@ export function unescape(text: string): string { return result; } -const enum IdentifierRegexpState { - Done = "Done", - RegularIdentifier = "RegularIdentifier", - Start = "Start", -} - const EscapedWhitespaceRegexp: ReadonlyArray<[RegExp, string]> = [ [/#\(cr,lf\)/gm, "\r\n"], [/#\(cr\)/gm, "\r"], diff --git a/src/powerquery-parser/lexer/lexer.ts b/src/powerquery-parser/lexer/lexer.ts index 3e7e0af5..494db56d 100644 --- a/src/powerquery-parser/lexer/lexer.ts +++ b/src/powerquery-parser/lexer/lexer.ts @@ -14,7 +14,7 @@ import { ResultUtils, StringUtils, } from "../common"; -import { Keyword, TextUtils, Token } from "../language"; +import { IdentifierUtils, Keyword, Token } from "../language"; import { LexError } from "."; import { LexSettings } from "./lexSettings"; @@ -1137,7 +1137,7 @@ function indexOfRegexEnd(pattern: RegExp, text: string, positionStart: number): } function indexOfIdentifierEnd(text: string, positionStart: number): number | undefined { - const length: number | undefined = TextUtils.identifierLength(text, positionStart, true); + const length: number | undefined = IdentifierUtils.getIdentifierLength(text, positionStart, true); return length !== undefined ? positionStart + length : undefined; } diff --git a/src/powerquery-parser/parser/nodeIdMap/nodeIdMapIterator.ts b/src/powerquery-parser/parser/nodeIdMap/nodeIdMapIterator.ts index 918f16ad..7f4d565b 100644 --- a/src/powerquery-parser/parser/nodeIdMap/nodeIdMapIterator.ts +++ b/src/powerquery-parser/parser/nodeIdMap/nodeIdMapIterator.ts @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. 
-import { Ast, Constant, TextUtils } from "../../language"; +import { Ast, Constant, IdentifierUtils } from "../../language"; import { NodeIdMap, NodeIdMapUtils, TXorNode, XorNodeKind, XorNodeUtils } from "."; import { Assert } from "../../common"; import { parameterIdentifier } from "./nodeIdMapUtils"; @@ -342,7 +342,7 @@ export function iterFieldSpecificationList( keyLiteral, optional, value, - normalizedKeyLiteral: TextUtils.normalizeIdentifier(keyLiteral), + normalizedKeyLiteral: IdentifierUtils.normalizeIdentifier(keyLiteral), pairKind: PairKind.FieldSpecification, source: fieldSpecification, }); @@ -450,7 +450,7 @@ export function iterSection( source: XorNodeUtils.boxAst(namePairedExpression), key: namePairedExpression.key, keyLiteral, - normalizedKeyLiteral: TextUtils.normalizeIdentifier(keyLiteral), + normalizedKeyLiteral: IdentifierUtils.normalizeIdentifier(keyLiteral), value: XorNodeUtils.boxAst(namePairedExpression.value), pairKind: PairKind.SectionMember, }; @@ -504,7 +504,7 @@ export function iterSection( source: keyValuePair, key, keyLiteral, - normalizedKeyLiteral: TextUtils.normalizeIdentifier(keyLiteral), + normalizedKeyLiteral: IdentifierUtils.normalizeIdentifier(keyLiteral), value: NodeIdMapUtils.nthChildXor(nodeIdMapCollection, keyValuePairNodeId, 2), pairKind: PairKind.SectionMember, }); @@ -539,7 +539,7 @@ function iterKeyValuePairs< source: keyValuePair, key, keyLiteral, - normalizedKeyLiteral: TextUtils.normalizeIdentifier(keyLiteral), + normalizedKeyLiteral: IdentifierUtils.normalizeIdentifier(keyLiteral), value: NodeIdMapUtils.nthChildXor(nodeIdMapCollection, keyValuePair.node.id, 2), pairKind, } as KVP); diff --git a/src/powerquery-parser/parser/parsers/naiveParseSteps.ts b/src/powerquery-parser/parser/parsers/naiveParseSteps.ts index dfa9130f..f6833858 100644 --- a/src/powerquery-parser/parser/parsers/naiveParseSteps.ts +++ b/src/powerquery-parser/parser/parsers/naiveParseSteps.ts @@ -2,7 +2,7 @@ // Licensed under the MIT license. 
import { Assert, CommonError, Result, ResultUtils } from "../../common"; -import { Ast, AstUtils, Constant, ConstantUtils, TextUtils, Token } from "../../language"; +import { Ast, AstUtils, Constant, ConstantUtils, IdentifierUtils, Token } from "../../language"; import { Disambiguation, DisambiguationUtils } from "../disambiguation"; import { NaiveParseSteps, ParseContext, ParseContextUtils, ParseError } from ".."; import { Parser, ParseStateCheckpoint } from "../parser"; @@ -121,9 +121,9 @@ export async function readGeneralizedIdentifier( const contiguousIdentifierStartIndex: number = tokens[tokenRangeStartIndex].positionStart.codeUnit; const contiguousIdentifierEndIndex: number = tokens[tokenRangeEndIndex - 1].positionEnd.codeUnit; const literal: string = lexerSnapshot.text.slice(contiguousIdentifierStartIndex, contiguousIdentifierEndIndex); - const literalKind: TextUtils.IdentifierKind = TextUtils.identifierKind(literal, true); + const literalKind: IdentifierUtils.IdentifierKind = IdentifierUtils.getIdentifierKind(literal, true); - if (literalKind === TextUtils.IdentifierKind.Invalid) { + if (literalKind === IdentifierUtils.IdentifierKind.Invalid) { trace.exit({ [NaiveTraceConstant.TokenIndex]: state.tokenIndex, [TraceConstant.IsThrowing]: true, diff --git a/src/test/libraryTest/identifierUtils.test.ts b/src/test/libraryTest/identifierUtils.test.ts new file mode 100644 index 00000000..92c08098 --- /dev/null +++ b/src/test/libraryTest/identifierUtils.test.ts @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +import "mocha"; +import { expect } from "chai"; + +import { IdentifierUtils } from "../../powerquery-parser/language"; + +describe("IdentifierUtils", () => { + describe(`isRegularIdentifier`, () => { + describe(`valid`, () => { + it(`foo`, () => expect(IdentifierUtils.isRegularIdentifier("foo", false), "should be true").to.be.true); + it(`foo`, () => expect(IdentifierUtils.isRegularIdentifier("foo", true), "should be true").to.be.true); + it(`foo.`, () => expect(IdentifierUtils.isRegularIdentifier("foo.", true), "should be true").to.be.true); + it(`foo.1`, () => expect(IdentifierUtils.isRegularIdentifier("foo.1", true), "should be true").to.be.true); + + it(`foo.bar123`, () => + expect(IdentifierUtils.isRegularIdentifier("foo.bar123", true), "should be true").to.be.true); + }); + + describe(`invalid`, () => { + it(`foo.`, () => expect(IdentifierUtils.isRegularIdentifier("foo.", false), "should be false").to.be.false); + }); + }); + + describe(`isGeneralizedIdentifier`, () => { + describe(`valid`, () => { + it("a", () => expect(IdentifierUtils.isGeneralizedIdentifier("a"), "should be true").to.be.true); + it("a.1", () => expect(IdentifierUtils.isGeneralizedIdentifier("a.1"), "should be true").to.be.true); + it("a b", () => expect(IdentifierUtils.isGeneralizedIdentifier("a b"), "should be true").to.be.true); + }); + + describe(`invalid`, () => { + it("a..1", () => expect(IdentifierUtils.isGeneralizedIdentifier("a..1"), "should be false").to.be.false); + }); + }); + + describe(`isQuotedIdentifier`, () => { + describe(`valid`, () => { + it(`#"foo"`, () => expect(IdentifierUtils.isQuotedIdentifier(`#"foo"`), "should be true").to.be.true); + it(`#""`, () => expect(IdentifierUtils.isQuotedIdentifier(`#""`), "should be true").to.be.true); + it(`#""""`, () => expect(IdentifierUtils.isQuotedIdentifier(`#""""`), "should be true").to.be.true); + + it(`#"a""b""c"`, () => + expect(IdentifierUtils.isQuotedIdentifier(`#"a""b""c"`), "should be true").to.be.true); + + it(`#"""b""c"`, 
() => expect(IdentifierUtils.isQuotedIdentifier(`#"""b""c"`), "should be true").to.be.true); + it(`#"a""b"""`, () => expect(IdentifierUtils.isQuotedIdentifier(`#"a""b"""`), "should be true").to.be.true); + it(`#"bar.1"`, () => expect(IdentifierUtils.isQuotedIdentifier(`#"bar.1"`), "should be true").to.be.true); + }); + + describe(`invalid`, () => { + it(`#"`, () => expect(IdentifierUtils.isQuotedIdentifier(`#"`), "should be false").to.be.false); + it(`""`, () => expect(IdentifierUtils.isQuotedIdentifier(`""`), "should be false").to.be.false); + }); + }); +}); diff --git a/src/test/libraryTest/textUtils.test.ts b/src/test/libraryTest/textUtils.test.ts new file mode 100644 index 00000000..52eb8e1e --- /dev/null +++ b/src/test/libraryTest/textUtils.test.ts @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import "mocha"; +import { expect } from "chai"; + +import { TextUtils } from "../../powerquery-parser/language"; + +describe("TextUtils", () => { + it(`escape`, () => { + const unescaped: string = 'Encode \t\t and \r\n and "quotes" but not this #(tab)'; + const escaped: string = 'Encode #(tab)#(tab) and #(cr,lf) and ""quotes"" but not this #(#)(tab)'; + + expect(TextUtils.escape(unescaped)).to.equal(escaped); + expect(TextUtils.unescape(escaped)).to.equal(unescaped); + }); +}); diff --git a/src/test/libraryTest/textUtils.ts b/src/test/libraryTest/textUtils.ts deleted file mode 100644 index a02c5b64..00000000 --- a/src/test/libraryTest/textUtils.ts +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -import "mocha"; -import { expect } from "chai"; - -import { TextUtils } from "../../powerquery-parser/language"; - -describe("TextUtils", () => { - it(`escape`, () => { - const unescaped: string = 'Encode \t\t and \r\n and "quotes" but not this #(tab)'; - const escaped: string = 'Encode #(tab)#(tab) and #(cr,lf) and ""quotes"" but not this #(#)(tab)'; - - expect(TextUtils.escape(unescaped)).to.equal(escaped); - expect(TextUtils.unescape(escaped)).to.equal(unescaped); - }); - - describe(`isRegularIdentifier`, () => { - describe(`valid`, () => { - it(`foo`, () => expect(TextUtils.isRegularIdentifier("foo", false), "should be true").to.be.true); - it(`foo`, () => expect(TextUtils.isRegularIdentifier("foo", true), "should be true").to.be.true); - it(`foo.`, () => expect(TextUtils.isRegularIdentifier("foo.", true), "should be true").to.be.true); - it(`foo.1`, () => expect(TextUtils.isRegularIdentifier("foo.1", true), "should be true").to.be.true); - - it(`foo.bar123`, () => - expect(TextUtils.isRegularIdentifier("foo.bar123", true), "should be true").to.be.true); - }); - - describe(`invalid`, () => { - it(`foo.`, () => expect(TextUtils.isRegularIdentifier("foo.", false), "should be false").to.be.false); - }); - }); - - describe(`isGeneralizedIdentifier`, () => { - describe(`valid`, () => { - it("a", () => expect(TextUtils.isGeneralizedIdentifier("a"), "should be true").to.be.true); - it("a.1", () => expect(TextUtils.isGeneralizedIdentifier("a.1"), "should be true").to.be.true); - it("a b", () => expect(TextUtils.isGeneralizedIdentifier("a b"), "should be true").to.be.true); - }); - - describe(`invalid`, () => { - it("a..1", () => expect(TextUtils.isGeneralizedIdentifier("a..1"), "should be false").to.be.false); - }); - }); - - describe(`isQuotedIdentifier`, () => { - describe(`valid`, () => { - it(`#"foo"`, () => expect(TextUtils.isQuotedIdentifier(`#"foo"`), "should be true").to.be.true); - it(`#""`, () => expect(TextUtils.isQuotedIdentifier(`#""`), "should be 
true").to.be.true); - it(`#""""`, () => expect(TextUtils.isQuotedIdentifier(`#""""`), "should be true").to.be.true); - it(`#"a""b""c"`, () => expect(TextUtils.isQuotedIdentifier(`#"a""b""c"`), "should be true").to.be.true); - it(`#"""b""c"`, () => expect(TextUtils.isQuotedIdentifier(`#"""b""c"`), "should be true").to.be.true); - it(`#"a""b"""`, () => expect(TextUtils.isQuotedIdentifier(`#"a""b"""`), "should be true").to.be.true); - it(`#"bar.1"`, () => expect(TextUtils.isQuotedIdentifier(`#"foo"`), "should be true").to.be.true); - }); - - describe(`invalid`, () => { - it(`#"`, () => expect(TextUtils.isGeneralizedIdentifier(`#"`), "should be false").to.be.false); - it(`""`, () => expect(TextUtils.isGeneralizedIdentifier(`""`), "should be false").to.be.false); - }); - }); -});