Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@microsoft/powerquery-parser",
"version": "0.17.0",
"version": "0.18.0",
"description": "A parser for the Power Query/M formula language.",
"author": "Microsoft",
"license": "MIT",
Expand Down
255 changes: 218 additions & 37 deletions src/powerquery-parser/language/identifierUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,93 @@ import { Assert, Pattern, StringUtils } from "../common";

// Classifies the form that an identifier's text takes.
export enum IdentifierKind {
// Unquoted text that is only acceptable as a generalized identifier, e.g. `foo bar`.
Generalized = "Generalized",
// Quoted text whose contents are a generalized identifier, e.g. `#"foo bar"`,
// reported only when generalized identifiers are allowed.
GeneralizedWithQuotes = "GeneralizedWithQuotes",
// Text that matches none of the other forms, e.g. `foo..bar`.
Invalid = "Invalid",
// NOTE(review): `Quote` and `QuoteRequired` are only returned by the `text.slice(2, -1)` branch of
// getIdentifierKind (quoted text whose contents are / are not a regular identifier).
// They look superseded by `RegularWithQuotes` / `RegularWithRequiredQuotes` -- confirm before relying on them.
Quote = "Quote",
QuoteRequired = "QuoteRequired",
// Unquoted text that is a regular identifier, e.g. `foo`.
Regular = "Regular",
// Quoted text whose contents are a regular identifier, e.g. `#"foo"`; the quotes are optional.
RegularWithQuotes = "RegularWithQuotes",
// Quoted text whose contents can't stand on their own in the current context, so the quotes must be kept.
RegularWithRequiredQuotes = "RegularWithRequiredQuotes",
}

// Assuming the text is a quoted identifier, finds the quotes that enclose the identifier.
// Otherwise returns undefined.
export function findQuotedIdentifierQuotes(text: string, index: number): StringUtils.FoundQuotes | undefined {
if (text[index] !== "#") {
return undefined;
// Optional flags that control how identifier text is classified and measured.
export interface IdentifierUtilsOptions {
// When true, text that is only valid as a generalized identifier (e.g. `foo bar`) is accepted.
// Treated as false when omitted.
readonly allowGeneralizedIdentifier?: boolean;
// When true, a period that is the final character of the text is counted as part of a regular identifier,
// which lets partially typed identifiers be recognized. Treated as false when omitted.
readonly allowTrailingPeriod?: boolean;
}

// Identifiers have multiple forms that can be used interchangeably.
// For example, if you have `[key = 1]`, you can use `key` or `#"key"`.
// Returns every form of the identifier that is allowed in the current context,
// or an empty array when the text can't be used as an identifier there.
export function getAllowedIdentifiers(text: string, options?: IdentifierUtilsOptions): ReadonlyArray<string> {
    const allowGeneralizedIdentifier: boolean =
        options?.allowGeneralizedIdentifier ?? DefaultallowGeneralizedIdentifier;

    // Always yields a value: unusable text is reported as IdentifierKind.Invalid rather than undefined.
    const quotedAndUnquoted: TQuotedAndUnquoted = getQuotedAndUnquoted(text, options);

    switch (quotedAndUnquoted.identifierKind) {
        case IdentifierKind.Generalized:
        case IdentifierKind.GeneralizedWithQuotes:
            return allowGeneralizedIdentifier ? [quotedAndUnquoted.withQuotes, quotedAndUnquoted.withoutQuotes] : [];

        case IdentifierKind.Invalid:
            return [];

        case IdentifierKind.RegularWithQuotes:
            return [quotedAndUnquoted.withQuotes, quotedAndUnquoted.withoutQuotes];

        // The contents aren't usable without quotes, so only the quoted form is offered.
        case IdentifierKind.RegularWithRequiredQuotes:
            return [quotedAndUnquoted.withQuotes];

        case IdentifierKind.Regular:
            return [quotedAndUnquoted.withoutQuotes, quotedAndUnquoted.withQuotes];

        default:
            throw Assert.isNever(quotedAndUnquoted);
    }
}

// Determines what kind of identifier the text is.
// It's possible that the text is a partially completed identifier,
// which is why the `allowTrailingPeriod` option exists.
//
// An identifier can have multiple forms:
//  - Regular: `foo`
//  - Regular with quotes: `#"foo"`
//  - Regular with required quotes: `#"foo bar"`
//      Reported when the quoted contents aren't a regular identifier (spaces or special characters)
//      and can't be treated as a generalized identifier in the current context.
//  - Generalized: `foo bar`
//  - Generalized with quotes: `#"foo bar"`
//  - Invalid: `foo..bar`
export function getIdentifierKind(text: string, options?: IdentifierUtilsOptions): IdentifierKind {
    const allowGeneralizedIdentifier: boolean =
        options?.allowGeneralizedIdentifier ?? DefaultallowGeneralizedIdentifier;

    if (isRegularIdentifier(text, options)) {
        return IdentifierKind.Regular;
    }

    if (allowGeneralizedIdentifier && isGeneralizedIdentifier(text)) {
        return IdentifierKind.Generalized;
    }

    // Anything else must be quoted to be an identifier at all.
    if (!isQuotedIdentifier(text)) {
        return IdentifierKind.Invalid;
    }

    // A quoted identifier is classified by what sits between its quotes.
    const stripped: string = stripQuotes(text);

    if (isRegularIdentifier(stripped, options)) {
        return IdentifierKind.RegularWithQuotes;
    }

    if (allowGeneralizedIdentifier && isGeneralizedIdentifier(stripped)) {
        return IdentifierKind.GeneralizedWithQuotes;
    }

    return IdentifierKind.RegularWithRequiredQuotes;
}

// Assuming the text is an identifier, returns the length of the identifier.
export function getIdentifierLength(text: string, index: number, allowTrailingPeriod: boolean): number | undefined {
// I'd prefer if this was internal, but it's used by the lexer so it's marked as public.
// Returns the length of the identifier starting at the given index.
export function getIdentifierLength(text: string, index: number, options?: IdentifierUtilsOptions): number | undefined {
const allowTrailingPeriod: boolean = options?.allowTrailingPeriod ?? DefaultAllowTrailingPeriod;
const startingIndex: number = index;
const textLength: number = text.length;

Expand All @@ -63,6 +117,32 @@ export function getIdentifierLength(text: string, index: number, allowTrailingPe
break;

case IdentifierRegexpState.RegularIdentifier:
if (text[index] === ".") {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you give an example of some of the states here? for example, i don't understand the "2 sequential periods == done" thing

const nextChr: string | undefined = text[index + 1];

// If the last character is a period
if (nextChr === undefined) {
// If we allow trailing period, we can consider it part of the identifier.
if (allowTrailingPeriod) {
index += 1;
}
// Else we are done.
else {
state = IdentifierRegexpState.Done;
}
}
// Else if it's two sequential periods, we are done.
else if (nextChr === ".") {
state = IdentifierRegexpState.Done;
}
// Else if it's a single period followed by a potentially valid identifier character.
else {
index += 1;
}

break;
}

// Don't consider `..` or `...` part of an identifier.
if (allowTrailingPeriod && text[index] === "." && text[index + 1] !== ".") {
index += 1;
Expand Down Expand Up @@ -91,8 +171,63 @@ export function getIdentifierLength(text: string, index: number, allowTrailingPe
return index !== startingIndex ? index - startingIndex : undefined;
}

// Removes the quotes from a quoted identifier if possible.
// When given an invalid identifier, returns undefined.
export function getNormalizedIdentifier(text: string, options?: IdentifierUtilsOptions): string | undefined {
    const quotedAndUnquoted: TQuotedAndUnquoted = getQuotedAndUnquoted(text, options);

    // This also rejects generalized text in a context where generalized identifiers are disallowed:
    // getIdentifierKind only reports a generalized kind when the option is enabled,
    // so such text arrives here as Invalid.
    if (quotedAndUnquoted.identifierKind === IdentifierKind.Invalid) {
        return undefined;
    }

    // Prefer without quotes if it exists.
    return quotedAndUnquoted.withoutQuotes ?? quotedAndUnquoted.withQuotes;
}

// Pairs an identifier kind with the quoted and unquoted spellings of the identifier.
// The type parameters let each kind state whether a spelling exists (`string`) or not (`undefined`).
interface IQuotedAndUnquoted<
TKind extends IdentifierKind,
TWithQuotes extends string | undefined,
TWithoutQuotes extends string | undefined,
> {
// Discriminant identifying which form the original text took.
readonly identifierKind: TKind;
// The `#"..."` spelling, when one exists for this kind.
readonly withQuotes: TWithQuotes;
// The bare spelling, when one exists for this kind.
readonly withoutQuotes: TWithoutQuotes;
}

// Union discriminated on `identifierKind`:
//  - Invalid carries neither spelling,
//  - RegularWithRequiredQuotes carries only the quoted spelling,
//  - every other kind carries both.
type TQuotedAndUnquoted =
| IQuotedAndUnquoted<IdentifierKind.Generalized, string, string>
| IQuotedAndUnquoted<IdentifierKind.GeneralizedWithQuotes, string, string>
| IQuotedAndUnquoted<IdentifierKind.Invalid, undefined, undefined>
| IQuotedAndUnquoted<IdentifierKind.RegularWithQuotes, string, string>
| IQuotedAndUnquoted<IdentifierKind.RegularWithRequiredQuotes, string, undefined>
| IQuotedAndUnquoted<IdentifierKind.Regular, string, string>;

// States used by the scanning loop in getIdentifierLength.
const enum IdentifierRegexpState {
// Scanning has stopped, e.g. on two sequential periods or on a trailing period that isn't allowed.
Done = "Done",
// The characters of a regular identifier are being consumed.
RegularIdentifier = "RegularIdentifier",
// NOTE(review): presumably the initial state before any character is consumed -- confirm against getIdentifierLength.
Start = "Start",
}

// Locates the quotes of a quoted identifier whose `#` sits at `index`.
// Yields undefined when the character at `index` isn't `#`;
// otherwise defers to StringUtils.findQuotes, starting just past the `#`.
function findQuotedIdentifierQuotes(text: string, index: number): StringUtils.FoundQuotes | undefined {
    return text[index] === "#" ? StringUtils.findQuotes(text, index + 1) : undefined;
}

// Assuming the text is a generalized identifier, returns the length of the identifier.
export function getGeneralizedIdentifierLength(text: string, index: number): number | undefined {
function getGeneralizedIdentifierLength(text: string, index: number): number | undefined {
const startingIndex: number = index;
const textLength: number = text.length;

Expand Down Expand Up @@ -133,31 +268,77 @@ export function getGeneralizedIdentifierLength(text: string, index: number): num
return index !== startingIndex ? index - startingIndex : undefined;
}

export function isGeneralizedIdentifier(text: string): boolean {
return getGeneralizedIdentifierLength(text, 0) === text.length;
// Classifies the text and pairs that kind with the identifier's quoted and unquoted spellings.
// A spelling that doesn't exist for the kind is left undefined.
function getQuotedAndUnquoted(text: string, options?: IdentifierUtilsOptions): TQuotedAndUnquoted {
    const identifierKind: IdentifierKind = getIdentifierKind(text, options);

    switch (identifierKind) {
        // The text is the bare spelling, so the quoted spelling is derived from it.
        case IdentifierKind.Generalized:
        case IdentifierKind.Regular:
            return { identifierKind, withoutQuotes: text, withQuotes: insertQuotes(text) };

        // The text is the quoted spelling, so the bare spelling is what sits between the quotes.
        case IdentifierKind.GeneralizedWithQuotes:
        case IdentifierKind.RegularWithQuotes:
            return { identifierKind, withoutQuotes: stripQuotes(text), withQuotes: text };

        // The quotes can't be dropped, so only the quoted spelling exists.
        case IdentifierKind.RegularWithRequiredQuotes:
            return { identifierKind, withoutQuotes: undefined, withQuotes: text };

        case IdentifierKind.Invalid:
            return { identifierKind, withoutQuotes: undefined, withQuotes: undefined };

        default:
            throw Assert.isNever(identifierKind);
    }
}

export function isRegularIdentifier(text: string, allowTrailingPeriod: boolean): boolean {
return getIdentifierLength(text, 0, allowTrailingPeriod) === text.length;
// Wraps the text as a quoted identifier: `foo` becomes `#"foo"`.
// The text is used verbatim; nothing inside it is escaped.
// NOTE(review): a reviewer suggested a name such as `makeQuoted`, because `insertQuotes` sounds like
// quotes could be inserted anywhere. Renaming requires updating the callers in the same change.
function insertQuotes(text: string): string {
    return `#"${text}"`;
}

export function isQuotedIdentifier(text: string): boolean {
return findQuotedIdentifierQuotes(text, 0) !== undefined;
// True when the text is non-empty and a generalized identifier spans all of it.
function isGeneralizedIdentifier(text: string): boolean {
    if (text.length === 0) {
        return false;
    }

    const identifierLength: number | undefined = getGeneralizedIdentifierLength(text, 0);

    return identifierLength === text.length;
}

// Removes the quotes from a quoted identifier if possible.
export function normalizeIdentifier(text: string): string {
if (isQuotedIdentifier(text)) {
const stripped: string = text.slice(2, -1);
// True when the text is non-empty and a regular identifier spans all of it.
// The options are handed through to getIdentifierLength unchanged.
function isRegularIdentifier(text: string, options?: IdentifierUtilsOptions): boolean {
    if (text.length === 0) {
        return false;
    }

    const identifierLength: number | undefined = getIdentifierLength(text, 0, options);

    return identifierLength === text.length;
}

return isRegularIdentifier(stripped, false) ? stripped : text;
} else {
return text;
}
// True when enclosing quotes can be found for a quoted identifier starting at the first character of the text.
function isQuotedIdentifier(text: string): boolean {
    const quotes: StringUtils.FoundQuotes | undefined = findQuotedIdentifierQuotes(text, 0);

    return quotes !== undefined;
}

const enum IdentifierRegexpState {
Done = "Done",
RegularIdentifier = "RegularIdentifier",
Start = "Start",
// Drops the first two characters and the last character of the text,
// i.e. the leading `#"` and the closing `"` of a quoted identifier.
// No check is made that the text actually is quoted; callers are expected to have verified that.
function stripQuotes(text: string): string {
    const prefixLength: number = 2;
    const end: number = text.length - 1;

    return text.slice(prefixLength, end);
}

// Fallbacks applied when the corresponding IdentifierUtilsOptions field is omitted.
const DefaultAllowTrailingPeriod: boolean = false;
// NOTE(review): the casing is inconsistent with DefaultAllowTrailingPeriod (lowercase `a` in `allow`);
// renaming it means updating every reference in this file in the same change.
const DefaultallowGeneralizedIdentifier: boolean = false;
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export function isEqualType(left: Type.TPowerQueryType, right: Type.TPowerQueryT
}

// Returns true when both parameters agree on nullability, optionality and type.
// All three fields are compared with strict equality (`===`).
export function isEqualFunctionParameter(left: Type.FunctionParameter, right: Type.FunctionParameter): boolean {
    return left.isNullable === right.isNullable && left.isOptional === right.isOptional && left.type === right.type;
}

export function isEqualFunctionSignature(
Expand Down
5 changes: 3 additions & 2 deletions src/powerquery-parser/lexer/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,6 @@ function readKeyword(text: string, lineNumber: number, positionStart: number, lo

function readKeywordHelper(text: string, currentPosition: number): Token.LineToken | undefined {
const identifierPositionStart: number = text[currentPosition] === "#" ? currentPosition + 1 : currentPosition;

const identifierPositionEnd: number | undefined = indexOfIdentifierEnd(text, identifierPositionStart);

if (identifierPositionEnd === undefined) {
Expand Down Expand Up @@ -1137,7 +1136,9 @@ function indexOfRegexEnd(pattern: RegExp, text: string, positionStart: number):
}

// Returns the index just past the identifier that starts at `positionStart`,
// or undefined when IdentifierUtils.getIdentifierLength reports no identifier there.
function indexOfIdentifierEnd(text: string, positionStart: number): number | undefined {
    // A trailing period is allowed so that the lexer keeps it as part of the identifier.
    const length: number | undefined = IdentifierUtils.getIdentifierLength(text, positionStart, {
        allowTrailingPeriod: true,
    });

    return length !== undefined ? positionStart + length : undefined;
}
Expand Down
Loading
Loading