diff --git a/src/project/plugins/file-plugins/flowr-analyzer-file-plugin.ts b/src/project/plugins/file-plugins/flowr-analyzer-file-plugin.ts index 1e8c217457a..c908bc6faed 100644 --- a/src/project/plugins/file-plugins/flowr-analyzer-file-plugin.ts +++ b/src/project/plugins/file-plugins/flowr-analyzer-file-plugin.ts @@ -9,7 +9,7 @@ import type { FlowrAnalyzerContext } from '../../context/flowr-analyzer-context' * Different from other plugins, these plugins trigger for each file that is loaded (if they {@link applies} to the file). * See the {@link FlowrAnalyzerFilesContext.addFile} for more information on how files are loaded and managed. * - * It is upt to the construction to ensure that no two file plugins {@link applies} to the same file, otherwise, the loading order + * It is up to the construction to ensure that no two file plugins {@link applies} to the same file, otherwise, the loading order * of these plugins will determine which plugin gets to process the file. * * See {@link DefaultFlowrAnalyzerFilePlugin} for the no-op default implementation. 
const QmdPattern = /\.qmd$/i;

/**
 * File plugin that adds support for Quarto R Markdown (`.qmd`) files.
 *
 * A file is handled by this plugin when its path ends in `.qmd` (case-insensitive).
 * Matching files are wrapped in a {@link FlowrRMarkdownFile}, i.e., they are treated
 * exactly like R Markdown files.
 */
export class FlowrAnalyzerQmdFilePlugin extends FlowrAnalyzerFilePlugin {
	public readonly name = 'qmd-file-plugin';
	public readonly description = 'Parses Quarto R Markdown files';
	public readonly version = new SemVer('0.1.0');

	/**
	 * @param file - path of the file that is about to be loaded
	 * @returns `true` if the path ends in `.qmd` (case-insensitive)
	 */
	public applies(file: PathLike): boolean {
		return QmdPattern.test(file.toString());
	}

	/**
	 * Wraps the given file provider so that its content is parsed as R Markdown.
	 * @param analyzer - the analyzer context (not used by this plugin)
	 * @param args     - the file to decorate
	 */
	protected process(analyzer: FlowrAnalyzerContext, args: FlowrFileProvider): FlowrRMarkdownFile {
		return new FlowrRMarkdownFile(args);
	}
}

/**
 * @deprecated This file originally exported the Quarto plugin under the name of the
 * R Markdown plugin. Use {@link FlowrAnalyzerQmdFilePlugin} instead; the alias only
 * exists so that existing imports from this module keep working.
 */
export const FlowrAnalyzerRmdFilePlugin = FlowrAnalyzerQmdFilePlugin;
/**
 * @deprecated Use {@link FlowrAnalyzerQmdFilePlugin} instead.
 */
export type FlowrAnalyzerRmdFilePlugin = FlowrAnalyzerQmdFilePlugin;
/**
 * This decorates a text file and parses its contents as an R Markdown file.
 * Finally, it provides access to the single cells, and all cells fused together as one R file.
 *
 * Parsing is lazy: the wrapped file is only read and parsed when {@link FlowrRMarkdownFile#rmd}
 * is first accessed or when {@link FlowrRMarkdownFile#loadContent} is called.
 */
export class FlowrRMarkdownFile extends FlowrFile {
	// result of the last parse; stays undefined until loadContent ran
	private data?: RmdInfo;
	// the undecorated file whose raw content is parsed
	private readonly wrapped: FlowrFileProvider;

	/**
	 * Prefer the static {@link FlowrRMarkdownFile.from} method
	 * @param file - the file to load as R Markdown
	 */
	constructor(file: FlowrFileProvider) {
		super(file.path(), file.role);
		this.wrapped = file;
	}

	/**
	 * The parsed R Markdown information (fused content, code blocks, and front matter options).
	 * Triggers {@link FlowrRMarkdownFile#loadContent} if nothing has been parsed yet.
	 */
	get rmd(): RmdInfo {
		if(!this.data) {
			this.loadContent();
		}
		guard(this.data);
		return this.data;
	}

	/**
	 * Loads and parses the content of the wrapped file and stores the result for {@link FlowrRMarkdownFile#rmd}.
	 * @returns the `content` of the parsed {@link RmdInfo}, i.e., the code blocks fused into one string
	 */
	protected loadContent(): string {
		this.data = parseRMarkdownFile(this.wrapped.content());
		return this.data.content;
	}

	/**
	 * Returns the given file unchanged if it already is a {@link FlowrRMarkdownFile}, otherwise wraps it in a new one.
	 * @param file - the file (provider) to view as R Markdown
	 */
	public static from(file: FlowrFileProvider | FlowrRMarkdownFile): FlowrRMarkdownFile {
		return file instanceof FlowrRMarkdownFile ? file : new FlowrRMarkdownFile(file);
	}
}

/**
 * A single R code block of an R Markdown document.
 */
export interface CodeBlock {
	// the option string as produced by parseCodeBlockOptions
	options: string,
	// the literal text of the code block
	code: string,
}

/**
 * A {@link CodeBlock} together with the position at which its code starts in the document.
 */
export type CodeBlockEx = CodeBlock & {
	startpos: { line: number, col: number }
}

/**
 * The result of parsing an R Markdown document, see {@link parseRMarkdownFile}.
 */
export interface RmdInfo {
	// all code blocks fused into one string (built by restoreBlocksWithoutMd)
	content: string
	// the individual R code blocks in document order
	blocks: CodeBlock[]
	// the data of the document's front matter as returned by gray-matter
	options: object
}

/**
 * Parse the contents of a RMarkdown file into complete code and blocks
 * @param raw - the raw file content
 * @returns Rmd Info
 */
export function parseRMarkdownFile(raw: string): RmdInfo {
	// Read and Parse Markdown
	const parser = new Parser();
	const ast = parser.parse(raw);

	// Parse Frontmatter
	const frontmatter = matter(raw);

	// Parse Codeblocks
	const walker = ast.walker();
	const blocks: CodeBlockEx[] = [];
	let e;
	while((e = walker.next())) {
		const node = e.node;
		if(!isRCodeBlock(node)) {
			continue;
		}

		blocks.push({
			code: node.literal,
			options: parseCodeBlockOptions(node.info, node.literal),
			// presumably sourcepos points at the opening fence, so +1 is the first line of code - TODO confirm
			startpos: { line: node.sourcepos[0][0] + 1, col: 0 }
		});
	}

	return {
		content: restoreBlocksWithoutMd(blocks, countNewlines(raw)),
		// the start position is only needed to fuse the blocks, it is not part of the public result
		// eslint-disable-next-line unused-imports/no-unused-vars
		blocks: blocks.map(({ startpos, ...block }) => block),
		options: frontmatter.data
	};
}

// matches '{r}' / '{R}', optionally followed by whitespace or a comma and further text up to the closing brace (not anchored)
const RTagRegex = /{[rR](?:[\s,][^}]*)?}/;

/**
 * Checks whether a CommonMark node is an R code block,
 * i.e., a `code_block` node with non-null literal and info, whose info string contains an R tag.
 */
export function isRCodeBlock(node: Node): node is Node & { literal: string, info: string } {
	return node.type === 'code_block' && node.literal !== null && node.info !== null && RTagRegex.test(node.info);
}

// a line break is either '\r\n', a lone '\r', or a lone '\n'
const LineRegex = /\r\n|\r|\n/;
/**
 * Counts the line breaks (see `LineRegex`) contained in the given string.
 */
function countNewlines(str: string): number {
	return str.split(LineRegex).length - 1;
}
/**
 * Parses the options of an R code block from its header and content.
 *
 * Options can be given in two places and are joined with `', '` in this order:
 * 1. in the header, after the language tag (e.g., `{r, echo=FALSE}` yields `echo=FALSE`)
 * 2. in the leading `#|` comment lines of the content (e.g., `#| echo: false` yields `echo: false`)
 *
 * Only the uninterrupted run of `#|` lines at the very start of the content is considered.
 * Leading/trailing whitespace around a `#|` line and around its option text is ignored,
 * all common line endings (`\n`, `\r\n`, `\r`) are supported, and `#|` lines without any
 * option text contribute nothing.
 *
 * @param header  - the info string of the code block, expected to look like `{r}` or `{r <options>}`
 * @param content - the literal code of the block
 * @returns all options joined by `', '`, or the empty string if there are none
 */
export function parseCodeBlockOptions(header: string, content: string): string {
	const options: string[] = [];

	// '{r}' => header.length=3 (no options in header),
	// otherwise strip the leading '{r' with its separator and the closing '}'
	const headerOptions = header.length === 3
		? ''
		: header.substring(3, header.length - 1).trim();
	if(headerOptions.length > 0) {
		options.push(headerOptions);
	}

	for(const line of content.split(/\r\n|\r|\n/)) {
		// the prefix check and the extraction must work on the same (trimmed) text,
		// otherwise indentation shifts the option by the number of leading blanks
		const trimmed = line.trim();
		if(!trimmed.startsWith('#|')) {
			break;
		}

		const option = trimmed.substring(2).trim();
		if(option.length > 0) {
			options.push(option);
		}
	}

	return options.join(', ');
}
{ type: typeHint } : undefined - } satisfies RParseRequestFromText; - - const type = inferFileType(baseRequest); - return FileAdapters[type].convertRequest(baseRequest); -} - -/** - * Infer the file type from a parse request, using file extension or info hints - */ -export function inferFileType(request: RParseRequest): keyof typeof FileAdapters { - if(request.request === 'text') { - return request.info ? request.info.type : 'R'; - } - - const type = path.extname(request.content).toLowerCase(); - - // Fallback to default if unknown - if(!Object.hasOwn(DocumentTypeToFormat, type)) { - return 'R'; - } - - return DocumentTypeToFormat[type as keyof typeof DocumentTypeToFormat]; -} diff --git a/src/util/formats/adapters/r-adapter.ts b/src/util/formats/adapters/r-adapter.ts deleted file mode 100644 index 0af0b1569ae..00000000000 --- a/src/util/formats/adapters/r-adapter.ts +++ /dev/null @@ -1,6 +0,0 @@ -import type { RParseRequest } from '../../../r-bridge/retriever'; -import type { FileAdapter } from '../adapter-format'; - -export const RAdapter = { - convertRequest: (request: RParseRequest) => request -} satisfies FileAdapter; diff --git a/src/util/formats/adapters/rmd-adapter.ts b/src/util/formats/adapters/rmd-adapter.ts deleted file mode 100644 index 286371dd525..00000000000 --- a/src/util/formats/adapters/rmd-adapter.ts +++ /dev/null @@ -1,128 +0,0 @@ -import fs from 'fs'; -import { type Node , Parser } from 'commonmark'; -import matter from 'gray-matter'; -import { guard } from '../../assert'; -import type { FileAdapter } from '../adapter-format'; -import type { RParseRequest, RParseRequestFromText } from '../../../r-bridge/retriever'; - -export interface CodeBlock { - options: string, - code: string, -} - -export type CodeBlockEx = CodeBlock & { - startpos: { line: number, col: number } -} - -export interface RmdInfo { - type: 'Rmd' - blocks: CodeBlock[] - options: object -} - -export const RmdAdapter = { - convertRequest: (request: RParseRequest) => { - // Read 
and Parse Markdown - const raw = request.request === 'text' - ? request.content - : fs.readFileSync(request.content, 'utf-8').toString(); - - const parser = new Parser(); - const ast = parser.parse(raw); - - // Parse Frontmatter - const frontmatter = matter(raw); - - // Parse Codeblocks - const walker = ast.walker(); - const blocks: CodeBlockEx[] = []; - let e; - while((e = walker.next())) { - const node = e.node; - if(!isRCodeBlock(node)) { - continue; - } - - blocks.push({ - code: node.literal, - options: parseCodeBlockOptions(node.info, node.literal), - startpos: { line: node.sourcepos[0][0] + 1, col: 0 } - }); - } - - return { - request: 'text', - content: restoreBlocksWithoutMd(blocks, countNewlines(raw)), - info: { - // eslint-disable-next-line unused-imports/no-unused-vars - blocks: blocks.map(({ startpos, ...block }) => block), - options: frontmatter.data, - type: 'Rmd' - } - } as RParseRequestFromText; - - } -} satisfies FileAdapter; - - -const RTagRegex = /{[rR](?:[\s,][^}]*)?}/; - -/** - * Checks whether a CommonMark node is an R code block - */ -export function isRCodeBlock(node: Node): node is Node & { literal: string, info: string } { - return node.type === 'code_block' && node.literal !== null && node.info !== null && RTagRegex.test(node.info); -} - -const LineRegex = /\r\n|\r|\n/; -function countNewlines(str: string): number { - return str.split(LineRegex).length - 1; -} - -/** - * Restores an Rmd file from code blocks, filling non-code lines with empty lines - */ -export function restoreBlocksWithoutMd(blocks: CodeBlockEx[], totalLines: number): string { - let line = 1; - let output = ''; - - const goToLine = (n: number) => { - const diff = n - line; - guard(diff >= 0); - line += diff; - output += '\n'.repeat(diff); - }; - - for(const block of blocks) { - goToLine(block.startpos.line); - output += block.code; - line += countNewlines(block.code); - } - - // Add remainder of file - goToLine(totalLines + 1); - - return output; -} - -/** - * Parses the 
options of an R code block from its header and content - */ -export function parseCodeBlockOptions(header: string, content: string): string { - let opts = header.length === 3 // '{r}' => header.length=3 (no options in header) - ? '' - : header.substring(3, header.length-1).trim(); - - const lines = content.split('\n'); - for(const line of lines) { - if(!line.trim().startsWith('#|')) { - break; - } - - const opt = line.substring(3); - - opts += opts.length === 0 ? opt : `, ${opt}`; - } - - return opts; -} \ No newline at end of file