From 4cf8802aae4829a939c96c70c667574f1f116c5a Mon Sep 17 00:00:00 2001 From: Zac Spitzer Date: Wed, 30 Apr 2025 14:09:00 +0200 Subject: [PATCH] LDEV-171 add lucee docs basic export for AI / LLM RAG https://luceeserver.atlassian.net/browse/LD-171 --- api/build/BuildRunner.cfc | 14 +- api/rendering/WikiLinksRenderer.cfc | 6 +- builders/html/Builder.cfc | 232 +++++++++++++++++++++----- builders/html/layouts/breadcrumbs.cfm | 9 +- builders/html/layouts/html_head.cfm | 67 ++++++++ builders/html/layouts/main.cfm | 66 +------- builders/html/layouts/main_basic.cfm | 39 +++++ server/Application.cfc | 3 +- 8 files changed, 317 insertions(+), 119 deletions(-) create mode 100644 builders/html/layouts/html_head.cfm create mode 100644 builders/html/layouts/main_basic.cfm diff --git a/api/build/BuildRunner.cfc b/api/build/BuildRunner.cfc index 52fbcc3d0..a00f42a64 100644 --- a/api/build/BuildRunner.cfc +++ b/api/build/BuildRunner.cfc @@ -48,8 +48,8 @@ component accessors=true { var builders = []; for( var dir in dirs ){ - if ( dir.type == "dir" - && FileExists( dir.directory & "/#dir.name#/Builder.cfc" ) + if ( dir.type == "dir" + && FileExists( dir.directory & "/#dir.name#/Builder.cfc" ) && dir.name neq "dash" ) { builders.append( dir.name ); } @@ -88,14 +88,18 @@ component accessors=true { arguments.builder.injectMethod = this.injectMethod; - arguments.builder.injectMethod( "renderLinks", function( required string text ){ - return new api.rendering.WikiLinksRenderer( docTree=variables.docTree ).renderLinks( text=arguments.text, builder=variables._builder ); + arguments.builder.injectMethod( "renderLinks", function( required string text, required struct args ){ + return new api.rendering.WikiLinksRenderer( docTree=variables.docTree ).renderLinks( + text = arguments.text, + builder = variables._builder, + args = arguments.args + ); } ); arguments.builder.injectMethod( "renderTemplate", function( required string template, struct args={} ){ var renderer = new api.rendering.TemplateRenderer(); var rendered = renderer.render( argumentCollection=arguments, template=_rootPathForRenderer & arguments.template ); - return builder.renderLinks( rendered ); + return builder.renderLinks( rendered, args ); } ); StructDelete( arguments.builder, "injectMethod" ); diff --git a/api/rendering/WikiLinksRenderer.cfc b/api/rendering/WikiLinksRenderer.cfc index 03a2e8e77..2c084071c 100644 --- a/api/rendering/WikiLinksRenderer.cfc +++ b/api/rendering/WikiLinksRenderer.cfc @@ -2,14 +2,16 @@ component accessors=true { property name="docTree"; - public string function renderLinks( required string text, required any builder ) { + public string function renderLinks( required string text, required any builder, required struct args ) { var rendered = arguments.text; var link = ""; var startPos = 1; do { link = _getNextLink( rendered, startPos ); if ( !IsNull( link ) ) { - rendered = Replace( rendered, link.rawMatch, arguments.builder.renderLink( link.page ?: NullValue(), link.title ), "all" ); + rendered = Replace( rendered, link.rawMatch, + arguments.builder.renderLink( link.page ?: NullValue(), link.title, args ), + "all" ); startPos = link.nextStartPos; } } while( !IsNull( link ) ); diff --git a/builders/html/Builder.cfc b/builders/html/Builder.cfc index a930743e5..52b7e376f 100644 --- a/builders/html/Builder.cfc +++ b/builders/html/Builder.cfc @@ -1,26 +1,4 @@ component { - public string function renderLink( any page, required string title ) { - if ( IsNull( arguments.page ) ) { - if (arguments.title.left(4) eq "http"){ - return '#HtmlEditFormat( arguments.title )#'; - } else { - request.logger (text="Missing docs link: [[#HtmlEditFormat( arguments.title )#]]", type="WARN"); - return '#HtmlEditFormat( arguments.title )#'; - } - } - var link = arguments.page.getPath() & ".html"; - return '#HtmlEditFormat( arguments.title )#'; - } - - public string function _getIssueTrackerLink(required string name) { - var link = Replace( new api.build.BuildProperties().getIssueTrackerLink(), "{search}", urlEncodedFormat(arguments.name) ) - return 'Search Issue Tracker '; - } - - public string function _getTestCasesLink(required string name) { - var link = Replace( new api.build.BuildProperties().getTestCasesLink(), "{search}", urlEncodedFormat(arguments.name) ) - return 'Search Lucee Test Cases (good for further, detailed examples)'; - } public void function build( required any docTree, required string buildDirectory, required numeric threads) { var pagePaths = arguments.docTree.getPageCache().getPages(); @@ -28,14 +6,37 @@ component { request.filesWritten = 0; request.filesToWrite = StructCount(pagePaths); + var simpleBuildDirectory = arguments.buildDirectory & "Basic"; + + // purge previous build directory contents + loop array="#[buildDirectory, simpleBuildDirectory]#" item="local.dir" { + if ( directoryExists( dir ) ) + DirectoryDelete(dir, true); + directoryCreate( dir ); + } request.logger (text="Builder HTML directory: #arguments.buildDirectory#"); + request.logger (text="Builder Simple HTML directory: #simplebuildDirectory#"); new api.parsers.ParserFactory().getMarkdownParser(); // so the parser in use shows up in logs //for ( var path in pagePaths ) { each(pagePaths, function(path){ var tick = getTickCount(); - _writePage( pagePaths[arguments.path].page, buildDirectory, docTree ); + var page = pagePaths[ arguments.path ].page; + // write out full html page + var pageContent = renderPageContent( page, docTree, false, {} ); + _writePage( page, buildDirectory, docTree, pageContent, {} ); + + // write out reduced stripped down basic html page with no navigation etc + var basicArgs = { + //"no_css": true, + "no_google_analytics": true, + "no_navigation": true, + "mainTemplate": "main_basic.cfm", + "base_href": _calcRelativeBaseHref( page, simpleBuildDirectory ) + }; + var basicPageContent = renderPageContent( page, docTree, false, basicArgs ); + _writePage( page, simpleBuildDirectory, docTree, basicPageContent, basicArgs ); request.filesWritten++; if ((request.filesWritten mod 100) eq 0){ @@ -51,13 +52,19 @@ component { _copyStaticAssets( arguments.buildDirectory ); _copySiteImages( arguments.buildDirectory, arguments.docTree ); _writeSearchIndex( arguments.docTree, arguments.buildDirectory ); + + _copyStaticAssets( simpleBuildDirectory); + _zipBasicPages( arguments.buildDirectory, simpleBuildDirectory, "lucee-docs-basic" ); } - public string function renderPage( required any page, required any docTree, required boolean edit ){ + public string function renderPageContent( required any page, required any docTree, + required boolean edit, required struct htmlOpts ){ try { - var renderedPage = renderTemplate( - template = "templates/#_getPageLayoutFile( arguments.page )#.cfm" - , args = { page = arguments.page, docTree=arguments.docTree, edit=arguments.edit } + var contentArgs = { page = arguments.page, docTree=arguments.docTree, edit=arguments.edit, htmlOpts=arguments.htmlOpts }; + + var pageContent = renderTemplate( + template = "templates/#_getPageLayoutFile( arguments.page )#.cfm" + , args = contentArgs , helpers = "/builders/html/helpers" ); } catch( any e ) { @@ -66,7 +73,13 @@ component { e.additional.luceeDocsPageId = arguments.page.getid(); rethrow; } - var crumbs = arguments.docTree.getPageBreadCrumbs(arguments.page); + return pageContent; + } + + public string function renderPage( required any page, required any docTree, + required string pageContent, required boolean edit, required struct htmlOptions ){ + + var crumbs = arguments.docTree.getPageBreadCrumbs( arguments.page ); var excludeLinkMap = {}; // tracks links to exclude from See also var links = []; var categories = []; @@ -112,21 +125,50 @@ component { break; } } + + var template = arguments.htmlOptions.mainTemplate ?: "main.cfm"; + var crumbsArgs = { + crumbs:crumbs, + page: arguments.page, + docTree: arguments.docTree, + categories: categories.sort("textNoCase"), + edit: arguments.edit, + htmlOpts: arguments.htmlOptions + }; + var seeAlsoArgs = { + links= links, + htmlOpts=arguments.htmlOptions + } + try { + + var args = { + body = Trim( arguments.pageContent ) + , htmlOpts = arguments.htmlOptions + , page = arguments.page + , edit = arguments.edit + , crumbs = renderTemplate( template="layouts/breadcrumbs.cfm", helpers = "/builders/html/helpers", + args = crumbsArgs + ) + , seeAlso = renderTemplate( template="layouts/seeAlso.cfm" , helpers = "/builders/html/helpers", + args = seeAlsoArgs ) + }; + + if ( !structKeyExists(arguments.htmlOptions, "no_navigation" ) ){ + args.navTree = renderTemplate( template="layouts/sideNavTree.cfm", helpers = "/builders/html/helpers", args={ + crumbs=crumbs, + docTree=arguments.docTree, + pageLineage=arguments.page.getLineage(), + pageLineageMap=arguments.page.getPageLineageMap() + } ); + } else { + args.navTree = ""; + } + var pageContent = renderTemplate( - template = "layouts/main.cfm" + template = "layouts/#template#" , helpers = "/builders/html/helpers" - , args = { - body = Trim( renderedPage ) - , page = arguments.page - , edit = arguments.edit - , crumbs = renderTemplate( template="layouts/breadcrumbs.cfm", helpers = "/builders/html/helpers", args={ crumbs=crumbs, page=arguments.page, docTree=arguments.docTree, categories=categories.sort("textNoCase"), edit= arguments.edit } ) - , navTree = renderTemplate( template="layouts/sideNavTree.cfm", helpers = "/builders/html/helpers", args={ - crumbs=crumbs, docTree=arguments.docTree, pageLineage=arguments.page.getLineage(), pageLineageMap=arguments.page.getPageLineageMap() - } ) - , seeAlso = renderTemplate( template="layouts/seeAlso.cfm" , helpers = "/builders/html/helpers", - args={ links=links } ) - } + , args = args ); } catch( any e ) { //e.additional.luceeDocsPage = arguments.page; @@ -173,18 +215,24 @@ component { } // PRIVATE HELPERS - private void function _writePage( required any page, required string buildDirectory, required any docTree ) { + private void function _writePage( required any page, required string buildDirectory, + required any docTree, required string pageContent, required struct htmlOptions ) { var filePath = variables._getHtmlFilePath( arguments.page, arguments.buildDirectory ); var fileDirectory = GetDirectoryFromPath( filePath ); - //var starttime = getTickCount(); lock name="CreateDirectory" timeout=10 { if ( !DirectoryExists( fileDirectory ) ) { DirectoryCreate( fileDirectory ); } } - var pageContent = variables.cleanHtml(variables.renderPage( arguments.page, arguments.docTree, false )); - FileWrite( filePath, pageContent ); + + var html = variables.cleanHtml( + variables.renderPage( arguments.page, + arguments.docTree, arguments.pageContent, false , + arguments.htmlOptions, arguments.buildDirectory + ) + ); + FileWrite( filePath, html ); } // regex strips left over whitespace multiple new lines @@ -201,6 +249,18 @@ component { return arguments.buildDirectory & arguments.page.getPath() & ".html"; } + private string function _calcRelativeBaseHref( required any page, required string buildDirectory ) { + var path = arguments.page.getPath(); + var depth = listLen( path, "/" ); + if ( depth eq 1) + return ""; + var baseHref = []; + loop times="#depth-1#" { + arrayAppend(baseHref, ".."); + } + return ArrayToList(baseHref, "/"); + } + private void function _copyStaticAssets( required string buildDirectory ) { updateHighlightsCss( arguments.buildDirectory ); var subdirs = directoryList(path=GetDirectoryFromPath( GetCurrentTemplatePath() ) & "/assets", type="dir", recurse="false"); @@ -214,8 +274,10 @@ component { private function updateHighlightsCss( required string buildDirectory ){ var highlighter = new api.rendering.Pygments(); - var cssFile = path=GetDirectoryFromPath( GetCurrentTemplatePath() ) & "/assets/css/highlight.css"; - fileWrite( cssFile, highlighter.getCss() ); + var cssFile = GetDirectoryFromPath( GetCurrentTemplatePath() ) & "/assets/css/highlight.css"; + var css = highlighter.getCss(); + if ( trim( css ) neq trim( fileRead( cssFile ) ) ) + fileWrite( cssFile, highlighter.getCss() ); // only update if changed } private void function _copySiteImages( required string buildDirectory, required any docTree ) { @@ -305,4 +367,82 @@ component { return '#chr(10)#' & ArrayToList(siteMap, chr(10) ) & '#chr(10)#'; } + + public string function renderLink( any page, required string title, required struct args ) { + if ( IsNull( arguments.page ) ) { + if (arguments.title.left(4) eq "http"){ + return '#HtmlEditFormat( arguments.title )#'; + } else { + request.logger (text="Missing docs link: [[#HtmlEditFormat( arguments.title )#]]", type="WARN"); + return '#HtmlEditFormat( arguments.title )#'; + } + } + var link = arguments.page.getPath() & ".html"; + if (!structKeyExists( args, "htmlOpts" ) ){ + SystemOutput(structKeyList(args), true); + throw "zac"; + } + //if ( arguments.page.getPath() contains "ormFlush" ) SystemOutput(args, true); + if ( structKeyExists( args, "htmlOpts" ) + && structKeyExists( args.htmlOpts, "base_href" ) ){ + link = args.htmlOpts.base_href & link; + } + + return '#HtmlEditFormat( arguments.title )#'; + } + + public string function _getIssueTrackerLink(required string name) { + var link = Replace( new api.build.BuildProperties().getIssueTrackerLink(), "{search}", urlEncodedFormat(arguments.name) ) + return 'Search Issue Tracker '; + } + + public string function _getTestCasesLink(required string name) { + var link = Replace( new api.build.BuildProperties().getTestCasesLink(), "{search}", urlEncodedFormat(arguments.name) ) + return 'Search Lucee Test Cases (good for further, detailed examples)'; + } + + public function _zipBasicPages( buildDirectory, simpleBuildDirectory, zipName ){ + + var zipFilename = arguments.zipName & ".zip"; + var doubleZipFilename = arguments.zipName & "-zipped.zip"; + + // neat trick, storing then zipping the stored zip reduces the file size from 496 Kb to 216 Kb + var tempStoredZip = getTempFile( "", "#zipfileName#-store", "zip" ); + var tempDoubleZip = getTempFile( "", "#zipfileName#-normal", "zip" ); + var tempNormalZip = getTempFile( "", "#zipfileName#-normal", "zip" ); + + zip action="zip" + source="#arguments.simpleBuildDirectory#" + file="#tempStoredZip#" + compressionmethod="store" + recurse="true"; + + zip action="zip" + source="#arguments.simpleBuildDirectory#" + file="#tempDoubleZip#" + compressionmethod="deflateUtra" // typo in cfzip! + recurse="false" { + zipparam entrypath="#zipFilename#" source="#tempStoredZip#"; + }; + fileDelete( tempStoredZip ); + + zip action="zip" + source="#arguments.simpleBuildDirectory#" + file="#tempNormalZip#" + recurse="true"; + + publishWithChecksum( tempNormalZip, "#buildDirectory#/#zipFilename#" ); + publishWithChecksum( tempDoubleZip, "#buildDirectory#/#doubleZipFilename#" ); + }; + + function publishWithChecksum( src, dest ){ + request.logger (text="Builder copying zip to #dest#"); + fileCopy( src, dest ); + loop list="md5,sha1" item="local.hashType" { + var checksumPath = left( dest, len( dest ) - 3 ) & hashType; + filewrite( checksumPath, lcase( hash( fileReadBinary( arguments.src ), hashType ) ) ); + request.logger (text="Builder added #checksumPath# checksum"); + } + } + } diff --git a/builders/html/layouts/breadcrumbs.cfm b/builders/html/layouts/breadcrumbs.cfm index 263700897..34af6a36c 100644 --- a/builders/html/layouts/breadcrumbs.cfm +++ b/builders/html/layouts/breadcrumbs.cfm @@ -3,14 +3,17 @@ - - + + + + + - diff --git a/builders/html/layouts/html_head.cfm b/builders/html/layouts/html_head.cfm new file mode 100644 index 000000000..a902e26bc --- /dev/null +++ b/builders/html/layouts/html_head.cfm @@ -0,0 +1,67 @@ + + local.baseHref = ( repeatString( '../', args.page.getDepth()-1 ) ); + // this breaks the /static/ local server mode + //if (baseHref eq "") + // baseHref = "/"; + local.path = args.page.getPath(); + if ( local.path eq "/home" ) + local.path = "/index"; + local.pageHref = "https://docs.lucee.org#local.path#.html"; + local.pagePath = "#local.path#.html"; + if (args.page.getTitle() neq "Lucee Documentation") + local.pageTitle = args.page.getTitle() & " :: Lucee Documentation"; + else + local.pageTitle = args.page.getTitle(); + // many sites (slack, discord one box etc) can't handle the escaped and strip out the tag name from previews + local.safePageTitle = EncodeForHtml( Replace( Replace( local.pageTitle, "<", "", "all" ), ">", "", "all" ) ); + local.pageTitle = EncodeForHtml( local.pageTitle ); + local.pageDescription = getMetaDescription( args.page, args.body ); + + + + + #local.pageTitle# + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/builders/html/layouts/main.cfm b/builders/html/layouts/main.cfm index 8c5585d1e..1eaeb8ebc 100644 --- a/builders/html/layouts/main.cfm +++ b/builders/html/layouts/main.cfm @@ -4,71 +4,13 @@ - - - local.baseHref = ( repeatString( '../', args.page.getDepth()-1 ) ); - // this breaks the /static/ local server mode - //if (baseHref eq "") - // baseHref = "/"; - local.path = args.page.getPath(); - if ( local.path eq "/home" ) - local.path = "/index"; - local.pageHref = "https://docs.lucee.org#local.path#.html"; - local.pagePath = "#local.path#.html"; - if (args.page.getTitle() neq "Lucee Documentation") - local.pageTitle = args.page.getTitle() & " :: Lucee Documentation"; - else - local.pageTitle = args.page.getTitle(); - // many sites (slack, discord one box etc) can't handle the escaped and strip out the tag name from previews - local.safePageTitle = EncodeForHtml( Replace( Replace( local.pageTitle, "<", "", "all" ), ">", "", "all" ) ); - local.pageTitle = EncodeForHtml( local.pageTitle ); - local.pageDescription = getMetaDescription( args.page, args.body ); - + + - - #local.pageTitle# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +