Skip to content

Commit 7258a9e

Browse files
authored
Merge pull request #8 from safeai-aus/codex/update-sitemap-configuration-and-validation
Generate sitemap during MkDocs builds
2 parents a504af0 + d62b122 commit 7258a9e

File tree

3 files changed

+66
-87
lines changed

3 files changed

+66
-87
lines changed

hooks/sitemap.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""MkDocs hook module to generate a sitemap.xml during builds."""
2+
from __future__ import annotations
3+
4+
from datetime import datetime, timezone
5+
from pathlib import Path
6+
from typing import Iterable, List, Optional, Tuple
7+
from urllib.parse import urljoin
8+
import xml.etree.ElementTree as ET
9+
10+
try:
11+
from mkdocs.structure.files import Files, File
12+
except ImportError: # pragma: no cover - MkDocs always provides this during builds.
13+
Files = None # type: ignore
14+
File = None # type: ignore
15+
16+
# Module-level cache shared between the two hooks in this file: ``on_files``
# fills it with ``(page_url, source_path_or_None)`` tuples, and
# ``on_post_build`` reads those tuples to write the sitemap and then clears it.
_DOCUMENT_PAGES: List[Tuple[str, Optional[Path]]] = []
17+
18+
19+
def _iter_document_pages(files: "Files") -> Iterable["File"]:
    """Return the documentation pages from the current build.

    Args:
        files: The file collection handed to the ``on_files`` hook.

    Returns:
        The result of calling ``files.documentation_pages()``, or an empty
        list when ``files`` has no ``documentation_pages`` attribute.
    """
    # ``list`` doubles as the "no pages" fallback: calling it yields ``[]``.
    get_pages = getattr(files, "documentation_pages", list)
    return get_pages()
22+
23+
24+
def on_files(files: "Files", config):
    """Capture documentation page URLs so they can be emitted after the build.

    Each page's ``url`` is joined onto the configured ``site_url`` and stored,
    together with the page's source path (when it has one), in the
    module-level ``_DOCUMENT_PAGES`` list for ``on_post_build`` to consume.

    Args:
        files: The file collection for the current build.
        config: Mapping-like build configuration; only ``"site_url"`` is read.
            A falsy ``config`` is treated as an empty mapping.

    Returns:
        ``files``, unchanged.
    """
    site_url = (config or {}).get("site_url")

    # Always start from an empty cache so entries never leak between builds.
    _DOCUMENT_PAGES.clear()

    if not site_url:
        # Without a site URL there is nothing to join page URLs onto, so the
        # cache stays empty and no sitemap is written later.
        return files

    # Force exactly one trailing slash: urljoin would otherwise replace the
    # last path segment of the base instead of appending to it.
    root = site_url.rstrip("/") + "/"

    _DOCUMENT_PAGES.extend(
        (
            urljoin(root, page.url),
            Path(page.abs_src_path) if page.abs_src_path else None,
        )
        for page in _iter_document_pages(files)
    )
    return files
37+
38+
39+
def on_post_build(config):
    """Write the sitemap.xml file into the built site directory.

    Emits one ``<url>`` element per entry cached in ``_DOCUMENT_PAGES``,
    ordered by URL. When an entry has a source path whose modification time
    can be read, a ``<lastmod>`` element holding that time in UTC is added.
    Does nothing when the cache is empty.

    Args:
        config: Mapping-like build configuration; only ``"site_dir"`` is read.

    Returns:
        None.
    """
    if not _DOCUMENT_PAGES:
        return

    output_dir = Path(config["site_dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    urlset = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    # Sort on the URL only; the second tuple item may be None and is not
    # orderable against Path objects.
    for location, source in sorted(_DOCUMENT_PAGES, key=lambda entry: entry[0]):
        url_el = ET.SubElement(urlset, "url")
        ET.SubElement(url_el, "loc").text = location

        if not source:
            continue
        try:
            mtime = source.stat().st_mtime
        except OSError:
            # The source is missing or unreadable: <lastmod> is optional, so
            # leave it out rather than abort the build.
            continue

        modified = datetime.fromtimestamp(mtime, tz=timezone.utc)
        ET.SubElement(url_el, "lastmod").text = modified.isoformat().replace("+00:00", "Z")

    ET.ElementTree(urlset).write(
        output_dir / "sitemap.xml", encoding="utf-8", xml_declaration=True
    )

    # Clear cached pages so subsequent builds start fresh (important for `mkdocs serve`).
    _DOCUMENT_PAGES.clear()

mkdocs.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ plugins:
 # - minify:
 # minify_html: true
 
+hooks:
+- hooks/sitemap.py
+
 nav:
 - Home: index.md
 - About: about.md

sitemap.xml

Lines changed: 0 additions & 87 deletions
This file was deleted.

0 commit comments

Comments
 (0)