Merge branch 'H-G-Hristov-hgh/added-build_ebook_v2.py'

Overv · Overv · commit d51734eb415a · 2023-03-08T23:07:27.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,8 @@
 *.html
 ads.txt
+
+**/_out/*
+**/.vscode/*
+.DS_Store
+build_ebook.log
+temp_ebook.md
diff --git a/build_ebook.py b/build_ebook.py
@@ -1,112 +1,241 @@
-import subprocess
-import datetime
-import os
-import re
-
-
-def create_ebook(path):
-
-    name_path = path
-    print('\n Creating \"' + name_path + '\" ebook')
-    # Recursively gather all markdown files in the right order
-    markdownFiles = []
-
-    for root, subdirs, files in os.walk(name_path):
-        for fn in files:
-            if 'md' in fn and 'ebook.md' not in fn:
-                path = os.path.join(root, fn)
-
-                # "02_Development_environment.md" -> "Development environment"
-                # "02_Development_environment.md" -> "02_Development_environment"
-                title = fn.split('.')[0]
-                # "02_Development_environment" -> "02 Development environment"
-                title = title.replace('_', ' ')
-                # "02 Development environment" -> "Development environment"
-                title = ' '.join(title.split(' ')[1:])
-
-                with open(path, 'r') as f:
-                    markdownFiles.append({
-                        'title': title,
-                        'filename': os.path.join(root, fn),
-                        'contents': f.read()
-                    })
-
-    markdownFiles.sort(key=lambda entry: entry['filename'])
+"""Generate EPUB and PDF ebooks from sources."""
 
-    # Create concatenated document
-    print('processing markdown...')
-
-    allMarkdown = ''
-
-    for entry in markdownFiles:
-        contents = entry['contents']
-
-        # Add title
-        contents = '# ' + entry['title'] + '\n\n' + contents
-
-        # Fix image links
-        contents = re.sub(r'\/images\/', 'images/', contents)
-        contents = re.sub(r'\.svg', '.png', contents)
-
-        # Fix remaining relative links (e.g. code files)
-        contents = re.sub(
-            r'\]\(\/', '](https://vulkan-tutorial.com/', contents)
-
-        # Fix chapter references
-        def repl(m):
-            target = m.group(1)
-            target = target.lower()
-            target = re.sub('_', '-', target)
-            target = target.split('/')[-1]
+from datetime import datetime
+import json
+import logging
+from pathlib import Path
+import re
+from tempfile import TemporaryDirectory
+import subprocess
+from dataclasses import dataclass
+from subprocess import CalledProcessError
+from re import Match
+import shutil
+
+# Configure the root logger once at import time: timestamped messages with a
+# padded level name, emitted at INFO level and above.
+logging.basicConfig(
+    format="%(asctime)s %(levelname)-8s %(message)s",
+    level=logging.INFO,
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
 
-            return '](#' + target + ')'
 
-        contents = re.sub(r'\]\(!([^)]+)\)', repl, contents)
+def convert_images(images_dir: Path, converted_image_dir: Path) -> Path:
+    """Convert all SVG images to PNGs and copy the remaining files as-is.
+
+    Every regular file directly inside ``images_dir`` ends up in
+    ``converted_image_dir``: ``.svg`` files are rendered to ``.png`` by
+    invoking the ``inkscape`` command, all other files are copied unchanged.
+
+    Args:
+        images_dir: Directory holding the source images.
+        converted_image_dir: Output directory; created if it does not exist.
+
+    Returns:
+        ``converted_image_dir``, so callers can use the result directly.
+
+    Raises:
+        RuntimeError: If inkscape is not installed or exits with an error.
+    """
+
+    # parents/exist_ok: tolerate nested or already existing output directories.
+    converted_image_dir.mkdir(parents=True, exist_ok=True)
+
+    for source_file in images_dir.glob("*"):
+        # shutil.copy() cannot copy directories, so only regular files count.
+        if not source_file.is_file():
+            continue
+
+        if source_file.suffix == ".svg":
+            dest_file = converted_image_dir / source_file.with_suffix(".png").name
 
-        allMarkdown += contents + '\n\n'
+            try:
+                subprocess.check_output(
+                    [
+                        "inkscape",
+                        f"--export-filename={dest_file.as_posix()}",
+                        source_file.as_posix(),
+                    ],
+                    # Merge stderr into the captured output so that it shows
+                    # up in the error message below.
+                    stderr=subprocess.STDOUT,
+                )
+            except FileNotFoundError as e:
+                raise RuntimeError(
+                    f"failed to convert {source_file.name} to {dest_file.name}: "
+                    "inkscape not installed"
+                ) from e
+            except CalledProcessError as e:
+                raise RuntimeError(
+                    f"failed to convert {source_file.name} to {dest_file.name}: "
+                    f"inkscape failed: {e.output.decode(errors='replace')}"
+                ) from e
+        else:
+            shutil.copy(source_file, converted_image_dir / source_file.name)
 
-    # Add title
-    dateNow = datetime.datetime.now()
+    return converted_image_dir
 
-    metadata = '% Vulkan Tutorial\n'
-    metadata += '% Alexander Overvoorde\n'
-    metadata += '% ' + dateNow.strftime('%B %Y') + '\n\n'
 
-    allMarkdown = metadata + allMarkdown
+@dataclass
+class MarkdownChapter:
+    """One chapter of the combined ebook, built from an entry of a language directory."""
+
+    # Heading text, derived from the entry's file name by find_markdown_chapters.
+    title: str
+    # Nesting level: 0 for entries located directly inside the language directory.
+    depth: int
+    # Markdown text of the file; empty for entries that are not regular files.
+    contents: str
 
-    with open('ebook.md', 'w') as f:
-        f.write(allMarkdown)
 
-    # Building PDF
-    print('building pdf...')
+def find_markdown_chapters(markdown_dir: Path) -> list[MarkdownChapter]:
+    """Find all Markdown files and interpret them as chapters.
+
+    Walks ``markdown_dir`` recursively in sorted path order. ``.md`` files
+    become chapters holding the file text; directories become chapters with
+    empty contents. Entries whose name starts with ``95_`` and files with any
+    other extension are skipped.
+
+    Args:
+        markdown_dir: Root directory of one language's Markdown sources.
+
+    Returns:
+        The chapters in sorted path order. ``depth`` is 0 for entries directly
+        inside ``markdown_dir`` and grows by one per directory level.
+    """
 
-    subprocess.check_output(['pandoc', 'ebook.md', '-V', 'documentclass=report', '-t', 'latex', '-s',
-                             '--toc', '--listings', '-H', 'ebook/listings-setup.tex', '-o', 'ebook/Vulkan Tutorial ' + name_path + '.pdf', '--pdf-engine=xelatex'])
+    markdown_entries = sorted(markdown_dir.rglob("*"))
 
-    print('building epub...')
+    markdown_chapters = []
 
-    subprocess.check_output(
-        ['pandoc', 'ebook.md', '--toc', '-o', 'ebook/Vulkan Tutorial ' + name_path + '.epub', '--epub-cover-image=ebook/cover.png'])
+    for markdown_path in markdown_entries:
+        # Skip privacy policy (regardless of language)
+        if markdown_path.name.startswith("95_"):
+            continue
+
+        is_file = markdown_path.is_file()
+
+        # Only Markdown sources are chapters; any other file would otherwise
+        # be read and pasted into the book as chapter text.
+        if is_file and markdown_path.suffix != ".md":
+            continue
+
+        # "02_Development_environment" -> "Development environment"
+        title = markdown_path.stem.partition("_")[-1].replace("_", " ")
+        depth = len(markdown_path.relative_to(markdown_dir).parts) - 1
+
+        markdown_chapters.append(
+            MarkdownChapter(
+                title=title,
+                depth=depth,
+                # NOTE(review): read_text() uses the platform default encoding;
+                # switching to UTF-8 must be done together with the write in
+                # compile_full_markdown.
+                contents=markdown_path.read_text() if is_file else "",
+            )
+        )
+
+    return markdown_chapters
+
+
+def generate_markdown_preface() -> str:
+    """Return the ``%``-prefixed title, author and date lines of the book.
+
+    The date line holds the current month name and year. The three lines are
+    joined with newlines and carry no trailing newline.
+    """
+    month_and_year = datetime.now().strftime("%B %Y")
+
+    return f"% Vulkan Tutorial\n% Alexander Overvoorde\n% {month_and_year}"
+
+
+def generate_markdown_chapter(
+    chapter: MarkdownChapter, converted_image_dir: Path
+) -> str:
+    """Render one chapter as Markdown ready to be placed in the combined book.
+
+    The chapter title is prepended as a top-level heading, all headings are
+    pushed down by ``chapter.depth`` levels, and links are rewritten so that
+    they still resolve outside of the website.
+
+    Args:
+        chapter: Chapter to render.
+        converted_image_dir: Directory holding the converted images; replaces
+            the ``/images/`` prefix of image links.
+
+    Returns:
+        The rewritten Markdown text of the chapter.
+    """
+    contents = f"# {chapter.title}\n\n{chapter.contents}"
+
+    # Adjust titles based on depth of chapter itself
+    if chapter.depth > 0:
+
+        def adjust_title_depth(match: Match) -> str:
+            indent, hashes = match.groups()
+            return f"{indent}{'#' * chapter.depth}{hashes}"
+
+        # Anchor at the start of a line (allowing the up to three spaces of
+        # indentation Markdown permits) so that a "# " in the middle of a
+        # line is not mistaken for a heading. Lines inside fenced code blocks
+        # that start with "# " are still affected.
+        contents = re.sub(
+            r"^( {0,3})(#+ )", adjust_title_depth, contents, flags=re.MULTILINE
+        )
+
+    # Fix image links
+    contents = contents.replace("/images/", f"{converted_image_dir.as_posix()}/")
+    contents = contents.replace(".svg", ".png")
+
+    # Fix remaining relative links
+    contents = contents.replace("(/code", "(https://vulkan-tutorial.com/code")
+    contents = contents.replace("(/resources", "(https://vulkan-tutorial.com/resources")
+
+    # Fix chapter references: "](!en/Some_section/Base_code)" -> "](#base-code)"
+    def fix_chapter_reference(match: Match) -> str:
+        target = match.group(1).lower().replace("_", "-").split("/")[-1]
+        return f"](#{target})"
+
+    contents = re.sub(r"\]\(!([^)]+)\)", fix_chapter_reference, contents)
+
+    return contents
+
+
+def compile_full_markdown(
+    markdown_dir: Path, markdown_file: Path, converted_image_dir: Path
+) -> Path:
+    """Write the preface and all chapters of one language into a single file.
+
+    Args:
+        markdown_dir: Directory that is searched for chapters.
+        markdown_file: Destination of the combined Markdown document.
+        converted_image_dir: Image directory that chapter image links are
+            pointed at.
+
+    Returns:
+        ``markdown_file``, after it has been written.
+    """
+
+    preface = generate_markdown_preface()
+    rendered_chapters = [
+        generate_markdown_chapter(chapter, converted_image_dir)
+        for chapter in find_markdown_chapters(markdown_dir)
+    ]
+    # Fragments are separated by one blank line.
+    full_text = "\n\n".join([preface, *rendered_chapters])
 
-    # Clean up
-    os.remove('ebook.md')
+    markdown_file.write_text(full_text)
 
+    return markdown_file
 
-# Convert all SVG images to PNG for pandoc
-print('converting svgs...')
 
-generatedPngs = []
+def build_pdf(markdown_file: Path, pdf_file: Path) -> Path:
+    """Build combined Markdown file into a PDF.
+
+    Runs pandoc with the xelatex PDF engine, a table of contents and the
+    listings setup from ``ebook/listings-setup.tex``.
+
+    Args:
+        markdown_file: Combined Markdown document to convert.
+        pdf_file: Path of the PDF to write.
+
+    Returns:
+        ``pdf_file``, after pandoc has finished successfully.
+
+    Raises:
+        RuntimeError: If xelatex or pandoc is not installed, or if pandoc
+            exits with an error.
+    """
 
-for fn in os.listdir('images'):
-    parts = fn.split('.')
+    # Probe for xelatex up front so that a missing installation is reported
+    # by name.
+    try:
+        subprocess.check_output(["xelatex", "--version"])
+    except FileNotFoundError as e:
+        raise RuntimeError(
+            f"failed to build {pdf_file}: xelatex not installed"
+        ) from e
 
-    if parts[1] == 'svg':
-        subprocess.check_output(['inkscape', '--export-filename=images/' +
-                                 parts[0] + '.png', 'images/' + fn], stderr=subprocess.STDOUT)
-        generatedPngs.append('images/' + parts[0] + '.png')
+    try:
+        subprocess.check_output(
+            [
+                "pandoc",
+                markdown_file.as_posix(),
+                "-V",
+                "documentclass=report",
+                "-t",
+                "latex",
+                "-s",
+                "--toc",
+                "--listings",
+                "-H",
+                "ebook/listings-setup.tex",
+                "-o",
+                pdf_file.as_posix(),
+                "--pdf-engine=xelatex",
+            ],
+            # Merge stderr into the captured output so that it shows up in
+            # the error message below.
+            stderr=subprocess.STDOUT,
+        )
+    except FileNotFoundError as e:
+        raise RuntimeError(
+            f"failed to build {pdf_file}: pandoc not installed"
+        ) from e
+    except CalledProcessError as e:
+        raise RuntimeError(
+            f"failed to build {pdf_file}: pandoc failed: "
+            f"{e.output.decode(errors='replace')}"
+        ) from e
+
+    return pdf_file
 
-create_ebook('en')
-create_ebook('fr')
 
-for fn in generatedPngs:
-    os.remove(fn)
+def build_epub(markdown_file: Path, epub_file: Path) -> Path:
+    """Build combined Markdown file into an EPUB.
+
+    Runs pandoc with a table of contents and ``ebook/cover.png`` as the cover
+    image.
+
+    Args:
+        markdown_file: Combined Markdown document to convert.
+        epub_file: Path of the EPUB to write.
+
+    Returns:
+        ``epub_file``, after pandoc has finished successfully.
+
+    Raises:
+        RuntimeError: If pandoc is not installed or exits with an error.
+    """
+    try:
+        subprocess.check_output(
+            [
+                "pandoc",
+                markdown_file.as_posix(),
+                "--toc",
+                "-o",
+                epub_file.as_posix(),
+                "--epub-cover-image=ebook/cover.png",
+            ],
+            # Merge stderr into the captured output so that it shows up in
+            # the error message below.
+            stderr=subprocess.STDOUT,
+        )
+    except FileNotFoundError as e:
+        raise RuntimeError(
+            f"failed to build {epub_file}: pandoc not installed"
+        ) from e
+    except CalledProcessError as e:
+        raise RuntimeError(
+            f"failed to build {epub_file}: pandoc failed: "
+            f"{e.output.decode(errors='replace')}"
+        ) from e
+
+    return epub_file
+
+
+def main() -> None:
+    """Build the PDF and EPUB ebooks for every language in ``config.json``.
+
+    All intermediate files live in a temporary directory; only the finished
+    books are copied into ``ebook/``.
+    """
+    with TemporaryDirectory() as scratch:
+        work_dir = Path(scratch)
+
+        logging.info("converting svg images to png...")
+        png_dir = convert_images(Path("images"), work_dir / "converted_images")
+
+        config = json.loads(Path("config.json").read_text())
+        languages = list(config["languages"])
+        logging.info(f"building ebooks for languages {'/'.join(languages)}")
+
+        for lang in languages:
+            logging.info(f"{lang}: generating markdown...")
+            combined_md = compile_full_markdown(
+                Path(lang), work_dir / f"{lang}.md", png_dir
+            )
+
+            logging.info(f"{lang}: building pdf...")
+            built_pdf = build_pdf(combined_md, work_dir / f"{lang}.pdf")
+
+            logging.info(f"{lang}: building epub...")
+            built_epub = build_epub(combined_md, work_dir / f"{lang}.epub")
+
+            # Copy the results out before the temporary directory is removed.
+            shutil.copy(built_pdf, f"ebook/Vulkan Tutorial {lang}.pdf")
+            shutil.copy(built_epub, f"ebook/Vulkan Tutorial {lang}.epub")
+
+    logging.info("done")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except RuntimeError as e:
+        logging.error(str(e))
+        # Report the failed build to the caller (shell, CI) instead of
+        # exiting with status 0.
+        raise SystemExit(1)