diff --git a/.github/check_headings.py b/.github/check_headings.py new file mode 100644 index 000000000..c0f20200e --- /dev/null +++ b/.github/check_headings.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python +# /// script +# dependencies = ["typer","rich"] +# /// + +import os +import re +import sys +from pathlib import Path +from typing import List, Tuple + +import typer +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + +app = typer.Typer() +console = Console(force_terminal=True if os.getenv("GITHUB_ACTIONS") else False) + + +def is_heading(line: str) -> bool: + """ + Check if a line is a numeric markdown heading. + """ + return bool(re.match(r"^#+\s+\d+(\.\d+)*\.?\s+", line)) + + +def has_trailing_period(heading_text: str) -> bool: + """Check if heading ends with a period.""" + return bool(re.match(r"^#+\s+\d+(\.\d+)*\.\s+", heading_text)) + + +def extract_heading_number(heading_text: str) -> str: + """Extract the number part from a heading.""" + match = re.match(r"^#+\s+(\d+(\.\d+)*)", heading_text) + return match.group(1) if match else "" + + +def extract_heading_level(heading_text: str) -> int: + """Extract the number part from a heading.""" + match = re.match(r"^(#+)\s+\d+(\.\d+)*", heading_text) + return len(match.group(1)) if match else 0 + + +def check_heading_numbering(content: str, fix: bool = False) -> Tuple[List[tuple], str]: + """ + Check if headings follow proper numbering conventions and optionally fix issues. + + Args: + content: The markdown content + fix: Whether to fix issues + + Returns: + A tuple of (errors, fixed_content) + """ + errors = [] + lines = content.split("\n") + fixed_lines = [] + last_depth_nums = {} + last_level_depth = 0 + + for i, line in enumerate(lines): + if not is_heading(line): + fixed_lines.append(line) + continue + + number_part = extract_heading_number(line) + heading_level = extract_heading_level(line) + number_sections = number_part.split(".") + level_depth = len(number_sections) + + # Check for trailing period + if not has_trailing_period(line): + errors.append( + ( + line, + "Missing trailing period", + "Heading number should end with a period", + ) + ) + if fix: + line = line.replace(number_part, f"{number_part}.") + + # Check if heading level matches numbering level + # Top level headings (1.) should be h2 (##), so expected level is depth + 1 + expected_level = level_depth + 1 + if heading_level != expected_level: + errors.append( + ( + line, + "Heading level mismatch", + f"Numbering suggests h{expected_level} but found {heading_level}", + ) + ) + if fix: + line = ("#" * expected_level) + line.lstrip("#") + + # Check sections start at 0 or 1 + if ( + level_depth not in last_depth_nums + and int(number_sections[-1]) != 0 + and int(number_sections[-1]) != 1 + ): + errors.append( + ( + line, + "Numbering must start at 0 or 1", + f"Expected 0 or 1, but got {number_sections[-1]}.", + ) + ) + if fix: + fixed_numbers = number_sections[:-1] + ["1"] + line = line.replace(number_part, ".".join(fixed_numbers)) + + # Going back down a level, reset + if int(last_level_depth) > int(level_depth): + for i in range(level_depth + 1, 5): + last_depth_nums[i] = 0 + + # Check sequential numbering + if ( + level_depth in last_depth_nums + and int(number_sections[-1]) != int(last_depth_nums[level_depth]) + 1 + ): + errors.append( + ( + line, + "Non-sequential heading", + f"Expected {'.'.join(number_sections[:-1])}.{last_depth_nums[level_depth] + 1}. but got {'.'.join(number_sections[:-1])}.{number_sections[-1]}.", + ) + ) + if fix: + fixed_numbers = number_sections[:-1] + [ + str(last_depth_nums[level_depth] + 1) + ] + line = line.replace(number_part, ".".join(fixed_numbers)) + + last_level_depth = level_depth + last_depth_nums[level_depth] = int(number_sections[-1]) + + fixed_lines.append(line) + + fixed_content = "\n".join(fixed_lines) + return errors, fixed_content + + +@app.command() +def check( + markdown_files: List[Path], + fix: bool = typer.Option(False, "--fix", help="Automatically fix detected issues"), +): + """ + Check markdown files for proper heading numbering. + + Validates: + - Sequential numbering at each level + - Trailing period after numbers + - Heading level matches number level (## for 1., ### for 1.1., etc.) + + With --fix flag, automatically corrects issues in the files. + """ + has_errors = False + all_error_messages = [] + total_errors = 0 + fixed_files = 0 + + for file_path in markdown_files: + content = file_path.read_text(encoding="utf-8") + errors, fixed_content = check_heading_numbering(content, fix) + + if errors: + has_errors = True + total_errors += len(errors) + + # Create table for errors + table = Table(show_header=False, box=None) + table.add_column("Heading", style="green") + table.add_column("Reason", style="bold") + table.add_column("Description") + + # Add each error directly to the table + for heading_text, error_type, error_details in errors: + table.add_row(f"'{heading_text}'", error_type, error_details) + + panel = Panel( + table, + title=f"[bold red]{file_path}[/bold red]", + border_style="red", + title_align="left", + ) + all_error_messages.append(panel) + + # Write fixed content if needed + if fix and fixed_content != content: + file_path.write_text(fixed_content, encoding="utf-8") + fixed_files += 1 + + if has_errors: + for panel in all_error_messages: + console.print(panel) + + if fix: + msg = f":wrench: [green bold]Checked {len(markdown_files)} files, found {total_errors} errors, " + msg += f"fixed {fixed_files} files" + console.print(msg) + else: + console.print( + f":x: [red]Checked {len(markdown_files)} files and found {total_errors} errors" + ) + + if not fix: + sys.exit(1) + elif fixed_files < len(all_error_messages): + # Some files couldn't be fully fixed + sys.exit(1) + else: + console.print( + f":white_check_mark: [green]Checked {len(markdown_files)} files and found {total_errors} errors" + ) + + +if __name__ == "__main__": + app() diff --git a/.github/workflows/check-headings.yml b/.github/workflows/check-headings.yml new file mode 100644 index 000000000..78456766f --- /dev/null +++ b/.github/workflows/check-headings.yml @@ -0,0 +1,24 @@ +name: Check Markdown Headings + +on: + pull_request: + paths: + - "**.md" + +jobs: + check-headings: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: Get changed markdown files + id: changed-files + uses: tj-actions/changed-files@v42 + with: + files: "**.md" + + - name: Run check_headings.py + run: uv run .github/check_headings.py ${{ steps.changed-files.outputs.all_changed_files }} diff --git a/README.md b/README.md index ca92bd221..461660037 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,33 @@ We welcome fixes and improvements from the community. Please fork the repository You can find instructions about how to develop the training material code in [`CONTRIBUTING.md`](CONTRIBUTING.md). If you want to contribute with a translation instead, check [`TRANSLATING.md`](TRANSLATING.md). +### Headings CI tests + +This repository includes a Python tool to validate markdown heading numbering consistency across training materials. + +The `check_headings.py` script ensures: + +- Sequential numbering at each level (1., 1.1., 1.2., etc.) +- Trailing periods after heading numbers +- Heading levels match numbering depth (## for 1., ### for 1.1.) + +The easiest way to run it is [with `uv`](https://docs.astral.sh/uv/), which handles dependencies for you automatically: + +```bash +# Check files for issues +uv run .github/check_headings.py docs/**/*.md +``` + +```bash +# Auto-fix detected issues +uv run .github/check_headings.py --fix docs/**/*.md +``` + +Otherwise, run `pip install typer rich` then `python .github/check_headings.py`. + +The script runs automatically in CI on markdown file changes via GitHub Actions, +and will cause a CI failure if any incorrect headings are found. + ## Credits & Copyright This training material is developed and maintained by [Seqera](https://seqera.io) and released under an open-source license ([CC BY-NC-ND](https://creativecommons.org/licenses/by-nc-nd/4.0/)) for the benefit of the community. You are welcome to reuse these materials according to the terms of the license. If you are an instructor running your own trainings, we'd love to hear about how it goes and what we could do to make it easier.