From 75158addab65f3ae3214ec07ce0eab0dde6a2579 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 16:47:38 +0200 Subject: [PATCH 1/8] New script to validate numbered headings --- .github/check_headings.py | 169 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 .github/check_headings.py diff --git a/.github/check_headings.py b/.github/check_headings.py new file mode 100644 index 000000000..57ae95106 --- /dev/null +++ b/.github/check_headings.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python + +import re +import sys +from pathlib import Path +from typing import List + +import typer +from rich.console import Console +from rich.panel import Panel +from rich.table import Table + +app = typer.Typer() +console = Console() + +def extract_headings(content: str) -> List[tuple]: + """Extract all markdown headings with their level and text.""" + heading_pattern = r'^(#{1,6})\s+(.*?)$' + return [(len(match.group(1)), match.group(2).strip()) for match in re.finditer(heading_pattern, content, re.MULTILINE)] + +def is_numbered_heading(heading_text: str) -> bool: + """Check if heading starts with a number pattern like 1., 1.1., etc.""" + return bool(re.match(r'^\d+(\.\d+)*', heading_text)) + +def has_trailing_period(heading_text: str) -> bool: + """Check if heading ends with a period.""" + return bool(re.match(r'^\d+(\.\d+)*\. ', heading_text)) + +def extract_heading_number(heading_text: str) -> str: + """Extract the number part from a heading.""" + match = re.match(r'^(\d+(\.\d+)*)', heading_text) + return match.group(1) if match else "" + +def check_heading_numbering(headings: List[tuple]) -> List[tuple]: + """ + Check if headings follow proper numbering conventions. + Returns a list of tuples (heading_text, error_type, error_details). 
+ """ + errors = [] + current_numbers = {} # Track the current number at each level + + for heading_level, heading_text in headings: + if not is_numbered_heading(heading_text): + # Check if it's a numbered heading but missing the trailing period + if re.match(r'^\d+(\.\d+)*\s', heading_text): + errors.append(( + heading_text, + "Missing trailing period", + "Number part should end with a period" + )) + continue + + if not has_trailing_period(heading_text): + errors.append(( + heading_text, + "Missing trailing period", + "Heading number should end with a period" + )) + + number_part = extract_heading_number(heading_text) + number_sections = number_part.split('.') + level_depth = len(number_sections) + + # Check if heading level matches numbering level + # Top level headings (1.) should be h2 (##), so expected level is depth + 1 + expected_level = level_depth + 1 + if heading_level != expected_level: + errors.append(( + heading_text, + "Heading level mismatch", + f"Numbering suggests h{expected_level} but found {heading_level}" + )) + + # Check sequential numbering + parent_key = '.'.join(number_sections[:-1]) + current_level = int(number_sections[-1]) + + # For sub-levels, we don't need to verify parent level exists + # as it might come later in the document + if level_depth > 1: + # Initialize parent level if not seen yet + if parent_key not in current_numbers: + current_numbers[parent_key] = 0 + + # Check if numbering is sequential + expected = current_numbers.get(parent_key, 0) + 1 + if current_level != expected: + # Special case: allow starting with 0 + if level_depth == 1 and current_level == 0 and expected == 1: + # This is the first heading and it starts with 0, which is valid + pass + else: + if level_depth == 1: + errors.append(( + heading_text, + "Non-sequential top-level heading", + f"Expected {expected}. but got {current_level}." + )) + else: + errors.append(( + heading_text, + "Non-sequential heading", + f"Expected {parent_key}.{expected}. 
but got {number_part}" + )) + + # Update the current number for this level + current_numbers[parent_key] = current_level + + # Reset all deeper levels when encountering a new parent + deeper_levels = [ + k for k in current_numbers + if k.startswith(f"{parent_key}.") or (parent_key == "" and '.' in k) + ] + for k in deeper_levels: + del current_numbers[k] + + return errors + +@app.command() +def check(markdown_files: List[Path]): + """ + Check markdown files for proper heading numbering. + + Validates: + - Sequential numbering at each level + - Trailing period after numbers + - Heading level matches number level (## for 1., ### for 1.1., etc.) + """ + has_errors = False + all_error_messages = [] + total_errors = 0 + + for file_path in markdown_files: + content = file_path.read_text(encoding="utf-8") + headings = extract_headings(content) + errors = check_heading_numbering(headings) + + if errors: + has_errors = True + total_errors += len(errors) + + # Create table for errors + table = Table(show_header=False, box=None) + table.add_column("Heading", style="green") + table.add_column("Reason", style="bold") + table.add_column("Description") + + # Add each error directly to the table + for heading_text, error_type, error_details in errors: + table.add_row(f"'{heading_text}'", error_type, error_details) + + panel = Panel( + table, + title=f"[bold red]{file_path}[/bold red]", + border_style="red", + title_align="left" + ) + all_error_messages.append(panel) + + if has_errors: + for panel in all_error_messages: + console.print(panel) + console.print(f":x: Checked {len(markdown_files)} files and found {total_errors} errors") + sys.exit(1) + else: + console.print(f":white_check_mark: Checked {len(markdown_files)} files and found {total_errors} errors") + +if __name__ == "__main__": + app() From 9f38a7a4a55d587d66f35611d0865dff354d27a8 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 16:54:30 +0200 Subject: [PATCH 2/8] Add github actions workflow to check 
headings --- .github/workflows/check-headings.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .github/workflows/check-headings.yml diff --git a/.github/workflows/check-headings.yml b/.github/workflows/check-headings.yml new file mode 100644 index 000000000..d0ae73ab3 --- /dev/null +++ b/.github/workflows/check-headings.yml @@ -0,0 +1,28 @@ +name: Check Markdown Headings + +on: + pull_request: + paths: + - "**.md" + +jobs: + check-headings: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Get changed markdown files + id: changed-files + uses: tj-actions/changed-files@v42 + with: + files: "**.md" + + - name: Run check_headings.py + run: python check_headings.py ${{ steps.changed-files.outputs.all_changed_files }} From 16aff6d8fbae2cabdb079c8f7b620d46844e73f8 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 18:07:15 +0200 Subject: [PATCH 3/8] Mostly rewrite by hand, add --fix --- .github/check_headings.py | 226 +++++++++++++++++++++++--------------- 1 file changed, 138 insertions(+), 88 deletions(-) diff --git a/.github/check_headings.py b/.github/check_headings.py index 57ae95106..147b95d5e 100644 --- a/.github/check_headings.py +++ b/.github/check_headings.py @@ -3,7 +3,7 @@ import re import sys from pathlib import Path -from typing import List +from typing import List, Tuple import typer from rich.console import Console @@ -13,111 +13,138 @@ app = typer.Typer() console = Console() -def extract_headings(content: str) -> List[tuple]: - """Extract all markdown headings with their level and text.""" - heading_pattern = r'^(#{1,6})\s+(.*?)$' - return [(len(match.group(1)), match.group(2).strip()) for match in re.finditer(heading_pattern, content, re.MULTILINE)] -def is_numbered_heading(heading_text: str) -> bool: - """Check if 
heading starts with a number pattern like 1., 1.1., etc.""" - return bool(re.match(r'^\d+(\.\d+)*', heading_text)) +def is_heading(line: str) -> bool: + """ + Check if a line is a numeric markdown heading. + """ + return bool(re.match(r"^#+\s+\d+(\.\d+)*\.?\s+", line)) + def has_trailing_period(heading_text: str) -> bool: """Check if heading ends with a period.""" - return bool(re.match(r'^\d+(\.\d+)*\. ', heading_text)) + return bool(re.match(r"^#+\s+\d+(\.\d+)*\.\s+", heading_text)) + def extract_heading_number(heading_text: str) -> str: """Extract the number part from a heading.""" - match = re.match(r'^(\d+(\.\d+)*)', heading_text) + match = re.match(r"^#+\s+(\d+(\.\d+)*)", heading_text) return match.group(1) if match else "" -def check_heading_numbering(headings: List[tuple]) -> List[tuple]: + +def extract_heading_level(heading_text: str) -> int: + """Extract the heading level (number of leading '#' characters) from a heading.""" + match = re.match(r"^(#+)\s+\d+(\.\d+)*", heading_text) + return len(match.group(1)) if match else 0 + + +def check_heading_numbering(content: str, fix: bool = False) -> Tuple[List[tuple], str]: """ - Check if headings follow proper numbering conventions. - Returns a list of tuples (heading_text, error_type, error_details). + Check if headings follow proper numbering conventions and optionally fix issues.
+ + Args: + content: The markdown content + fix: Whether to fix issues + + Returns: + A tuple of (errors, fixed_content) """ errors = [] - current_numbers = {} # Track the current number at each level - - for heading_level, heading_text in headings: - if not is_numbered_heading(heading_text): - # Check if it's a numbered heading but missing the trailing period - if re.match(r'^\d+(\.\d+)*\s', heading_text): - errors.append(( - heading_text, - "Missing trailing period", - "Number part should end with a period" - )) + lines = content.split("\n") + fixed_lines = [] + last_depth_nums = {} + last_level_depth = 0 + + for i, line in enumerate(lines): + if not is_heading(line): + fixed_lines.append(line) continue - if not has_trailing_period(heading_text): - errors.append(( - heading_text, - "Missing trailing period", - "Heading number should end with a period" - )) - - number_part = extract_heading_number(heading_text) - number_sections = number_part.split('.') + number_part = extract_heading_number(line) + heading_level = extract_heading_level(line) + number_sections = number_part.split(".") level_depth = len(number_sections) + # Check for trailing period + if not has_trailing_period(line): + errors.append( + ( + line, + "Missing trailing period", + "Heading number should end with a period", + ) + ) + if fix: + line = line.replace(number_part, f"{number_part}.", 1)  # count=1: only the leading heading number + # Check if heading level matches numbering level # Top level headings (1.) 
should be h2 (##), so expected level is depth + 1 expected_level = level_depth + 1 if heading_level != expected_level: - errors.append(( - heading_text, - "Heading level mismatch", - f"Numbering suggests h{expected_level} but found {heading_level}" - )) + errors.append( + ( + line, + "Heading level mismatch", + f"Numbering suggests h{expected_level} but found {heading_level}", + ) + ) + if fix: + line = ("#" * expected_level) + line.lstrip("#") + + # Check sections start at 0 or 1 + if ( + level_depth not in last_depth_nums + and int(number_sections[-1]) != 0 + and int(number_sections[-1]) != 1 + ): + errors.append( + ( + line, + "Numbering must start at 0 or 1", + f"Expected 0 or 1, but got {number_sections[-1]}.", + ) + ) + if fix: + fixed_numbers = number_sections[:-1] + ["1"] + line = line.replace(number_part, ".".join(fixed_numbers), 1)  # count=1: only the leading heading number + + # Going back down a level, reset + if int(last_level_depth) > int(level_depth): + for i in range(level_depth + 1, 5): + last_depth_nums[i] = 0 # Check sequential numbering - parent_key = '.'.join(number_sections[:-1]) - current_level = int(number_sections[-1]) - - # For sub-levels, we don't need to verify parent level exists - # as it might come later in the document - if level_depth > 1: - # Initialize parent level if not seen yet - if parent_key not in current_numbers: - current_numbers[parent_key] = 0 - - # Check if numbering is sequential - expected = current_numbers.get(parent_key, 0) + 1 - if current_level != expected: - # Special case: allow starting with 0 - if level_depth == 1 and current_level == 0 and expected == 1: - # This is the first heading and it starts with 0, which is valid - pass - else: - if level_depth == 1: - errors.append(( - heading_text, - "Non-sequential top-level heading", - f"Expected {expected}. but got {current_level}." - )) - else: - errors.append(( - heading_text, - "Non-sequential heading", - f"Expected {parent_key}.{expected}. 
but got {number_part}" - )) - - # Update the current number for this level - current_numbers[parent_key] = current_level - - # Reset all deeper levels when encountering a new parent - deeper_levels = [ - k for k in current_numbers - if k.startswith(f"{parent_key}.") or (parent_key == "" and '.' in k) - ] - for k in deeper_levels: - del current_numbers[k] - - return errors + if ( + level_depth in last_depth_nums + and int(number_sections[-1]) != int(last_depth_nums[level_depth]) + 1 + ): + errors.append( + ( + line, + "Non-sequential heading", + f"Expected {'.'.join(number_sections[:-1])}.{last_depth_nums[level_depth] + 1}. but got {'.'.join(number_sections[:-1])}.{number_sections[-1]}.", + ) + ) + if fix: + fixed_numbers = number_sections[:-1] + [ + str(last_depth_nums[level_depth] + 1) + ] + line = line.replace(number_part, ".".join(fixed_numbers), 1)  # count=1: only the leading heading number + + last_level_depth = level_depth + last_depth_nums[level_depth] = int(number_sections[-1]) + + fixed_lines.append(line) + + fixed_content = "\n".join(fixed_lines) + return errors, fixed_content + @app.command() -def check(markdown_files: List[Path]): +def check( + markdown_files: List[Path], + fix: bool = typer.Option(False, "--fix", help="Automatically fix detected issues"), +): """ Check markdown files for proper heading numbering. @@ -125,15 +152,17 @@ def check(markdown_files: List[Path]): - Sequential numbering at each level - Trailing period after numbers - Heading level matches number level (## for 1., ### for 1.1., etc.) + + With --fix flag, automatically corrects issues in the files. 
""" has_errors = False all_error_messages = [] total_errors = 0 + fixed_files = 0 for file_path in markdown_files: content = file_path.read_text(encoding="utf-8") - headings = extract_headings(content) - errors = check_heading_numbering(headings) + errors, fixed_content = check_heading_numbering(content, fix) if errors: has_errors = True @@ -153,17 +182,38 @@ def check(markdown_files: List[Path]): table, title=f"[bold red]{file_path}[/bold red]", border_style="red", - title_align="left" + title_align="left", ) all_error_messages.append(panel) + # Write fixed content if needed + if fix and fixed_content != content: + file_path.write_text(fixed_content, encoding="utf-8") + fixed_files += 1 + if has_errors: for panel in all_error_messages: console.print(panel) - console.print(f":x: Checked {len(markdown_files)} files and found {total_errors} errors") - sys.exit(1) + + if fix: + msg = f":wrench: [green bold]Checked {len(markdown_files)} files, found {total_errors} errors, " + msg += f"fixed {fixed_files} files" + console.print(msg) + else: + console.print( + f":x: [red]Checked {len(markdown_files)} files and found {total_errors} errors" + ) + + if not fix: + sys.exit(1) + elif fixed_files < len(all_error_messages): + # Some files couldn't be fully fixed + sys.exit(1) else: - console.print(f":white_check_mark: Checked {len(markdown_files)} files and found {total_errors} errors") + console.print( + f":white_check_mark: [green]Checked {len(markdown_files)} files and found {total_errors} errors" + ) + if __name__ == "__main__": app() From d8836b9b23fb9d2e262e05338bf6d91f852d75b9 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 21:07:02 +0200 Subject: [PATCH 4/8] Fix path to Python script --- .github/workflows/check-headings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check-headings.yml b/.github/workflows/check-headings.yml index d0ae73ab3..1f73183f3 100644 --- a/.github/workflows/check-headings.yml +++ 
b/.github/workflows/check-headings.yml @@ -25,4 +25,4 @@ jobs: files: "**.md" - name: Run check_headings.py - run: python check_headings.py ${{ steps.changed-files.outputs.all_changed_files }} + run: python .github/check_headings.py ${{ steps.changed-files.outputs.all_changed_files }} From 8f01d70474829aaae64de4e16d2b17357313cdc2 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 21:23:59 +0200 Subject: [PATCH 5/8] Use uv --- .github/check_headings.py | 1 + .github/workflows/check-headings.yml | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/check_headings.py b/.github/check_headings.py index 147b95d5e..fdab948ee 100644 --- a/.github/check_headings.py +++ b/.github/check_headings.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# uv: typer, rich import re import sys diff --git a/.github/workflows/check-headings.yml b/.github/workflows/check-headings.yml index 1f73183f3..78456766f 100644 --- a/.github/workflows/check-headings.yml +++ b/.github/workflows/check-headings.yml @@ -10,13 +10,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all branches and tags - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.x" + - name: Install uv + uses: astral-sh/setup-uv@v5 - name: Get changed markdown files id: changed-files @@ -25,4 +21,4 @@ jobs: files: "**.md" - name: Run check_headings.py - run: python .github/check_headings.py ${{ steps.changed-files.outputs.all_changed_files }} + run: uv run .github/check_headings.py ${{ steps.changed-files.outputs.all_changed_files }} From f10f4fe18690e5157a609870f39dddf6d74dbd9f Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 21:26:49 +0200 Subject: [PATCH 6/8] AI don't know uv spec --- .github/check_headings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/check_headings.py b/.github/check_headings.py index fdab948ee..9ebe458d1 100644 --- 
a/.github/check_headings.py +++ b/.github/check_headings.py @@ -1,5 +1,7 @@ #!/usr/bin/env python -# uv: typer, rich +# /// script +# dependencies = ["typer","rich"] +# /// import re import sys From a4bf64cc4bccc4e826df0cae69f722612e396ac2 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Wed, 9 Apr 2025 21:29:32 +0200 Subject: [PATCH 7/8] Force ANSI colours if on github actions --- .github/check_headings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/check_headings.py b/.github/check_headings.py index 9ebe458d1..c0f20200e 100644 --- a/.github/check_headings.py +++ b/.github/check_headings.py @@ -3,6 +3,7 @@ # dependencies = ["typer","rich"] # /// +import os import re import sys from pathlib import Path @@ -14,7 +15,7 @@ from rich.table import Table app = typer.Typer() -console = Console() +console = Console(force_terminal=True if os.getenv("GITHUB_ACTIONS") else False) def is_heading(line: str) -> bool: From 31427e91966fc170a4ed051db9b20eaebe56aba1 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Tue, 3 Jun 2025 16:00:16 +0200 Subject: [PATCH 8/8] Document in the readme --- README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/README.md b/README.md index b899bdfb2..9a1c548b1 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,33 @@ We welcome fixes and improvements from the community. Please fork the repository You can find instructions about how to develop the training material code in [`CONTRIBUTING.md`](CONTRIBUTING.md). If you want to contribute with a translation instead, check [`TRANSLATING.md`](TRANSLATING.md). +### Headings CI tests + +This repository includes a Python tool to validate markdown heading numbering consistency across training materials. + +The `check_headings.py` script ensures: + +- Sequential numbering at each level (1., 1.1., 1.2., etc.) +- Trailing periods after heading numbers +- Heading levels match numbering depth (## for 1., ### for 1.1.) 
+ +The easiest way to run it is [with `uv`](https://docs.astral.sh/uv/), which handles dependencies for you automatically: + +```bash +# Check files for issues +uv run .github/check_headings.py docs/**/*.md +``` + +```bash +# Auto-fix detected issues +uv run .github/check_headings.py --fix docs/**/*.md +``` + +Otherwise, run `pip install typer rich` then `python .github/check_headings.py`. + +The script runs automatically in CI on markdown file changes via GitHub Actions, +and will cause a CI failure if any incorrect headings are found. + ## Credits & Copyright This training material is developed and maintained by [Seqera](https://seqera.io) and released under an open-source license ([CC BY-NC-ND](https://creativecommons.org/licenses/by-nc-nd/4.0/)) for the benefit of the community. You are welcome to reuse these materials according to the terms of the license. If you are an instructor running your own trainings, we'd love to hear about how it goes and what we could do to make it easier.