|
| 1 | +## IMPORTS |
| 2 | +import os |
| 3 | +import sys |
| 4 | +import re |
| 5 | +import json |
| 6 | +import argparse |
| 7 | + |
## CONSTANTS
# Matches a markdown link "[text](http(s)://...)" that is not preceded by "!"
# (i.e. not an image) and is not immediately followed by a target attribute
# such as {target=_blank} or {:target="_blank"}.
REGEX_PATTERN = r"(?<!!)\[.*?\]\(\s*https?:\/\/[^\(\)]+\)(?!\{\s*:?\s*target\s*=\s*(?:\s*_blank\s*|\s*\"\s*_blank\s*\"\s*)\})"
# Matches an http(s) URL; used to pull the URL out of a matched markdown link
# before comparing it against the configured ignore patterns.
LINK_REGEX = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)"
# Status labels printed next to each checked file.
PASSED_MSG = "[PASSED]"
FAILED_MSG = "[FAILED]"
# Prefix and suffix of the annotation message; the offending link is placed between them.
ERROR_MSG1 = "External links should redirect to a new tab. Change the link to "
ERROR_MSG2 = "{target=_blank}"
# Fallback values for the --root and --config command line arguments.
ROOT_DEFAULT = "./"
CONFIG_DEFAULT = ""


# Annotation strings for GitHub error annotations
# (module-level list: appended to while linting, printed to stderr by main() on failure)
annotations = []
| 21 | + |
| 22 | + |
| 23 | +## MAIN LOGIC |
def main():
    """
    Runs the linter from the command line.

    Reads the root directory and configuration file path from the command line
    arguments, lints every markdown file found under the root and, if any file
    fails, prints the collected GitHub annotations to stderr and exits with
    status code 1.
    """
    cli_args = parseInputArguments()
    # An omitted or empty argument value falls back to the module defaults.
    root = cli_args.root or ROOT_DEFAULT
    config_file = cli_args.config or CONFIG_DEFAULT

    # Perform the linting process
    ignore_patterns = get_ignore_patterns(config_file)
    ignore_files = get_ignore_files(config_file)
    markdown_files = get_markdown_files(root, ignore_files)

    if lint_markdown_files(markdown_files, REGEX_PATTERN, ignore_patterns):
        return

    # Linting failed: emit the annotations for GitHub and signal failure
    print("\n".join(annotations), file=sys.stderr)
    sys.exit(1)
| 39 | + |
| 40 | + |
| 41 | +## HELPER FUNCTIONS |
def parseInputArguments():
    """
    Builds the command line parser and parses the arguments given to the script.

    Both options take an optional string value (nargs='?') and fall back to the
    module-level defaults when the option is not supplied.

    Returns:
        Namespace: A namespace object exposing the parsed `root` and `config` values.
    """
    # Settings shared by both options
    option_settings = {"nargs": "?", "type": str}

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r",
        "--root",
        default=ROOT_DEFAULT,
        help='Path to root directory where linting begins',
        **option_settings,
    )
    parser.add_argument(
        "-c",
        "--config",
        default=CONFIG_DEFAULT,
        help='Path to a JSON configuration file specifying ignore files and patterns',
        **option_settings,
    )
    return parser.parse_args()
| 54 | + |
| 55 | + |
def get_ignore_patterns(config_file):
    """
    Obtain the list of patterns to ignore from the JSON configuration file.

    The file is expected to hold an object whose optional "ignorePatterns" key
    is a list of objects; the "pattern" value of every such object is collected.
    Entries that are not objects, or that have no "pattern" key, are skipped.

    Args:
        config_file (str): The path of the configuration file.

    Returns:
        List[str]: A list of regex patterns to ignore when performing linting.
            Empty if the configuration file does not exist (a warning is
            printed to stderr in that case).
    """
    if not os.path.isfile(config_file):
        print("Warning: Configuration file not found", file=sys.stderr)
        return []

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(config_file, encoding="utf-8") as f:
        data = json.load(f)

    return [
        row["pattern"]
        for row in data.get("ignorePatterns", [])
        # Guard against non-object rows, where `in` would be a substring test or raise.
        if isinstance(row, dict) and "pattern" in row
    ]
| 75 | + |
| 76 | + |
def get_ignore_files(config_file):
    """
    Obtain the list of files to ignore from the JSON configuration file.

    The file is expected to hold an object whose optional "ignoreFiles" key is
    a list of objects; the "file" value of every such object is collected.
    Entries that are not objects, or that have no "file" key, are skipped.

    Args:
        config_file (str): The path of the configuration file.

    Returns:
        List[str]: A list of markdown file paths to ignore when performing
            linting. Empty if the configuration file does not exist (a warning
            is printed to stderr in that case).
    """
    if not os.path.isfile(config_file):
        print("Warning: Configuration file not found", file=sys.stderr)
        return []

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(config_file, encoding="utf-8") as f:
        data = json.load(f)

    return [
        row["file"]
        for row in data.get("ignoreFiles", [])
        # Guard against non-object rows, where `in` would be a substring test or raise.
        if isinstance(row, dict) and "file" in row
    ]
| 96 | + |
| 97 | + |
def get_markdown_files(root_dir, ignore_files):
    """
    Recursively searches for markdown files (.md) starting at a specified root directory.

    Args:
        root_dir (str): The root directory to start the search at.
        ignore_files (List[str]): A list of markdown file paths to ignore. A file is
            skipped when its path, as produced by joining the walked directory and
            the file name, is equal to one of these entries.

    Returns:
        List[str]: A list of markdown file paths, each prefixed with the directory
            it was found in (which itself starts with root_dir).
    """
    markdown_files = []
    markdown_matcher = re.compile(r".+\.md")
    for current_dir, _dirs, filenames in os.walk(root_dir):
        for basename in filenames:
            # fullmatch: the name must END in ".md". A start-anchored match()
            # would also accept names such as "notes.mdx" or "a.md.bak".
            if markdown_matcher.fullmatch(basename) is None:
                continue
            full_path = os.path.join(current_dir, basename)
            if full_path not in ignore_files:
                markdown_files.append(full_path)
    return markdown_files
| 117 | + |
| 118 | + |
def lint_markdown_files(files, pattern, ignore_patterns):
    """
    Checks every given markdown file for external links that do not open in a
    new tab, printing a status line per file and a final summary.

    Args:
        files (List[str]): A list of markdown file paths relative to some root directory.
        pattern (str): A raw string containing the regular expression pattern to be used for linting.
        ignore_patterns (List[str]): A list of regex patterns to ignore.

    Returns:
        bool: True if every file passes its check (or there are no files), False otherwise.
    """
    outcomes = []

    # Checks are numbered from 1 in the printed output
    for check_number, path in enumerate(files, start=1):
        file_ok, errors = check_markdown_file(path, pattern, ignore_patterns)
        outcomes.append(file_ok)
        print_check_message(path, file_ok, check_number, errors)

    # Each outcome is a bool, so the sum is the number of passing files
    print(f"{sum(outcomes)}/{len(files)} checks passed")

    return all(outcomes)
| 145 | + |
| 146 | + |
def check_markdown_file(filename, pattern, ignore_patterns):
    """
    Lints a specified markdown file line by line.

    Args:
        filename (str): The path to the markdown file relative to some root directory.
        pattern (str): A raw string containing the regular expression pattern to be used for linting.
        ignore_patterns (List[str]): A list of regex patterns to ignore.

    Returns:
        tuple[bool, str]: Returns a tuple containing two variables:
            1. A boolean variable that indicates if the check passes
            2. A string containing an error message. This string is empty if the check passes.
    """
    passed = True
    error_messages = []

    # Read as UTF-8 explicitly so non-ASCII markdown does not depend on the
    # platform's locale encoding.
    with open(filename, encoding="utf-8") as file:
        # Stream the file; line_number is 0-based here and is converted to a
        # 1-based number by prepare_error_messages.
        for line_number, line_text in enumerate(file):
            match = non_redirecting_hyperlinks(line_text, pattern, ignore_patterns)
            passed, buffer = prepare_error_messages(match, filename, line_number, passed)
            error_messages.append(buffer)
    return passed, "".join(error_messages)
| 170 | + |
| 171 | + |
def non_redirecting_hyperlinks(line_text, pattern, ignore_patterns):
    """
    Helper function that finds all hyperlinks missing a redirection attribute on a given line.

    A matched link is dropped from the result when the URL extracted from it
    (using LINK_REGEX) matches, from its start, any of the ignore patterns.
    Links from which no URL can be extracted are never ignored.

    Args:
        line_text (str): A line from a markdown file being linted.
        pattern (str): A raw string containing the regular expression pattern to be used for linting.
        ignore_patterns (List[str]): A list of regex patterns to ignore.

    Returns:
        List[str]: A list of hyperlinks missing a redirection attribute
    """
    links = re.findall(pattern, line_text, flags=re.M)
    if not ignore_patterns:
        return links

    ignore_expressions = [re.compile(ignore_pattern) for ignore_pattern in ignore_patterns]
    url_matcher = re.compile(LINK_REGEX)

    def is_ignored(link):
        found = url_matcher.search(link)
        if found is None:
            # e.g. "http://localhost:8000" has no dotted host, so LINK_REGEX
            # finds nothing; keep reporting the link instead of crashing.
            return False
        url = found.group()
        # LINK_REGEX allows ")" in its trailing part, so the markdown link's
        # closing parenthesis is usually captured; strip it only when present
        # so a genuine URL character is never cut off.
        if url.endswith(")"):
            url = url[:-1]
        return any(expression.match(url) for expression in ignore_expressions)

    return [link for link in links if not is_ignored(link)]
| 191 | + |
| 192 | + |
def prepare_error_messages(match, filename, line_number, passed):
    """
    Helper function that builds the error output for the offending links found on one line.

    For every link a report line is produced and a GitHub error annotation is
    appended to the module-level `annotations` list.

    Args:
        match (list[str]): A list of strings representing the matched links.
        filename (str): The name of the file being checked for errors.
        line_number (int): The zero-based line number where the error occurred.
        passed (bool): A bool specifying if previous links have passed or not.

    Returns:
        tuple[bool, str]: Returns a tuple containing two variables:
            1. The incoming passed bool if there are no failed hyperlinks, False if there are.
            2. A string containing the generated error message buffer (empty if nothing failed).
    """
    # Nothing matched on this line: leave the running status untouched
    if not match:
        return passed, ""

    # Reported line numbers are one-based
    reported_line = line_number + 1
    report_lines = []
    for link in match:
        report_lines.append(f"\tLine {reported_line}: {link}\n")
        annotations.append(f"::error file={filename},line={reported_line}::{ERROR_MSG1 + link + ERROR_MSG2}")
    return False, "".join(report_lines)
| 218 | + |
| 219 | + |
def print_check_message(filename, check_passed, check_number, error_message):
    """
    Prints the outcome of the check on one markdown file, followed by any
    error details collected for it.

    Args:
        filename (str): The path to the markdown file relative to some root directory.
        check_passed (bool): Whether the check on the markdown file passed or not.
        check_number (int): The check number.
        error_message (str): An error message (empty if the check passed).
    """
    if check_passed:
        status = PASSED_MSG
    else:
        status = FAILED_MSG
    headline = f"Check {check_number}: {status} {filename}\n"
    print(headline + error_message)
| 233 | + |
| 234 | + |
# Run the linter only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
0 commit comments