import argparse
import collections
import errno
import json
import pathlib
import sys
from typing import Iterable

import rich_argparse

# pyperf/pyperformance run benchmarks by benchmark name, but store results,
# including loops used, by data point name (as reported by the benchmark).
# There's no mapping from the one to the other that we can easily use (other
# than running benchmarks one by one and checking what data points they
# report), so here's a hand-written mapping instead. Benchmarks that use
# their own name for the data point are omitted. This will probably (slowly)
# get out of date, but so be it.
#
# (Sorted by value, then key.)
DATAPOINT_TO_BENCHMARK = {
    "many_optionals": "argparse",
    "subparsers": "argparse",
    "async_tree_none": "async_tree",
    "async_tree_none_tg": "async_tree_tg",
    "bench_mp_pool": "concurrent_imap",
    "bench_thread_pool": "concurrent_imap",
    "deepcopy_memo": "deepcopy",
    "deepcopy_reduce": "deepcopy",
    "create_gc_cycles": "gc_collect",
    "genshi_text": "genshi",
    "genshi_xml": "genshi",
    "logging_format": "logging",
    "logging_silent": "logging",
    "logging_simple": "logging",
    "shortest_path": "networkx",
    "connected_components": "networkx_connected_components",
    "k_core": "networkx_k_core",
    "pprint_pformat": "pprint",
    "pprint_safe_repr": "pprint",
    "scimark_fft": "scimark",
    "scimark_lu": "scimark",
    "scimark_monte_carlo": "scimark",
    "scimark_sor": "scimark",
    "scimark_sparse_mat_mult": "scimark",
    "sqlglot_v2_normalize": "sqlglot_v2",
    "sympy_expand": "sympy",
    "sympy_integrate": "sympy",
    "sympy_str": "sympy",
    "sympy_sum": "sympy",
    "xml_etree_generate": "xml_etree",
    "xml_etree_iterparse": "xml_etree",
    "xml_etree_parse": "xml_etree",
    "xml_etree_process": "xml_etree",
}
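
# For example, a result reporting the "sympy_expand" data point is counted
# towards the "sympy" benchmark's loop count; data points not in the mapping
# are assumed to already use their benchmark's name.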

# The set of bm_* directories in pyperformance and pyston-benchmarks, plus
# the aliases defined in their MANIFEST files (entries with
# '<local:$dirname>').
KNOWN_BENCHMARKS = {
    "2to3",
    "aiohttp",
    "argparse",
    "argparse_subparsers",
    "async_generators",
    "async_tree",
    "async_tree_cpu_io_mixed",
    "async_tree_cpu_io_mixed_tg",
    "async_tree_eager",
    "async_tree_eager_cpu_io_mixed",
    "async_tree_eager_cpu_io_mixed_tg",
    "async_tree_eager_io",
    "async_tree_eager_io_tg",
    "async_tree_eager_memoization",
    "async_tree_eager_memoization_tg",
    "async_tree_eager_tg",
    "async_tree_io",
    "async_tree_io_tg",
    "async_tree_memoization",
    "async_tree_memoization_tg",
    "async_tree_tg",
    "asyncio_tcp",
    "asyncio_tcp_ssl",
    "asyncio_websockets",
    "bpe_tokeniser",
    "chameleon",
    "chaos",
    "comprehensions",
    "concurrent_imap",
    "coroutines",
    "coverage",
    "crypto_pyaes",
    "dask",
    "decimal_factorial",
    "decimal_pi",
    "deepcopy",
    "deltablue",
    "django_template",
    "djangocms",
    "docutils",
    "dulwich_log",
    "fannkuch",
    "flaskblogging",
    "float",
    "gc_collect",
    "gc_traversal",
    "generators",
    "genshi",
    "gevent_hub",
    "go",
    "gunicorn",
    "hexiom",
    "hg_startup",
    "html5lib",
    "json",
    "json_dumps",
    "json_loads",
    "kinto",
    "logging",
    "mako",
    "mdp",
    "meteor_contest",
    "mypy2",
    "nbody",
    "networkx",
    "networkx_connected_components",
    "networkx_k_core",
    "nqueens",
    "pathlib",
    "pickle",
    "pickle_dict",
    "pickle_list",
    "pickle_pure_python",
    "pidigits",
    "pprint",
    "pycparser",
    "pyflate",
    "pylint",
    "python_startup",
    "python_startup_no_site",
    "pytorch_alexnet_inference",
    "raytrace",
    "regex_compile",
    "regex_dna",
    "regex_effbot",
    "regex_v8",
    "richards",
    "richards_super",
    "scimark",
    "spectral_norm",
    "sphinx",
    "sqlalchemy_declarative",
    "sqlalchemy_imperative",
    "sqlglot_v2",
    "sqlglot_v2_optimize",
    "sqlglot_v2_parse",
    "sqlglot_v2_transpile",
    "sqlite_synth",
    "sympy",
    "telco",
    "thrift",
    "tomli_loads",
    "tornado_http",
    "typing_runtime_protocols",
    "unpack_sequence",
    "unpickle",
    "unpickle_list",
    "unpickle_pure_python",
    "xml_etree",
}


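# A results file is expected to be a pyperf JSON dump, roughly shaped like
# the sketch below (fields this script doesn't read are elided; the name
# and loop count are illustrative):
#
#   {
#       "metadata": {...},
#       "benchmarks": [
#           {"metadata": {"name": "sympy_expand", "loops": 64}, ...},
#           ...
#       ]
#   }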
def parse_result(results_file, benchmark_data):
    with results_file.open() as f:
        result = json.load(f)
    bms = result["benchmarks"]
    if len(bms) == 1 and "metadata" not in bms[0]:
        # Sometimes a .json file contains just a single benchmark.
        bms = [result]
    for bm in bms:
        if "metadata" not in bm:
            raise RuntimeError(f"Invalid data {bm.keys()!r} in {results_file}")
        name = bm["metadata"]["name"]
        name = DATAPOINT_TO_BENCHMARK.get(name, name)
        assert name is not None  # to satisfy pyright.
        if name not in KNOWN_BENCHMARKS:
            print(
                f"WARNING: unknown benchmark {name!r} in {results_file}",
                file=sys.stderr,
            )
            # Avoid repeated warnings.
            KNOWN_BENCHMARKS.add(name)
        benchmark_data[name].append(bm["metadata"]["loops"])


def _main(
    loops_file: pathlib.Path,
    update: bool,
    overwrite: bool,
    merger: str,
    results: Iterable[pathlib.Path],
):
    if not update and not overwrite and loops_file.exists():
        raise OSError(
            errno.EEXIST,
            f"{loops_file} exists (use -f to overwrite, -u to merge data)",
        )
    benchmark_data = collections.defaultdict(list)
    if update:
        parse_result(loops_file, benchmark_data)
    for result_file in results:
        parse_result(result_file, benchmark_data)

    merge_func = {
        "max": max,
        "min": min,
    }[merger]
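    # E.g. with --select=max, a benchmark whose parsed results used 8 and
    # 16 loops is recorded with 16.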

    # pyperformance expects a specific layout, and needs the top-level
    # metadata even if it's empty.
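    # For example, with a single benchmark the file written below comes out
    # as (formatting per the json.dump call; the loop count is illustrative):
    #
    #   {
    #       "benchmarks": [
    #           {"metadata": {"loops": 2048, "name": "2to3"}}
    #       ],
    #       "metadata": {}
    #   }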
    loops_data = {"benchmarks": [], "metadata": {}}
    for bm in sorted(benchmark_data):
        loops = merge_func(benchmark_data[bm])
        bm_result = {"metadata": {"name": bm, "loops": loops}}
        loops_data["benchmarks"].append(bm_result)
    with loops_file.open("w") as f:
        json.dump(loops_data, f, sort_keys=True, indent=4)
        f.write("\n")


def main():
    parser = argparse.ArgumentParser(
        description="""
        Synthesize a loops.json file for use with `pyperformance`'s
        `--same-loops` (or `PYPERFORMANCE_LOOPS_FILE`) from one or more
        benchmark results.
        """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "-o", "--loops_file", help="loops file to write to", required=True
    )
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-u", "--update", action="store_true", help="add to existing loops file"
    )
    group.add_argument(
        "-f", "--overwrite", action="store_true", help="replace loops file"
    )
    parser.add_argument(
        "-s",
        "--select",
        choices=("max", "min"),
        default="max",
        help="how to merge multiple runs",
    )
    parser.add_argument("results", nargs="+", help="benchmark results to parse")
    args = parser.parse_args()

    _main(
        pathlib.Path(args.loops_file),
        args.update,
        args.overwrite,
        args.select,
        [pathlib.Path(r) for r in args.results],
    )


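# Example invocation (script and file names are illustrative; --same-loops
# is the pyperformance option named in the description above):
#
#   python synthesize_loops_file.py -o loops.json results/*.json
#   pyperformance run --same-loops loops.json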
if __name__ == "__main__":
    main()