Skip to content

Commit e11ae42

Browse files
author
Amr Akmal Moustafa Abouelmagd
committed
Merge branch 'develop' into 809-self-standard-hwloc-configs
2 parents b5a1d30 + 3e49480 commit e11ae42

File tree

91 files changed

+1075
-1174
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+1075
-1174
lines changed

.github/actions/dynamic-dry-run/action.yml

Lines changed: 0 additions & 39 deletions
This file was deleted.

.github/utils/dryrun.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/bash
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0

# Dry-run a single benchmark/system pairing.
#
# Usage: dryrun.sh "<benchmark spec>" "<system spec>"
#
# Each spec arrives as ONE argument that may hold several space-separated
# words (for example "llnl-cluster cluster=dane"). The script initializes the
# system and experiment into timestamped directories, sets up a workspace, and
# finishes with a ramble `workspace setup --dry-run`; the exit status of that
# final command is the exit status of the script.

benchmark_spec="$1"
system_spec="$2"

# Directory names are derived from the current epoch second.
timestamp=$(date +%s)
benchmark="b-$timestamp"
system="s-$timestamp"

# $system_spec and $benchmark_spec are expanded unquoted on purpose so that a
# multi-word spec is split into separate command-line arguments.
# Stop at the first failing benchpark step so the reported error is the root
# cause rather than a follow-on failure from a later command.
./bin/benchpark system init --dest="$system" $system_spec || exit $?
./bin/benchpark experiment init --dest="$benchmark" --system="$system" $benchmark_spec || exit $?
./bin/benchpark setup "./$benchmark" "./$system" workspace/ || exit $?
. workspace/setup.sh
ramble \
    --workspace-dir "workspace/$benchmark/$system/workspace" \
    --disable-logger \
    workspace setup --dry-run

.github/utils/dryruns.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
# Copyright 2023 Lawrence Livermore National Security, LLC and other
2+
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
3+
#
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
import subprocess
7+
import time
8+
import sys
9+
import argparse
10+
import os
11+
12+
import benchpark.paths
13+
14+
sys.path.append(str(benchpark.paths.benchpark_home) + "/spack/lib/spack")
15+
from lib.benchpark.accounting import benchpark_experiments # noqa: E402
16+
17+
DEFAULT_SYSTEM = "llnl-cluster cluster=dane"
18+
# Skip experiments
19+
SKIP_EXPR = [
20+
# System does not have enough cores per node
21+
"gromacs+openmp aws-pcluster instance_type=c6g.xlarge",
22+
"gromacs+openmp aws-pcluster instance_type=c4.xlarge",
23+
"gromacs+openmp generic-x86",
24+
"stream aws-pcluster instance_type=c6g.xlarge",
25+
"stream aws-pcluster instance_type=c4.xlarge",
26+
"stream cscs-daint",
27+
"stream generic-x86",
28+
# Broken URLs in application.py will cause the dry run to fail
29+
"genesis",
30+
]
31+
32+
33+
def run_subprocess_cmd(cmd_list, decode=False):
    """Run a command, capturing its output, and fail loudly on a non-zero exit.

    Args:
        cmd_list: The command and its arguments as a list of strings.
        decode: When True, return the captured stdout decoded as UTF-8;
            otherwise return the ``subprocess.CompletedProcess`` object.

    Returns:
        The decoded stdout string, or the ``CompletedProcess`` instance.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The message
            contains the command line plus the captured stdout and stderr.
    """
    try:
        result = subprocess.run(cmd_list, capture_output=True, check=True)
    except subprocess.CalledProcessError as e:
        # The captured streams are bytes; decode them so the message shows the
        # real text (with real newlines) instead of a b'...' repr.
        stdout = e.stdout.decode("utf-8", errors="replace") if e.stdout else ""
        stderr = e.stderr.decode("utf-8", errors="replace") if e.stderr else ""
        raise RuntimeError(
            f"Command: {' '.join(cmd_list)}\nOutput: {stdout}\nError: {stderr}"
        ) from e
    return result.stdout.decode("utf-8") if decode else result
41+
42+
43+
def _is_skipped(espec, sspec):
    """Return True when this experiment/system pairing is listed in SKIP_EXPR.

    An entry matches when it equals the full "<experiment> <system>" string,
    the experiment spec alone, or the bare benchmark name (the part of the
    experiment spec before the first "+", "~" or space). The latter two forms
    let a system-independent entry such as "genesis" take effect.
    """
    benchmark = espec
    for sep in ("+", "~", " "):
        benchmark = benchmark.split(sep, 1)[0]
    return (
        f"{espec} {sspec}" in SKIP_EXPR
        or espec in SKIP_EXPR
        or benchmark in SKIP_EXPR
    )


def main():
    """Dry-run every experiment/system combination and report the failures.

    The experiment, system and modifier lists are obtained from
    ``./bin/benchpark list ...``. Experiments are grouped by test type, each
    group is paired with a list of systems, and every pairing not listed in
    SKIP_EXPR is passed to ``.github/utils/dryrun.sh``. With ``--test`` only
    one group is run; with ``--dryrun`` the pairings are printed but the
    script is not invoked.

    Exits the process with status 1 if any pairing failed, otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--test",
        choices=[
            "mpi",
            "cuda",
            "rocm",
            "openmp",
            "strong",
            "weak",
            "throughput",
            "modifiers",
        ],
        help="Only run tests of this type",
    )
    parser.add_argument(
        "--dryrun", action="store_true", help="Dry runs this script for testing."
    )
    args = parser.parse_args()

    expr_str = run_subprocess_cmd(
        ["./bin/benchpark", "list", "experiments", "--no-title"], decode=True
    )
    # All spaces and tabs are stripped here; the separator in front of
    # "scaling=" is put back in the loop below.
    experiments = [
        e for e in expr_str.replace(" ", "").replace("\t", "").split("\n") if e != ""
    ]

    mpi_only_expr = set()
    cuda_expr = []
    rocm_expr = []
    openmp_expr = []
    strong_expr = []
    weak_expr = []
    throughput_expr = []

    for e in experiments:
        if "scaling" in e:
            e = e.replace("scaling=", " scaling=")
        if "+strong" in e or "+weak" in e or "+throughput" in e:
            e += "~single_node"
        elif "+" not in e and "=" not in e:
            # No variants at all: treated as a plain MPI experiment.
            mpi_only_expr.add(e)

        # First matching keyword wins, so e.g. a cuda scaling experiment is
        # only placed in the cuda group.
        if "cuda" in e:
            cuda_expr.append(e)
        elif "rocm" in e:
            rocm_expr.append(e)
        elif "openmp" in e:
            openmp_expr.append(e)
        elif "strong" in e:
            strong_expr.append(e)
        elif "weak" in e:
            weak_expr.append(e)
        elif "throughput" in e:
            throughput_expr.append(e)

    # Systems available for each programming model ("mpi" means no filter).
    str_dict = {}
    for pmodel in ["mpi", "cuda", "rocm", "openmp"]:
        cmd = ["./bin/benchpark", "list", "systems", "--no-title"]
        if pmodel != "mpi":
            cmd += ["-p", pmodel]
        output = run_subprocess_cmd(cmd, decode=True)
        # Only 4-space runs and tabs are removed, so single spaces inside a
        # system spec survive.
        str_dict[pmodel] = [
            i
            for i in output.replace(" " * 4, "").replace("\t", "").split("\n")
            if i != ""
        ]

    mods_str = run_subprocess_cmd(
        ["./bin/benchpark", "list", "modifiers", "--no-title"], decode=True
    )
    nmods = [
        i
        for i in mods_str.replace(" " * 4, "").replace("\t", "").split("\n")
        if i != "" and i not in ["allocation", "caliper"]
    ]

    # Iterate the MPI-only experiments in sorted order so that the sequence of
    # tests (and therefore the log) is the same from run to run.
    mpi_exprs = sorted(mpi_only_expr)

    caliper_exp = [
        e.replace("+caliper", " caliper=time")
        for e in benchpark_experiments(exclude_variants=[])
        if "+caliper" in e and e.split("+")[0] in mpi_only_expr
    ]
    modifiers_expr = caliper_exp + [
        e + " " + m + "=on" for e in mpi_exprs for m in nmods
    ]

    # (test type, experiment specs, system specs to pair them with)
    exprs_to_sys = [
        ("mpi", mpi_exprs, str_dict["mpi"]),
        ("cuda", cuda_expr, str_dict["cuda"]),
        ("rocm", rocm_expr, str_dict["rocm"]),
        ("openmp", openmp_expr, str_dict["openmp"]),
        ("strong", strong_expr, str_dict["mpi"]),
        ("weak", weak_expr, str_dict["mpi"]),
        ("throughput", throughput_expr, str_dict["mpi"]),
        ("modifiers", modifiers_expr, [DEFAULT_SYSTEM]),
    ]

    if args.test:
        exprs_to_sys = [tup for tup in exprs_to_sys if tup[0] == args.test]

    # This total includes pairings that will be skipped below.
    total_tests = sum(
        len(expr_spec_list) * len(sys_spec_list)
        for _, expr_spec_list, sys_spec_list in exprs_to_sys
    )
    print(f"Total tests to run: {total_tests}")

    start = time.time()
    errors = {}
    fail_tests = 0
    ran_tests = 0
    skip_tests = 0
    for _, expr_spec_list, sys_spec_list in exprs_to_sys:
        for espec in expr_spec_list:
            for sspec in sys_spec_list:
                expr = f"{espec} {sspec}"
                if _is_skipped(espec, sspec):
                    skip_tests += 1
                    print(f'Skipping "{expr}"')
                    continue
                ran_tests += 1
                print(f'Running "{expr}"')
                if args.dryrun:
                    continue
                try:
                    subprocess.run(
                        ["bash", ".github/utils/dryrun.sh", espec, sspec],
                        env=os.environ.copy(),
                        capture_output=True,
                        check=True,
                    )
                except subprocess.CalledProcessError as e:
                    errors[expr] = e.stderr.decode(errors="replace")
                    fail_tests += 1
    end = time.time()

    for i, (key, value) in enumerate(errors.items()):
        print("=" * 100)
        print(str(i + 1) + ". " + key)
        print(value)

    print(f"Elapsed: {(end - start) / 60:.2f} minutes")
    print(
        f"{ran_tests - fail_tests} Passing. {fail_tests} Failing. {skip_tests} Skipped."
    )

    sys.exit(1 if fail_tests > 0 else 0)
182+
183+
184+
if __name__ == "__main__":
185+
main()

.github/workflows/ci.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,11 @@ jobs:
127127
if: ${{ needs.changes.outputs.run == 'true' }}
128128
needs: changes
129129
uses: ./.github/workflows/run.yml
130+
# 'workflow_call' actions do not get access to secrets
131+
# normally. So, we add this (plus the corresponding
132+
# settings in coverage.yml) to pass the token on
133+
secrets:
134+
BENCHPARK_CODECOV_TOKEN: ${{ secrets.BENCHPARK_CODECOV_TOKEN }}
130135
coverage:
131136
if: ${{ needs.changes.outputs.coverage == 'true' }}
132137
needs: changes

.github/workflows/coverage.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ jobs:
1818
./bin/benchpark unit-test --cov=./ --cov-branch --cov-report=xml --durations=20 -ra
1919
- name: Upload coverage to Codecov
2020
uses: codecov/codecov-action@v4
21-
if: true
2221
with:
2322
token: ${{ secrets.BENCHPARK_CODECOV_TOKEN }}
2423
directory: ./coverage/reports

0 commit comments

Comments
 (0)