
Commit b9cfece

Fixes
- readme: typos and content
- requirements: slimmer
- tests: assertions
- run: also run without line_profiler
- routines: dimension check
1 parent ae31357 commit b9cfece

File tree

7 files changed: +125 -50 lines

- README.md
- pyproject.toml
- requirements.txt
- scripts/run.py
- src/matmul/routines.py
- src/matmul/utils.py
- test/test_shared.py

README.md

Lines changed: 20 additions & 13 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 4 additions & 1 deletion
@@ -1,12 +1,13 @@
 # Choosing a build backend:
+[build-system]
 requires = ["setuptools"]
 build-backend = "setuptools.build_meta"


 [project]
 name = "matmul"
 version = "0.0.1"
-description = "Gabriele Codega"
+description = "Distributed matrix multiplication."
 readme = "README.md"
 requires-python = ">=3.11"
 license = { file = "LICENSE" }
@@ -22,3 +23,5 @@ dependencies = { file = ["requirements.txt"] }

 [project.optional-dependencies]
 test = ["pytest"]
+profile = ["line_profiler"]
+dev = ["pytest", "line_profiler"]
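Usage note (not part of the diff): with these optional dependency groups declared, the extras can be pulled in at install time from the repository root, e.g. pip install -e ".[dev]" for pytest plus line_profiler, or pip install -e ".[profile]" for line_profiler alone.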

requirements.txt

Lines changed: 1 addition & 31 deletions
@@ -1,35 +1,5 @@
-filelock==3.17.0
-fsspec==2025.2.0
-iniconfig==2.0.0
-Jinja2==3.1.5
-line_profiler==4.2.0
 llvmlite==0.44.0
-MarkupSafe==3.0.2
-mpi4py==4.0.3
-mpmath==1.3.0
-networkx==3.4.2
+mpi4py==4.0.3 --no-binary=mpi4py
 numba==0.61.0
 numpy==2.1.3
-nvidia-cublas-cu12==12.4.5.8
-nvidia-cuda-cupti-cu12==12.4.127
-nvidia-cuda-nvrtc-cu12==12.4.127
-nvidia-cuda-runtime-cu12==12.4.127
-nvidia-cudnn-cu12==9.1.0.70
-nvidia-cufft-cu12==11.2.1.3
-nvidia-curand-cu12==10.3.5.147
-nvidia-cusolver-cu12==11.6.1.9
-nvidia-cusparse-cu12==12.3.1.170
-nvidia-cusparselt-cu12==0.6.2
-nvidia-ml-py==12.570.86
-nvidia-nccl-cu12==2.21.5
-nvidia-nvjitlink-cu12==12.4.127
-nvidia-nvtx-cu12==12.4.127
-packaging==24.2
-pluggy==1.5.0
-pytest==8.3.5
 PyYAML==6.0.2
-scipy==1.15.2
-sympy==1.13.1
-torch==2.6.0
-triton==3.2.0
-typing_extensions==4.12.2

scripts/run.py

Lines changed: 10 additions & 1 deletion
@@ -1,3 +1,5 @@
+from functools import wraps
+from warnings import warn
 import numpy as np
 from numba import cuda

@@ -11,7 +13,14 @@
 import argparse
 import importlib

-from line_profiler import profile
+try:
+    from line_profiler import profile
+except ModuleNotFoundError:
+    warn("Did not find line_profiler. Please install it to access profiling information.")
+    def profile(f,*args,**kwargs):
+        def wrapper(*args,**kwargs):
+            f(*args,**kwargs)
+        return wrapper

 @profile
 def main_cpu(params: dict):
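The fallback above keeps the script runnable when line_profiler is missing, though the inner wrapper discards the wrapped function's return value and the newly imported functools.wraps is not used in this hunk. A minimal sketch of a fuller no-op stand-in (an illustration, not the committed code) could look like this:

from functools import wraps
from warnings import warn

try:
    from line_profiler import profile
except ModuleNotFoundError:
    warn("Did not find line_profiler. Please install it to access profiling information.")

    # No-op replacement: behaves like an identity decorator while preserving
    # the wrapped function's name, docstring, and return value.
    def profile(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            return f(*args, **kwargs)
        return wrapper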

src/matmul/routines.py

Lines changed: 11 additions & 2 deletions
@@ -2,6 +2,7 @@
 import numba

 def matmul(A,B,C,_):
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), f"Matrices have incompatible shapes: {A.shape}, {B.shape}, {C.shape}"
     for i in range(A.shape[0]):
         for j in range(B.shape[1]):
             tmp = 0.
@@ -11,13 +12,15 @@ def matmul(A,B,C,_):

 @njit(void(float64[:,::1],float64[:,::1],float64[:,:],numba.optional(int32)), cache=True)
 def matmul_numba_serial(A,B,C,_):
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), f"Matrices have incompatible shapes: {A.shape}, {B.shape}, {C.shape}"
     for i in range(A.shape[0]):
         for k in range(A.shape[-1]):
             for j in range(B.shape[1]):
                 C[i,j] += A[i,k] * B[k,j]

 @njit(void(float64[:,::1],float64[:,::1],float64[:,:],numba.optional(int32)), parallel=True, nogil=True, cache=True)
 def matmul_numba_cpu(A,B,C,_):
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), f"Matrices have incompatible shapes: {A.shape}, {B.shape}, {C.shape}"
     for i in prange(A.shape[0]):
         for k in range(A.shape[1]):
             for j in range(B.shape[1]):
@@ -27,6 +30,7 @@ def matmul_numba_cpu(A,B,C,_):

 @njit(void(float64[:,::1],float64[:,::1],float64[:,:],int32), parallel=True, nogil=True, cache=True)
 def matmul_numba_block_cpu(A,B,C, bs=64):
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), f"Matrices have incompatible shapes: {A.shape}, {B.shape}, {C.shape}"
     N = A.shape[0]
     M = B.shape[1]
     K = A.shape[1]
@@ -45,6 +49,7 @@ def matmul_numba_block_cpu(A,B,C, bs=64):

 @njit(void(float64[:,::1],float64[:,::1],float64[:,:],int32), parallel=False, nogil=True, cache=True)
 def matmul_numba_block_serial(A,B,C, bs=64):
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), f"Matrices have incompatible shapes: {A.shape}, {B.shape}, {C.shape}"
     N = A.shape[0]
     M = B.shape[1]
     K = A.shape[1]
@@ -61,8 +66,10 @@ def matmul_numba_block_serial(A,B,C, bs=64):
                     for j in range(jj,jmax):
                         C[i,j] += A[i,k] * B[k,j]

-@cuda.jit(void(float64[:,::1],float64[:,::1],float64[:,:]), cache=True)
+@cuda.jit(void(float64[:,::1],float64[:,::1],float64[:,:]), cache=True, debug=False)
 def matmul_numba_gpu(A,B,C):
+    # this only has effect if function is compiled with debug = True
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), "Matrices have incompatible shapes"
     i, j = cuda.grid(ndim=2)
     if i < C.shape[0] and j < C.shape[1]:
         tmp = 0.
@@ -71,8 +78,10 @@ def matmul_numba_gpu(A,B,C):
         C[i,j] = tmp

 BLOCK_SIZE = 16
-@cuda.jit(void(float64[:,::1],float64[:,::1],float64[:,:]), cache=True)
+@cuda.jit(void(float64[:,::1],float64[:,::1],float64[:,:]), cache=True, debug=False)
 def matmul_numba_block_gpu(A,B,C):
+    # this only has effect if function is compiled with debug = True
+    assert (A.shape[0] == C.shape[0]) and (A.shape[1] == B.shape[0]) and (B.shape[1] == C.shape[1]), "Matrices have incompatible shapes"

     bi = cuda.blockIdx.y
     bj = cuda.blockIdx.x
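The new assertions make shape mismatches fail fast on the CPU paths; in the CUDA kernels the same check only runs when the kernel is compiled with debug=True, hence debug=False is spelled out in the decorators. A quick sketch of how the check behaves (hypothetical shapes, assuming the routines are importable as matmul.routines):

import numpy as np
from matmul.routines import matmul

A = np.ones((4, 3))
B = np.ones((3, 5))
C = np.zeros((4, 5))
matmul(A, B, C, None)      # compatible shapes: (4,3) x (3,5) -> (4,5)

C_bad = np.zeros((4, 4))
matmul(A, B, C_bad, None)  # AssertionError: Matrices have incompatible shapes: (4, 3), (3, 5), (4, 4)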

src/matmul/utils.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ def read_config(config_path: str):

     Params:
     - config_path : str
-        Path to the config file, withot the extension.
+        Path to the config file, without the extension.
     Returns:
     - params:
         Dictionary with parameters.

test/test_shared.py

Lines changed: 78 additions & 1 deletion
@@ -15,6 +15,22 @@ def test_matmul():

     assert np.allclose(np.eye(size),C)

+    with pytest.raises(AssertionError):
+        A = np.empty((size,size+1))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul(A,B,C,None)
+        A = np.empty((size+1,size))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul(A,B,C,None)
+        A = np.empty((size,size))
+        B = np.empty((size,size+1))
+        C = np.empty((size,size))
+        matmul(A,B,C,None)
+
+
+
 def test_matmul_numba_cpu():
     size = 20
     np.random.seed(0)
@@ -26,6 +42,21 @@ def test_matmul_numba_cpu():

     assert np.allclose(np.eye(size),C)

+    with pytest.raises(AssertionError):
+        A = np.empty((size,size+1),dtype=np.float64)
+        B = np.empty((size,size),dtype=np.float64)
+        C = np.empty((size,size),dtype=np.float64)
+        matmul_numba_cpu(A,B,C,None)
+        A = np.empty((size+1,size),dtype=np.float64)
+        B = np.empty((size,size),dtype=np.float64)
+        C = np.empty((size,size),dtype=np.float64)
+        matmul_numba_cpu(A,B,C,None)
+        A = np.empty((size,size),dtype=np.float64)
+        B = np.empty((size,size+1),dtype=np.float64)
+        C = np.empty((size,size),dtype=np.float64)
+        matmul_numba_cpu(A,B,C,None)
+
+
 def test_matmul_numba_serial():
     size = 20
     np.random.seed(0)
@@ -37,6 +68,20 @@ def test_matmul_numba_serial():

     assert np.allclose(np.eye(size),C)

+    with pytest.raises(AssertionError):
+        A = np.empty((size,size+1))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul_numba_serial(A,B,C,None)
+        A = np.empty((size+1,size))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul_numba_serial(A,B,C,None)
+        A = np.empty((size,size))
+        B = np.empty((size,size+1))
+        C = np.empty((size,size))
+        matmul_numba_serial(A,B,C,None)
+
 def test_matmul_numba_block_cpu():
     size = 20
     np.random.seed(0)
@@ -48,6 +93,20 @@ def test_matmul_numba_block_cpu():

     assert np.allclose(np.eye(size),C)

+    with pytest.raises(AssertionError):
+        A = np.empty((size,size+1))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul_numba_block_cpu(A,B,C,6)
+        A = np.empty((size+1,size))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul_numba_block_cpu(A,B,C,6)
+        A = np.empty((size,size))
+        B = np.empty((size,size+1))
+        C = np.empty((size,size))
+        matmul_numba_block_cpu(A,B,C,6)
+
 def test_matmul_numba_block_serial():
     size = 20
     np.random.seed(0)
@@ -59,6 +118,20 @@ def test_matmul_numba_block_serial():

     assert np.allclose(np.eye(size),C)

+    with pytest.raises(AssertionError):
+        A = np.empty((size,size+1))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul_numba_block_serial(A,B,C,6)
+        A = np.empty((size+1,size))
+        B = np.empty((size,size))
+        C = np.empty((size,size))
+        matmul_numba_block_serial(A,B,C,6)
+        A = np.empty((size,size))
+        B = np.empty((size,size+1))
+        C = np.empty((size,size))
+        matmul_numba_block_serial(A,B,C,6)
+
 @pytest.mark.skipif((not numba.cuda.is_available()), reason='Could not find any CUDA GPU')
 def test_matmul_numba_gpu():
     size = 20
@@ -81,6 +154,9 @@ def test_matmul_numba_gpu():

     assert np.allclose(np.eye(size),C)

+    # No tests for assertion errors on matrix shape since they are only available
+    # for debug=True
+
 @pytest.mark.skipif((not numba.cuda.is_available()), reason='Could not find any CUDA GPU')
 def test_matmul_numba_block_gpu():
     size = 20
@@ -103,4 +179,5 @@ def test_matmul_numba_block_gpu():

     assert np.allclose(np.eye(size),C)

-
+    # No tests for assertion errors on matrix shape since they are only available
+    # for debug=True
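One caveat about the pattern above: inside a single pytest.raises block, execution stops at the first call that raises, so the second and third shape combinations in each test never actually run. A possible follow-up (a sketch only, not part of this commit) would parametrize the incompatible shapes so each case gets its own pytest.raises block:

import numpy as np
import pytest
from matmul.routines import matmul

# Hypothetical parametrized test: each incompatible shape triple is checked
# independently, so every case exercises the assertion in matmul.
@pytest.mark.parametrize("shape_A, shape_B, shape_C", [
    ((20, 21), (20, 20), (20, 20)),  # inner dimensions of A and B disagree
    ((21, 20), (20, 20), (20, 20)),  # rows of A != rows of C
    ((20, 20), (20, 21), (20, 20)),  # columns of B != columns of C
])
def test_matmul_incompatible_shapes(shape_A, shape_B, shape_C):
    A, B, C = np.empty(shape_A), np.empty(shape_B), np.empty(shape_C)
    with pytest.raises(AssertionError):
        matmul(A, B, C, None)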
