Add RAM Estimation Feature for Ripser Parameters Calculation (Fixes #145) #185

Closed
11 changes: 11 additions & 0 deletions README.md
@@ -95,6 +95,17 @@ diagrams = ripser(data)['dgms']
plot_diagrams(diagrams, show=True)
```

You can also estimate the RAM requirements before running Ripser:

```python
import numpy as np
from ripser import estimate_ripser_memory

data = np.random.random((1000, 2))
estimated_gb = estimate_ripser_memory(data, maxdim=2)
print(f"Estimated RAM required: {estimated_gb:.2f} GB")
```
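
To act on the estimate, one option is to compare it against the memory actually available before starting a long computation. A minimal sketch using the third-party `psutil` package (not a ripser dependency; the 0.9 headroom factor is an arbitrary choice):

```python
import numpy as np
import psutil
from ripser import ripser, estimate_ripser_memory

data = np.random.random((1000, 2))
estimated_gb = estimate_ripser_memory(data, maxdim=2)
available_gb = psutil.virtual_memory().available / (1024**3)

# Leave some headroom rather than filling RAM exactly
if estimated_gb < 0.9 * available_gb:
    diagrams = ripser(data, maxdim=2)['dgms']
else:
    print(f"Skipping: need ~{estimated_gb:.2f} GB, "
          f"only {available_gb:.2f} GB available")
```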

We also supply a Scikit-learn transformer-style object if you would prefer to use that:

56 changes: 55 additions & 1 deletion src/ripser/ripser.py
@@ -102,6 +102,60 @@ def get_greedy_perm(X, n_perm=None, distance_matrix=False, metric="euclidean"):
return (idx_perm, lambdas, dperm2all)


def estimate_ripser_memory(X, maxdim=1, distance_matrix=False):
"""Estimate the RAM requirements for running Ripser.

Parameters
----------
    X : ndarray
        Either a point cloud of shape (n_samples, n_features) or, if
        ``distance_matrix`` is True, a distance matrix of shape
        (n_samples, n_samples).

maxdim : int, optional (default=1)
Maximum homology dimension to be computed.

distance_matrix : bool, optional (default=False)
Whether X is a distance matrix.

Returns
-------
float
Estimated RAM requirement in GB.

Notes
-----
This is a rough estimation based on:
1. Memory for distance matrix: O(n^2) where n is number of points
2. Memory for simplicial complex: O(n^(d+1)) where d is maxdim
3. Memory for persistence computation: O(n^3) worst case for matrix reduction
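
    As a worked example of the dominant term: for n = 1000 points, the
    O(n^3) bound alone is 1000**3 * 8 bytes, roughly 7.45 GB, so for
    modest maxdim the estimate is driven almost entirely by the
    matrix-reduction term.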
"""
    # Both a point cloud (n_samples, n_features) and a distance matrix
    # (n_samples, n_samples) have one row per point.
    n_points = X.shape[0]

    # Base memory for the dense distance matrix (8 bytes per float64)
    distance_matrix_memory = (n_points * n_points * 8) / (1024**3)  # in GB

    # Memory for the simplicial complex (worst case).
    # For each dimension d we store combinations of d + 1 points.
    # n^(d+1) / (d+1)! overestimates C(n, d+1); scipy.special.comb
    # would give the exact count, but this is a rough upper bound.
    complex_memory = 0
    for d in range(maxdim + 1):
        # Approximate number of d-simplices (overestimate).
        # math.factorial requires "import math" at the top of the module;
        # the np.math alias it replaces was removed in NumPy 1.25.
        n_simplices = (n_points ** (d + 1)) / math.factorial(d + 1)
        # Each simplex needs (d + 1) int32 vertex indices plus one
        # float64 filtration value.
        complex_memory += (n_simplices * ((d + 1) * 4 + 8)) / (1024**3)  # in GB

    # Memory for the persistence computation (worst case), a rough
    # estimate based on the O(n^3) boundary-matrix reduction.
    persistence_memory = (n_points**3 * 8) / (1024**3)  # in GB

    total_memory = distance_matrix_memory + complex_memory + persistence_memory

    return total_memory


def ripser(
X,
maxdim=1,
@@ -651,4 +705,4 @@ def plot(
)


__all__ = ["Rips", "ripser", "lower_star_img"]
__all__ = ["Rips", "ripser", "lower_star_img", "estimate_ripser_memory"]
39 changes: 39 additions & 0 deletions test/test_memory_estimation.py
@@ -0,0 +1,39 @@
import numpy as np
from ripser import estimate_ripser_memory

def test_memory_estimation():
# Create a small point cloud
n_points = 100
n_features = 3
X = np.random.rand(n_points, n_features)

# Test memory estimation for different dimensions
mem_dim0 = estimate_ripser_memory(X, maxdim=0)
mem_dim1 = estimate_ripser_memory(X, maxdim=1)
mem_dim2 = estimate_ripser_memory(X, maxdim=2)

# Memory should increase with dimension
assert mem_dim0 < mem_dim1 < mem_dim2

# Test with distance matrix
D = np.random.rand(n_points, n_points)
D = (D + D.T) / 2 # Make it symmetric
np.fill_diagonal(D, 0) # Zero diagonal

mem_dist = estimate_ripser_memory(D, maxdim=1, distance_matrix=True)
assert mem_dist > 0

def test_memory_scaling():
# Test how memory scales with number of points
n_points_small = 50
n_points_large = 100
n_features = 3

X_small = np.random.rand(n_points_small, n_features)
X_large = np.random.rand(n_points_large, n_features)

mem_small = estimate_ripser_memory(X_small, maxdim=1)
mem_large = estimate_ripser_memory(X_large, maxdim=1)

    # Doubling n multiplies the dominant O(n^3) term by 8, so requiring
    # a factor of 4 is a conservative super-linearity check.
    assert mem_large > 4 * mem_small
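
A further property worth testing, since the estimator reads only `X.shape[0]`: a point cloud and a distance matrix with the same number of rows should give identical estimates. A minimal sketch of such a test, under that assumption:

```python
def test_point_cloud_vs_distance_matrix():
    # The estimate depends only on the number of points, so both input
    # forms with the same n should yield the same figure.
    n_points = 100
    X = np.random.rand(n_points, 3)
    D = np.random.rand(n_points, n_points)
    assert estimate_ripser_memory(X, maxdim=1) == estimate_ripser_memory(
        D, maxdim=1, distance_matrix=True
    )
```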