Skip to content

Make behaviour of compute consistent for slicing #419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions examples/ndarray/lazyexpr_where_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Imports

import numpy as np

import blosc2

# Build a small on-disk structured array with three fields:
#   A = -x + 1 (int32), B = x - 2 (float32), C = 0.1 * x (float64)
N = 1000
it = ((-x + 1, x - 2, 0.1 * x) for x in range(N))
sa = blosc2.fromiter(
    it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w"
)

# Expression selecting the records of `sa` for which A < B
expr = sa["(A < B)"]

# NumPy references used to check the results below
A = sa["A"][:]
B = sa["B"][:]
temp = sa[:]
indices = A < B
# np.argmax on a boolean array gives the position of the first True value
idx = np.argmax(indices)

# One might think that expr[:10] gives the first 10 elements of the evaluated expression, but this is
# not the case. The slice is applied to the operands BEFORE the expression is computed, so the condition
# is only evaluated on the first 10 elements of the operands. Since the condition is False for some of
# them, the result has, in general, fewer than 10 elements.
sliced = expr.compute(slice(0, 10))
gotitem = expr[:10]
np.testing.assert_array_equal(sliced[:], gotitem)
np.testing.assert_array_equal(gotitem, temp[:10][indices[:10]])  # Equivalent NumPy syntax
# This makes sense: the item is a request to compute on a portion of the operands.
# If one desires a portion of the result, one should compute the whole expression and then slice it.

# Index with the first position for which the condition is True
sliced = expr.compute(idx)
gotitem = expr[idx]
# Both results hold exactly one element
np.testing.assert_array_equal(sliced[()], gotitem)
np.testing.assert_array_equal(gotitem, temp[idx])

# The condition is False for element 0 (A = 1, B = -2), so nothing is selected:
# both calls should return empty ("void") arrays here.
sliced = expr.compute(0)
gotitem = expr[0]
np.testing.assert_array_equal(sliced[()], gotitem)
np.testing.assert_array_equal(gotitem, temp[0])
40 changes: 23 additions & 17 deletions src/blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,8 @@ def compute(self, item: slice | list[slice] | None = None, **kwargs: Any) -> blo
----------
item: slice, list of slices, optional
If not None, only the chunks that intersect with the slices
in items will be evaluated.
in item will be evaluated. If provided, it selects the items of the operands to be used in the computation.
Note that item is used to slice the operands PRIOR to computation, not to slice the evaluated result.

kwargs: Any, optional
Keyword arguments that are supported by the :func:`empty` constructor.
Expand Down Expand Up @@ -328,7 +329,9 @@ def __getitem__(self, item: int | slice | Sequence[slice]) -> blosc2.NDArray:
Parameters
----------
item: int, slice or sequence of slices
The slice(s) to be retrieved. Note that step parameter is not yet honored.
If provided, it selects the items of the operands to be used in the computation.
Note that item is used to slice the operands PRIOR to computation, not to retrieve the specified
slices of the evaluated result.

Returns
-------
Expand Down Expand Up @@ -1378,7 +1381,8 @@ def slices_eval( # noqa: C901
for i, (c, s) in enumerate(zip(coords, chunks, strict=True))
)
# Check whether current slice_ intersects with _slice
if _slice is not None and _slice != ():
checker = _slice.item() if hasattr(_slice, "item") else _slice # can't use != when _slice is np.int
if checker is not None and checker != ():
# Ensure that _slice is of type slice
key = ndindex.ndindex(_slice).expand(shape).raw
_slice = tuple(k if isinstance(k, slice) else slice(k, k + 1, None) for k in key)
Expand Down Expand Up @@ -1508,19 +1512,7 @@ def slices_eval( # noqa: C901
else:
raise ValueError("The where condition must be a tuple with one or two elements")

if orig_slice is not None:
if isinstance(out, np.ndarray):
out = out[orig_slice]
if _order is not None:
indices_ = indices_[orig_slice]
elif isinstance(out, blosc2.NDArray):
# It *seems* better to choose an automatic chunks and blocks for the output array
# out = out.slice(orig_slice, chunks=out.chunks, blocks=out.blocks)
out = out.slice(orig_slice)
else:
raise ValueError("The output array is not a NumPy array or a NDArray")

if where is not None and len(where) < 2:
if where is not None and len(where) < 2: # Don't need to take orig_slice since filled up from 0 index
if _order is not None:
# argsort the result following _order
new_order = np.argsort(out[:lenout])
Expand All @@ -1532,6 +1524,19 @@ def slices_eval( # noqa: C901
else:
out.resize((lenout,))

else: # Need to take orig_slice since filled up array according to slice_ for each chunk
if orig_slice is not None:
if isinstance(out, np.ndarray):
out = out[orig_slice]
if _order is not None:
indices_ = indices_[orig_slice]
elif isinstance(out, blosc2.NDArray):
# It *seems* better to choose an automatic chunks and blocks for the output array
# out = out.slice(orig_slice, chunks=out.chunks, blocks=out.blocks)
out = out.slice(orig_slice)
else:
raise ValueError("The output array is not a NumPy array or a NDArray")

return out


Expand Down Expand Up @@ -1827,7 +1832,8 @@ def chunked_eval( # noqa: C901
operands: dict
A dictionary containing the operands for the expression.
item: int, slice or sequence of slices, optional
The slice(s) to be retrieved. Note that step parameter is not honored yet.
The slice(s) of the operands to be used in the computation. Note that the step parameter is not honored yet.
The item is used to slice the operands PRIOR to computation.
kwargs: Any, optional
Additional keyword arguments supported by the :func:`empty` constructor. In addition,
the following keyword arguments are supported:
Expand Down
60 changes: 50 additions & 10 deletions tests/ndarray/test_lazyexpr_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,17 +279,19 @@ def test_where_one_param(array_fixture):
res = np.sort(res)
nres = np.sort(nres)
np.testing.assert_allclose(res[:], nres)

# Test with getitem
sl = slice(100)
res = expr.where(a1)[sl]
nres = na1[sl][ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1")[sl]]
if len(a1.shape) == 1 or a1.chunks == a1.shape:
# TODO: fix this, as it seems that is not working well for numexpr?
if blosc2.IS_WASM:
return
np.testing.assert_allclose(res, nres[sl])
np.testing.assert_allclose(res, nres)
else:
# In this case, we cannot compare results, only the length
assert len(res) == len(nres[sl])
assert len(res) == len(nres)


# Test where indirectly via a condition in getitem in a NDArray
Expand Down Expand Up @@ -330,25 +332,26 @@ def test_where_getitem(array_fixture):
# Test with partial slice
sl = slice(100)
res = sa1[a1**2 + a2**2 > 2 * a1 * a2 + 1][sl]
nres = nsa1[sl][ne_evaluate("na1**2 + na2**2 > 2 * na1 * na2 + 1")[sl]]
if len(a1.shape) == 1 or a1.chunks == a1.shape:
# TODO: fix this, as it seems that is not working well for numexpr?
if blosc2.IS_WASM:
return
np.testing.assert_allclose(res["a"], nres[sl]["a"])
np.testing.assert_allclose(res["b"], nres[sl]["b"])
np.testing.assert_allclose(res["a"], nres["a"])
np.testing.assert_allclose(res["b"], nres["b"])
else:
# In this case, we cannot compare results, only the length
assert len(res["a"]) == len(nres[sl]["a"])
assert len(res["b"]) == len(nres[sl]["b"])
assert len(res["a"]) == len(nres["a"])
assert len(res["b"]) == len(nres["b"])
# string version
res = sa1["a**2 + b**2 > 2 * a * b + 1"][sl]
if len(a1.shape) == 1 or a1.chunks == a1.shape:
np.testing.assert_allclose(res["a"], nres[sl]["a"])
np.testing.assert_allclose(res["b"], nres[sl]["b"])
np.testing.assert_allclose(res["a"], nres["a"])
np.testing.assert_allclose(res["b"], nres["b"])
else:
# We cannot compare the results here, other than the length
assert len(res["a"]) == len(nres[sl]["a"])
assert len(res["b"]) == len(nres[sl]["b"])
assert len(res["a"]) == len(nres["a"])
assert len(res["b"]) == len(nres["b"])


# Test where indirectly via a condition in getitem in a NDField
Expand Down Expand Up @@ -631,3 +634,40 @@ def test_col_reduction(reduce_op):
ns = nreduc(nC[nC > 0])
np.testing.assert_allclose(s, ns)
np.testing.assert_allclose(s2, ns)


def test_fields_indexing():
    """Check that ``expr.compute(item)`` and ``expr[item]`` agree for a where-expression.

    In both cases ``item`` is applied to the operands before the condition is
    evaluated, so the result is compared against the NumPy equivalent
    ``temp[item][indices[item]]`` rather than against a slice of the full result.
    """
    N = 1000
    urlpath = "sa-1M.b2nd"
    it = ((-x + 1, x - 2, 0.1 * x) for x in range(N))
    sa = blosc2.fromiter(
        it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath=urlpath, mode="w"
    )
    try:
        expr = sa["(A < B)"]
        A = sa["A"][:]
        B = sa["B"][:]
        temp = sa[:]
        indices = A < B
        # np.argmax on a boolean array gives the position of the first True value
        idx = np.argmax(indices)

        # Returns less than 10 elements in general, because the condition is
        # evaluated only on the first 10 elements of the operands.
        sliced = expr.compute(slice(0, 10))
        gotitem = expr[:10]
        np.testing.assert_array_equal(sliced[:], gotitem)
        np.testing.assert_array_equal(gotitem, temp[:10][indices[:10]])
        # This makes sense since one can understand this as a request to compute on a portion of
        # the operands. If one desires a portion of the result, one should compute the whole
        # expression and then slice it. For a general slice it is quite difficult to simply stop
        # when the desired slice has been obtained, or to try to optimise chunk computation order.

        # Index with the first position for which the condition is True
        sliced = expr.compute(idx)
        gotitem = expr[idx]
        np.testing.assert_array_equal(sliced[()], gotitem)
        np.testing.assert_array_equal(gotitem, temp[idx])

        # The condition is False for element 0 (A = 1, B = -2), so both calls
        # should return empty ("void") arrays, and they must agree with each other.
        sliced = expr.compute(0)
        gotitem = expr[0]
        np.testing.assert_array_equal(sliced[()], gotitem)
        np.testing.assert_array_equal(gotitem, temp[0])
    finally:
        # Do not leave the on-disk array behind in the working directory
        blosc2.remove_urlpath(urlpath)