Skip to content

Commit 1333dae

Browse files
author
Luke Shaw
committed
Add documentation of behaviour of get_item and compute for LazyExprs
1 parent 159fe82 commit 1333dae

File tree

2 files changed

+70
-1
lines changed

2 files changed

+70
-1
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Imports
2+
3+
import numpy as np
4+
5+
import blosc2
6+
7+
N = 1000
8+
it = ((-x + 1, x - 2, 0.1 * x) for x in range(N))
9+
sa = blosc2.fromiter(
10+
it, dtype=[("A", "i4"), ("B", "f4"), ("C", "f8")], shape=(N,), urlpath="sa-1M.b2nd", mode="w"
11+
)
12+
expr = sa["(A < B)"]
13+
A = sa["A"][:]
14+
B = sa["B"][:]
15+
C = sa["C"][:]
16+
temp = sa[:]
17+
indices = A < B
18+
idx = np.argmax(indices)
19+
20+
# One might think that expr[:10] gives the first 10 elements of the evaluated expression, but this is not the case.
21+
# It actually computes the expression on the first 10 elements of the operands; since for some elements the condition
22+
# is False, the result will be shorter than 10 elements.
23+
# Returns fewer than 10 elements in general
24+
sliced = expr.compute(slice(0, 10))
25+
gotitem = expr[:10]
26+
np.testing.assert_array_equal(sliced[:], gotitem)
27+
np.testing.assert_array_equal(gotitem, temp[:10][indices[:10]]) # Equivalent syntax
28+
# Actually this makes sense since one can understand this as a request to compute on a portion of operands.
29+
# If one desires a portion of the result, one should compute the whole expression and then slice it.
30+
31+
# Get first element for which condition is true
32+
sliced = expr.compute(idx)
33+
gotitem = expr[idx]
34+
# Arrays of one element
35+
np.testing.assert_array_equal(sliced[()], gotitem)
36+
np.testing.assert_array_equal(gotitem, temp[idx])
37+
38+
# Should return void arrays here.
39+
sliced = expr.compute(0)
40+
gotitem = expr[0]
41+
np.testing.assert_array_equal(sliced[()], gotitem)
42+
np.testing.assert_array_equal(gotitem, temp[0])

src/blosc2/lazyexpr.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1829,7 +1829,8 @@ def chunked_eval( # noqa: C901
18291829
operands: dict
18301830
A dictionary containing the operands for the expression.
18311831
item: int, slice or sequence of slices, optional
1832-
The slice(s) to be retrieved. Note that step parameter is not honored yet.
1832+
The slice(s) of the operands to be used in computation. Note that the step parameter is not honored yet.
1833+
Item is used to slice the operands PRIOR to computation.
18331834
kwargs: Any, optional
18341835
Additional keyword arguments supported by the :func:`empty` constructor. In addition,
18351836
the following keyword arguments are supported:
@@ -2688,6 +2689,21 @@ def sort(self, order: str | list[str] | None = None) -> blosc2.LazyArray:
26882689
return lazy_expr
26892690

26902691
def compute(self, item=None, **kwargs) -> blosc2.NDArray:
2692+
"""
2693+
Compute the expression with the given item and kwargs.
2694+
Parameters
2695+
----------
2696+
item: int, slice or sequence of slices, optional
2697+
The slice(s) of the operands to be used in computation. Note that the step parameter is not honored yet.
2698+
Item is used to slice the operands PRIOR to computation.
2699+
kwargs
2700+
2701+
Returns
2702+
-------
2703+
blosc2.NDArray or numpy.ndarray
2704+
2705+
"""
2706+
26912707
# When NumPy ufuncs are called, the user may add an `out` parameter to kwargs
26922708
if "out" in kwargs:
26932709
kwargs["_output"] = kwargs.pop("out")
@@ -2722,6 +2738,17 @@ def compute(self, item=None, **kwargs) -> blosc2.NDArray:
27222738
return result
27232739

27242740
def __getitem__(self, item):
2741+
"""
2742+
Apply LazyExpr on a slice of the operands.
2743+
Parameters
2744+
----------
2745+
item: int, slice or sequence of slices
2746+
The slice(s) of the operands to be used in computation. Note that the step parameter is not honored yet.
2747+
Item is used to slice the operands PRIOR to computation.
2748+
Returns
-------
2749+
numpy.ndarray
2750+
"""
2751+
27252752
kwargs = {"_getitem": True}
27262753
return self.compute(item, **kwargs)
27272754

0 commit comments

Comments
 (0)