Skip to content

Commit eb3bdb1

Browse files
draft: add cumsum,cumproduct,cummin,cummax (#293)
* add cumsum,cumproduct,cummin,cummax * reformat * reformat * rename * adding new cumulative specs and cumprod,cumsum tests * updating cummax and cummin testing * fix def cummin * update cum-process testing with nan values
1 parent 888abc5 commit eb3bdb1

File tree

3 files changed

+302
-0
lines changed

3 files changed

+302
-0
lines changed

openeo_processes_dask/process_implementations/math.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
"add",
2727
"_sum",
2828
"_min",
29+
"cumsum",
30+
"cumproduct",
31+
"cummin",
32+
"cummax",
2933
"_max",
3034
"median",
3135
"mean",
@@ -117,6 +121,58 @@ def _min(data, ignore_nodata=True, axis=None, keepdims=False):
117121
return np.min(data, axis=axis, keepdims=keepdims)
118122

119123

124+
def cumsum(data, ignore_nodata=True, axis=None):
    """Return the cumulative sum of ``data``.

    Positions that hold NaN in the input always hold NaN in the output.

    Parameters
    ----------
    data : array-like
        Numeric values; NaN marks no-data.
    ignore_nodata : bool, default True
        If True, NaNs are skipped (``np.nancumsum`` counts them as zero), so
        the running sum continues after a NaN. If False, ``np.cumsum`` is
        used and every value from the first NaN onwards is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, as ``np.cumsum`` does.

    Returns
    -------
    array
        The cumulative sums. Integer input is promoted to a floating dtype
        so that NaN is representable.
    """
    nan_mask = np.isnan(data)

    if ignore_nodata:
        result = np.nancumsum(data, axis=axis)
    else:
        result = np.cumsum(data, axis=axis)

    if axis is None:
        # Without an axis the cumulative functions return a flattened result,
        # so the mask must be flattened the same way to line up with it.
        nan_mask = nan_mask.ravel()

    # np.where rather than ``result[nan_mask] = np.nan``: in-place assignment
    # fails on integer results because NaN cannot be cast to an integer.
    return np.where(nan_mask, np.nan, result)
134+
135+
136+
def cumproduct(data, ignore_nodata=True, axis=None):
    """Return the cumulative product of ``data``.

    Positions that hold NaN in the input always hold NaN in the output.

    Parameters
    ----------
    data : array-like
        Numeric values; NaN marks no-data.
    ignore_nodata : bool, default True
        If True, NaNs are skipped (``np.nancumprod`` counts them as one), so
        the running product continues after a NaN. If False, ``np.cumprod``
        is used and every value from the first NaN onwards is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, as ``np.cumprod`` does.

    Returns
    -------
    array
        The cumulative products. Integer input is promoted to a floating
        dtype so that NaN is representable.
    """
    nan_mask = np.isnan(data)

    if ignore_nodata:
        result = np.nancumprod(data, axis=axis)
    else:
        result = np.cumprod(data, axis=axis)

    if axis is None:
        # Without an axis the cumulative functions return a flattened result,
        # so the mask must be flattened the same way to line up with it.
        nan_mask = nan_mask.ravel()

    # np.where rather than ``result[nan_mask] = np.nan``: in-place assignment
    # fails on integer results because NaN cannot be cast to an integer.
    return np.where(nan_mask, np.nan, result)
146+
147+
148+
def cummin(data, ignore_nodata=True, axis=None):
    """Return the cumulative minimum of ``data``.

    Positions that hold NaN in the input always hold NaN in the output.

    Parameters
    ----------
    data : array-like
        Numeric values; NaN marks no-data. The input is converted with
        ``np.asarray``.
    ignore_nodata : bool, default True
        If True, NaNs are skipped, so the running minimum continues after a
        NaN. If False, every value from the first NaN onwards is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, matching what ``np.cumsum`` does without an axis.

    Returns
    -------
    numpy.ndarray
        The cumulative minima. Integer input is promoted to a floating dtype
        so that NaN is representable.
    """
    data = np.asarray(data)

    if axis is None:
        # ufunc.accumulate treats axis=None as "all axes" and rejects it for
        # anything that is not 1-D, so flatten explicitly instead.
        data = data.ravel()
        axis = 0

    nan_mask = np.isnan(data)

    if ignore_nodata:
        # +inf is the identity of min, so masked-out entries never win.
        source = np.where(nan_mask, np.inf, data)
    else:
        # np.minimum propagates NaN, which gives the "NaN from here on" result.
        source = data

    running = np.minimum.accumulate(source, axis=axis)

    # np.where rather than in-place assignment: integer results cannot take
    # a NaN assignment, even when the mask selects nothing.
    return np.where(nan_mask, np.nan, running)
160+
161+
162+
def cummax(data, ignore_nodata=True, axis=None):
    """Return the cumulative maximum of ``data``.

    Positions that hold NaN in the input always hold NaN in the output.

    Parameters
    ----------
    data : array-like
        Numeric values; NaN marks no-data. The input is converted with
        ``np.asarray``.
    ignore_nodata : bool, default True
        If True, NaNs are skipped, so the running maximum continues after a
        NaN. If False, every value from the first NaN onwards is NaN.
    axis : int or None, default None
        Axis to accumulate along. ``None`` accumulates over the flattened
        input, matching what ``np.cumsum`` does without an axis.

    Returns
    -------
    numpy.ndarray
        The cumulative maxima. Integer input is promoted to a floating dtype
        so that NaN is representable.
    """
    data = np.asarray(data)

    if axis is None:
        # ufunc.accumulate treats axis=None as "all axes" and rejects it for
        # anything that is not 1-D, so flatten explicitly instead.
        data = data.ravel()
        axis = 0

    nan_mask = np.isnan(data)

    if ignore_nodata:
        # -inf is the identity of max, so masked-out entries never win.
        source = np.where(nan_mask, -np.inf, data)
    else:
        # np.maximum propagates NaN, which gives the "NaN from here on" result.
        source = data

    running = np.maximum.accumulate(source, axis=axis)

    # np.where rather than in-place assignment: integer results cannot take
    # a NaN assignment, even when the mask selects nothing.
    return np.where(nan_mask, np.nan, running)
174+
175+
120176
def _max(data, ignore_nodata=True, axis=None, keepdims=False):
121177
if ignore_nodata:
122178
return np.nanmax(data, axis=axis, keepdims=keepdims)

tests/test_apply.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,3 +347,193 @@ def test_apply_kernel(temporal_interval, bounding_box, random_raster_data):
347347
)
348348

349349
xr.testing.assert_equal(output_cube, input_cube)
350+
351+
352+
@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cumsum_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cumsum`` through ``apply_dimension`` along ``t``.

    First with default settings on the fixture data, then with
    ``ignore_nodata=False`` after index 15 of the third axis is set to NaN.
    """
    cumsum_impl = process_registry["cumsum"].implementation
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    # Default call: ignore_nodata is left at the implementation's default.
    accumulated = apply_dimension(
        data=cube,
        process=partial(cumsum_impl, data=ParameterReference(from_parameter="data")),
        dimension="t",
    ).compute()

    total_before = np.sum(np.abs(cube.data))
    total_after = np.sum(np.abs(accumulated.data))
    assert total_after >= total_before

    # Blank out one slice, then require NaN to show up further along the axis.
    cube.data[:, :, 15, :] = np.nan

    accumulated_with_nan = apply_dimension(
        data=cube,
        process=partial(
            cumsum_impl,
            data=ParameterReference(from_parameter="data"),
            ignore_nodata=False,
        ),
        dimension="t",
    ).compute()

    assert np.isnan(accumulated_with_nan[0, 0, 20, 0].values)
397+
398+
399+
@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cumproduct_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cumproduct`` through ``apply_dimension`` along ``t``.

    First with default settings on the fixture data, then with
    ``ignore_nodata=False`` after index 15 of the third axis is set to NaN.
    """
    input_cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    _process_cumprod = partial(
        process_registry["cumproduct"].implementation,
        data=ParameterReference(from_parameter="data"),
    )

    output_cube_cumprod = apply_dimension(
        data=input_cube,
        process=_process_cumprod,
        dimension="t",
    ).compute()

    # Compare the summed magnitudes of input and output, counting NaN as 0.
    # NOTE(review): whether this inequality holds depends on the value range
    # of random_raster_data - confirm against the fixture.
    original_data = np.abs(input_cube.data)
    original_data[np.isnan(original_data)] = 0
    original_abs_total = np.sum(original_data)

    cumprod_data = np.abs(output_cube_cumprod.data)
    cumprod_data[np.isnan(cumprod_data)] = 0
    cumprod_abs_total = np.sum(cumprod_data)

    assert cumprod_abs_total >= original_abs_total

    # Blank out one slice, then require NaN to show up further along the axis.
    input_cube.data[:, :, 15, :] = np.nan

    _process_cumprod_with_nan = partial(
        process_registry["cumproduct"].implementation,
        data=ParameterReference(from_parameter="data"),
        ignore_nodata=False,
    )

    output_cube_cumprod_with_nan = apply_dimension(
        data=input_cube,
        process=_process_cumprod_with_nan,
        dimension="t",
    ).compute()

    assert np.isnan(output_cube_cumprod_with_nan[0, 0, 20, 0].values)
448+
449+
450+
@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cummax_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cummax`` through ``apply_dimension`` along ``t``.

    First with default settings on the fixture data, then with
    ``ignore_nodata=False`` after index 15 of the third axis is set to NaN.
    """
    cummax_impl = process_registry["cummax"].implementation
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    # Default call: ignore_nodata is left at the implementation's default.
    accumulated = apply_dimension(
        data=cube,
        process=partial(cummax_impl, data=ParameterReference(from_parameter="data")),
        dimension="t",
    ).compute()

    # Maxima over the first axis of the input and of the accumulated output.
    input_max = np.max(cube.data, axis=0)
    output_max = np.max(accumulated.data, axis=0)
    assert np.all(output_max >= input_max)

    # Blank out one slice, then require NaN right after it along the axis.
    cube.data[:, :, 15, :] = np.nan

    accumulated_with_nan = apply_dimension(
        data=cube,
        process=partial(
            cummax_impl,
            data=ParameterReference(from_parameter="data"),
            ignore_nodata=False,
        ),
        dimension="t",
    ).compute()

    assert np.isnan(accumulated_with_nan[0, 0, 16, 0].values)
494+
495+
496+
@pytest.mark.parametrize("size", [(6, 5, 30, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_cummin_process(
    temporal_interval, bounding_box, random_raster_data, process_registry
):
    """Run ``cummin`` through ``apply_dimension`` along ``t``.

    First with default settings on the fixture data, then with
    ``ignore_nodata=False`` after index 15 of the third axis is set to NaN.
    """
    cummin_impl = process_registry["cummin"].implementation
    cube = create_fake_rastercube(
        data=random_raster_data,
        spatial_extent=bounding_box,
        temporal_extent=temporal_interval,
        bands=["B02", "B03", "B04", "B08"],
        backend="dask",
    )

    # Default call: ignore_nodata is left at the implementation's default.
    accumulated = apply_dimension(
        data=cube,
        process=partial(cummin_impl, data=ParameterReference(from_parameter="data")),
        dimension="t",
    ).compute()

    # Minima over the first axis of the input and of the accumulated output.
    input_min = np.min(cube.data, axis=0)
    output_min = np.min(accumulated.data, axis=0)
    assert np.all(output_min <= input_min)

    # Blank out one slice, then require NaN right after it along the axis.
    cube.data[:, :, 15, :] = np.nan

    accumulated_with_nan = apply_dimension(
        data=cube,
        process=partial(
            cummin_impl,
            data=ParameterReference(from_parameter="data"),
            ignore_nodata=False,
        ),
        dimension="t",
    ).compute()

    assert np.isnan(accumulated_with_nan[0, 0, 16, 0].values)

tests/test_math.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,59 @@ def test_extrema():
118118
dask_array = da.from_array(np.array(array_list))
119119
result = extrema(dask_array, ignore_nodata=True, axis=0, keepdims=False)
120120
assert np.array_equal(result_np, result.compute())
121+
122+
123+
def test_cumproduct():
    """Check ``cumproduct`` on a 1-D list containing one NaN, in both modes."""
    values = [1, 2, 3, np.nan, 3, 1]

    # Default call: the product continues past the NaN, which stays in place.
    skipping = cumproduct(values)
    assert np.array_equal([1, 2, 6, np.nan, 18, 18], skipping, equal_nan=True)

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating = cumproduct(values, ignore_nodata=False)
    assert np.array_equal(
        [1, 2, 6, np.nan, np.nan, np.nan], propagating, equal_nan=True
    )
135+
136+
137+
def test_cumsum():
    """Check ``cumsum`` on a 1-D list containing one NaN, in both modes."""
    values = [1, 3, np.nan, 3, 1]

    # Default call: the sum continues past the NaN, which stays in place.
    skipping = cumsum(values)
    assert np.array_equal([1, 4, np.nan, 7, 8], skipping, equal_nan=True)

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating = cumsum(values, ignore_nodata=False)
    assert np.array_equal(
        [1, 4, np.nan, np.nan, np.nan], propagating, equal_nan=True
    )
149+
150+
151+
def test_cummin():
    """Check ``cummin`` on 1-D lists containing one NaN, in both modes."""
    # Default call: the minimum continues past the NaN, which stays in place.
    skipping = cummin([5, 3, np.nan, 1, 5])
    assert np.array_equal([5, 3, np.nan, 1, 1], skipping, equal_nan=True)

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating = cummin([1, 3, np.nan, 3, 1], ignore_nodata=False)
    assert np.array_equal(
        [1, 1, np.nan, np.nan, np.nan], propagating, equal_nan=True
    )
163+
164+
165+
def test_cummax():
    """Check ``cummax`` on 1-D lists containing one NaN, in both modes."""
    # Default call: the maximum continues past the NaN, which stays in place.
    skipping = cummax([1, 3, np.nan, 5, 1])
    assert np.array_equal([1, 3, np.nan, 5, 5], skipping, equal_nan=True)

    # ignore_nodata=False: everything from the NaN onwards is NaN.
    propagating = cummax([1, 3, np.nan, 3, 1], ignore_nodata=False)
    assert np.array_equal(
        [1, 3, np.nan, np.nan, np.nan], propagating, equal_nan=True
    )

0 commit comments

Comments
 (0)