Skip to content

Commit 83ef8eb

Browse files
committed
Issue #752/#755 anticipate absence of band dimension
- prepare logic for absent band dimension (related to #743) - Introduce CubeMetadata.ensure_band_dimension (instead of confusing rename_labels usage) - avoid assumption about name of band dimension - more test coverage
1 parent 5d9b6d4 commit 83ef8eb

File tree

4 files changed

+252
-44
lines changed

4 files changed

+252
-44
lines changed

openeo/metadata.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,25 @@ def add_dimension(self, name: str, label: Union[str, float], type: Optional[str]
423423
dim = Dimension(type=type or "other", name=name)
424424
return self._clone_and_update(dimensions=self._dimensions + [dim])
425425

426+
def ensure_band_dimension(
427+
self, *, name: Optional[str] = None, bands: List[Union[Band, str]], warning: str
428+
) -> CubeMetadata:
429+
"""
430+
Create new CubeMetadata object, ensuring a band dimension with given bands.
431+
This will override any existing band dimension, and is intended for
432+
special cases where pragmatism necessitates to ignore the original metadata.
433+
For example, to overrule badly/incomplete detected band names from STAC metadata.
434+
"""
435+
_log.warning(warning)
436+
if name is None:
437+
# Preserve original band dimension name if possible
438+
name = self.band_dimension.name if self.has_band_dimension() else "bands"
439+
bands = [b if isinstance(b, Band) else Band(name=b) for b in bands]
440+
band_dimension = BandDimension(name=name, bands=bands)
441+
return self._clone_and_update(
442+
dimensions=[d for d in self._dimensions if not isinstance(d, BandDimension)] + [band_dimension]
443+
)
444+
426445
def drop_dimension(self, name: str = None) -> CubeMetadata:
427446
"""Create new CubeMetadata object without dropped dimension with given name"""
428447
dimension_names = self.dimension_names()
@@ -666,13 +685,13 @@ def is_band_asset(asset: pystac.Asset) -> bool:
666685
raise ValueError(stac_object)
667686

668687
# At least assume there are spatial dimensions
669-
# TODO: are there conditions in which we even should not assume the presence of spatial dimensions?
688+
# TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions?
670689
dimensions = [
671690
SpatialDimension(name="x", extent=[None, None]),
672691
SpatialDimension(name="y", extent=[None, None]),
673692
]
674693

675-
# TODO: conditionally include band dimension when there was actual indication of band metadata?
694+
# TODO #743: conditionally include band dimension when there was actual indication of band metadata?
676695
band_dimension = BandDimension(name="bands", bands=bands)
677696
dimensions.append(band_dimension)
678697

openeo/rest/datacube.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -444,14 +444,23 @@ def load_stac(
444444
# TODO: also apply spatial/temporal filters to metadata?
445445

446446
if isinstance(bands, list):
447-
unknown_bands = [b for b in bands if not metadata.band_dimension.contains_band(b)]
448-
if len(unknown_bands) == 0:
449-
metadata = metadata.filter_bands(band_names=bands)
447+
if metadata.has_band_dimension():
448+
unknown_bands = [b for b in bands if not metadata.band_dimension.contains_band(b)]
449+
if len(unknown_bands) == 0:
450+
# Ideal case: bands requested by user correspond with bands extracted from metadata.
451+
metadata = metadata.filter_bands(band_names=bands)
452+
else:
453+
metadata = metadata.ensure_band_dimension(
454+
bands=bands,
455+
warning=f"The specified bands {bands} in `load_stac` are not a subset of the bands {metadata.band_dimension.band_names} found in the STAC metadata (unknown bands: {unknown_bands}). Working with specified bands as is.",
456+
)
450457
else:
451-
logging.warning(
452-
f"The specified bands {bands} are not a subset of the bands {metadata.band_dimension.band_names} found in the STAC metadata (unknown bands: {unknown_bands}). Using specified bands as is."
458+
metadata = metadata.ensure_band_dimension(
459+
name="bands",
460+
bands=bands,
461+
warning=f"Bands {bands} were specified in `load_stac`, but no band dimension was detected in the STAC metadata. Working with band dimension and specified bands.",
453462
)
454-
metadata = metadata.rename_labels(dimension="bands", target=bands)
463+
455464
except Exception as e:
456465
log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
457466
metadata = None

tests/rest/test_connection.py

+164-36
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,13 @@
1919
from openeo import BatchJob
2020
from openeo.api.process import Parameter
2121
from openeo.internal.graph_building import FlatGraphableMixin, PGNode
22-
from openeo.metadata import _PYSTAC_1_9_EXTENSION_INTERFACE, TemporalDimension
22+
from openeo.metadata import (
23+
_PYSTAC_1_9_EXTENSION_INTERFACE,
24+
Band,
25+
BandDimension,
26+
CubeMetadata,
27+
TemporalDimension,
28+
)
2329
from openeo.rest import (
2430
CapabilitiesException,
2531
OpenEoApiError,
@@ -2737,6 +2743,21 @@ def test_load_result_filters(requests_mock):
27372743

27382744

27392745
class TestLoadStac:
2746+
@pytest.fixture
2747+
def build_stac_ref(self, tmp_path) -> typing.Callable[[dict], str]:
2748+
"""
2749+
Helper to dump (STAC) data to a temp file and return the path.
2750+
"""
2751+
# TODO #738 instead of working with local files: real request mocking of STAC resources compatible with pystac?
2752+
2753+
def dump(data) -> str:
2754+
stac_path = tmp_path / "stac.json"
2755+
with stac_path.open("w", encoding="utf8") as f:
2756+
json.dump(data, fp=f)
2757+
return str(stac_path)
2758+
2759+
return dump
2760+
27402761
def test_basic(self, con120):
27412762
cube = con120.load_stac("https://provider.test/dataset")
27422763
assert cube.flat_graph() == {
@@ -2890,20 +2911,19 @@ def test_load_stac_from_job_empty_result(self, con120, requests_mock):
28902911
}
28912912

28922913
@pytest.mark.parametrize("temporal_dim", ["t", "datezz"])
2893-
def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim):
2894-
stac_path = tmp_path / "stac.json"
2895-
stac_data = StacDummyBuilder.collection(
2896-
cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}}
2914+
def test_load_stac_reduce_temporal(self, con120, build_stac_ref, temporal_dim):
2915+
stac_ref = build_stac_ref(
2916+
StacDummyBuilder.collection(
2917+
cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}}
2918+
)
28972919
)
2898-
# TODO #738 real request mocking of STAC resources compatible with pystac?
2899-
stac_path.write_text(json.dumps(stac_data))
29002920

2901-
cube = con120.load_stac(str(stac_path))
2921+
cube = con120.load_stac(stac_ref)
29022922
reduced = cube.reduce_temporal("max")
29032923
assert reduced.flat_graph() == {
29042924
"loadstac1": {
29052925
"process_id": "load_stac",
2906-
"arguments": {"url": str(stac_path)},
2926+
"arguments": {"url": stac_ref},
29072927
},
29082928
"reducedimension1": {
29092929
"process_id": "reduce_dimension",
@@ -2957,66 +2977,174 @@ def test_load_stac_no_cube_extension_temporal_dimension(self, con120, tmp_path,
29572977
cube = con120.load_stac(str(stac_path))
29582978
assert cube.metadata.temporal_dimension == TemporalDimension(name="t", extent=dim_extent)
29592979

2980+
def test_load_stac_default_band_handling(self, dummy_backend, build_stac_ref):
2981+
stac_ref = build_stac_ref(
2982+
StacDummyBuilder.collection(
2983+
# TODO #586 also cover STAC 1.1 style "bands"
2984+
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
2985+
)
2986+
)
2987+
2988+
cube = dummy_backend.connection.load_stac(stac_ref)
2989+
assert cube.metadata.band_names == ["B01", "B02", "B03"]
2990+
2991+
cube.execute()
2992+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
2993+
"url": stac_ref,
2994+
}
2995+
29602996
@pytest.mark.parametrize(
29612997
"bands, expected_warning",
29622998
[
29632999
(
29643000
["B04"],
2965-
"The specified bands ['B04'] are not a subset of the bands ['B01', 'B02', 'B03'] found in the STAC metadata (unknown bands: ['B04']). Using specified bands as is.",
3001+
"The specified bands ['B04'] in `load_stac` are not a subset of the bands ['B01', 'B02', 'B03'] found in the STAC metadata (unknown bands: ['B04']). Working with specified bands as is.",
29663002
),
29673003
(
29683004
["B03", "B04", "B05"],
2969-
"The specified bands ['B03', 'B04', 'B05'] are not a subset of the bands ['B01', 'B02', 'B03'] found in the STAC metadata (unknown bands: ['B04', 'B05']). Using specified bands as is.",
3005+
"The specified bands ['B03', 'B04', 'B05'] in `load_stac` are not a subset of the bands ['B01', 'B02', 'B03'] found in the STAC metadata (unknown bands: ['B04', 'B05']). Working with specified bands as is.",
29703006
),
29713007
(["B03", "B02"], None),
29723008
(["B01", "B02", "B03"], None),
29733009
],
29743010
)
2975-
def test_load_stac_band_filtering(self, con120, tmp_path, caplog, bands, expected_warning):
2976-
stac_path = tmp_path / "stac.json"
2977-
stac_data = StacDummyBuilder.collection(
2978-
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
3011+
def test_load_stac_band_filtering(self, dummy_backend, build_stac_ref, caplog, bands, expected_warning):
3012+
stac_ref = build_stac_ref(
3013+
StacDummyBuilder.collection(
3014+
# TODO #586 also cover STAC 1.1 style "bands"
3015+
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
3016+
)
29793017
)
2980-
# TODO #738 real request mocking of STAC resources compatible with pystac?
2981-
stac_path.write_text(json.dumps(stac_data))
29823018

29833019
caplog.set_level(logging.WARNING)
29843020
# Test with non-existing bands in the collection metadata
2985-
cube = con120.load_stac(str(stac_path), bands=bands)
3021+
cube = dummy_backend.connection.load_stac(stac_ref, bands=bands)
29863022
assert cube.metadata.band_names == bands
29873023
if expected_warning is None:
29883024
assert caplog.text == ""
29893025
else:
29903026
assert expected_warning in caplog.text
29913027

2992-
def test_load_stac_band_filtering_no_requested_bands(self, con120, tmp_path):
2993-
stac_path = tmp_path / "stac.json"
2994-
stac_data = StacDummyBuilder.collection(
2995-
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
2996-
)
2997-
# TODO #738 real request mocking of STAC resources compatible with pystac?
2998-
stac_path.write_text(json.dumps(stac_data))
2999-
3000-
cube = con120.load_stac(str(stac_path))
3001-
assert cube.metadata.band_names == ["B01", "B02", "B03"]
3028+
cube.execute()
3029+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3030+
"url": stac_ref,
3031+
"bands": bands,
3032+
}
30023033

3003-
def test_load_stac_band_filtering_no_metadata(self, con120, tmp_path, caplog):
3004-
stac_path = tmp_path / "stac.json"
3005-
stac_data = StacDummyBuilder.collection()
3006-
# TODO #738 real request mocking of STAC resources compatible with pystac?
3007-
stac_path.write_text(json.dumps(stac_data))
3034+
def test_load_stac_band_filtering_no_band_metadata_default(self, dummy_backend, build_stac_ref, caplog):
3035+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
30083036

3009-
cube = con120.load_stac(str(stac_path))
3037+
cube = dummy_backend.connection.load_stac(stac_ref)
3038+
# TODO #743: what should the default list of bands be?
30103039
assert cube.metadata.band_names == []
30113040

3041+
cube.execute()
3042+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3043+
"url": stac_ref,
3044+
}
3045+
3046+
@pytest.mark.parametrize(
3047+
["bands", "has_band_dimension", "expected_pg_args", "expected_warning"],
3048+
[
3049+
(None, False, {}, None),
3050+
(
3051+
["B02", "B03"],
3052+
True,
3053+
{"bands": ["B02", "B03"]},
3054+
"Bands ['B02', 'B03'] were specified in `load_stac`, but no band dimension was detected in the STAC metadata. Working with band dimension and specified bands.",
3055+
),
3056+
],
3057+
)
3058+
def test_load_stac_band_filtering_no_band_dimension(
3059+
self, dummy_backend, build_stac_ref, bands, has_band_dimension, expected_pg_args, expected_warning, caplog
3060+
):
3061+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3062+
3063+
# This is a temporary mock.patch hack to make metadata_from_stac return metadata without a band dimension
3064+
# TODO #743: Do this properly through appropriate STAC metadata
3065+
from openeo.metadata import metadata_from_stac as orig_metadata_from_stac
3066+
3067+
def metadata_from_stac(url: str):
3068+
metadata = orig_metadata_from_stac(url=url)
3069+
assert metadata.has_band_dimension()
3070+
metadata = metadata.drop_dimension("bands")
3071+
assert not metadata.has_band_dimension()
3072+
return metadata
3073+
3074+
with mock.patch("openeo.rest.datacube.metadata_from_stac", new=metadata_from_stac):
3075+
cube = dummy_backend.connection.load_stac(stac_ref, bands=bands)
3076+
3077+
assert cube.metadata.has_band_dimension() == has_band_dimension
3078+
3079+
cube.execute()
3080+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3081+
**expected_pg_args,
3082+
"url": stac_ref,
3083+
}
3084+
3085+
if expected_warning:
3086+
assert expected_warning in caplog.text
3087+
else:
3088+
assert not caplog.text
3089+
3090+
def test_load_stac_band_filtering_no_band_metadata(self, dummy_backend, build_stac_ref, caplog):
30123091
caplog.set_level(logging.WARNING)
3013-
cube = con120.load_stac(str(stac_path), bands=["B01", "B02"])
3092+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3093+
3094+
cube = dummy_backend.connection.load_stac(stac_ref, bands=["B01", "B02"])
30143095
assert cube.metadata.band_names == ["B01", "B02"]
30153096
assert (
3016-
"The specified bands ['B01', 'B02'] are not a subset of the bands [] found in the STAC metadata (unknown bands: ['B01', 'B02']). Using specified bands as is."
3097+
# TODO: better warning than confusing "not a subset of the bands []" ?
3098+
"The specified bands ['B01', 'B02'] in `load_stac` are not a subset of the bands [] found in the STAC metadata (unknown bands: ['B01', 'B02']). Working with specified bands as is."
30173099
in caplog.text
30183100
)
30193101

3102+
cube.execute()
3103+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3104+
"url": stac_ref,
3105+
"bands": ["B01", "B02"],
3106+
}
3107+
3108+
@pytest.mark.parametrize(
3109+
["bands", "expected_pg_args", "expected_warning"],
3110+
[
3111+
(None, {}, None),
3112+
(
3113+
["B02", "B03"],
3114+
{"bands": ["B02", "B03"]},
3115+
"The specified bands ['B02', 'B03'] in `load_stac` are not a subset of the bands ['Bz1', 'Bz2'] found in the STAC metadata (unknown bands: ['B02', 'B03']). Working with specified bands as is",
3116+
),
3117+
],
3118+
)
3119+
def test_load_stac_band_filtering_custom_band_dimension(
3120+
self, dummy_backend, build_stac_ref, bands, expected_pg_args, expected_warning, caplog
3121+
):
3122+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3123+
3124+
# This is a temporary mock.patch hack to make metadata_from_stac return metadata with a custom band dimension
3125+
# TODO #743: Do this properly through appropriate STAC metadata
3126+
from openeo.metadata import metadata_from_stac as orig_metadata_from_stac
3127+
3128+
def metadata_from_stac(url: str):
3129+
return CubeMetadata(dimensions=[BandDimension(name="bandzz", bands=[Band("Bz1"), Band("Bz2")])])
3130+
3131+
with mock.patch("openeo.rest.datacube.metadata_from_stac", new=metadata_from_stac):
3132+
cube = dummy_backend.connection.load_stac(stac_ref, bands=bands)
3133+
3134+
assert cube.metadata.has_band_dimension()
3135+
assert cube.metadata.band_dimension.name == "bandzz"
3136+
3137+
cube.execute()
3138+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3139+
**expected_pg_args,
3140+
"url": stac_ref,
3141+
}
3142+
3143+
if expected_warning:
3144+
assert expected_warning in caplog.text
3145+
else:
3146+
assert not caplog.text
3147+
30203148

30213149
@pytest.mark.parametrize(
30223150
"bands",

tests/test_metadata.py

+52
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,58 @@ def test_cubemetadata_drop_dimension():
779779
metadata.drop_dimension("x")
780780

781781

782+
def test_cubemetadata_ensure_band_dimension_add_bands():
783+
metadata = CubeMetadata(
784+
dimensions=[
785+
TemporalDimension(name="t", extent=None),
786+
]
787+
)
788+
new = metadata.ensure_band_dimension(bands=["red", "green"], warning="ensure_band_dimension at work")
789+
assert new.has_band_dimension()
790+
assert new.dimension_names() == ["t", "bands"]
791+
assert new.band_names == ["red", "green"]
792+
793+
794+
def test_cubemetadata_ensure_band_dimension_add_name_and_bands():
795+
metadata = CubeMetadata(
796+
dimensions=[
797+
TemporalDimension(name="t", extent=None),
798+
]
799+
)
800+
new = metadata.ensure_band_dimension(name="bandzz", bands=["red", "green"], warning="ensure_band_dimension at work")
801+
assert new.has_band_dimension()
802+
assert new.dimension_names() == ["t", "bandzz"]
803+
assert new.band_names == ["red", "green"]
804+
805+
806+
def test_cubemetadata_ensure_band_dimension_override_bands():
807+
metadata = CubeMetadata(
808+
dimensions=[
809+
TemporalDimension(name="t", extent=None),
810+
BandDimension(name="bands", bands=[Band("red"), Band("green")]),
811+
]
812+
)
813+
new = metadata.ensure_band_dimension(bands=["tomato", "lettuce"], warning="ensure_band_dimension at work")
814+
assert new.has_band_dimension()
815+
assert new.dimension_names() == ["t", "bands"]
816+
assert new.band_names == ["tomato", "lettuce"]
817+
818+
819+
def test_cubemetadata_ensure_band_dimension_override_name_and_bands():
820+
metadata = CubeMetadata(
821+
dimensions=[
822+
TemporalDimension(name="t", extent=None),
823+
BandDimension(name="bands", bands=[Band("red"), Band("green")]),
824+
]
825+
)
826+
new = metadata.ensure_band_dimension(
827+
name="bandzz", bands=["tomato", "lettuce"], warning="ensure_band_dimension at work"
828+
)
829+
assert new.has_band_dimension()
830+
assert new.dimension_names() == ["t", "bandzz"]
831+
assert new.band_names == ["tomato", "lettuce"]
832+
833+
782834
def test_collectionmetadata_subclass():
783835
class MyCollectionMetadata(CollectionMetadata):
784836
def __init__(self, metadata: dict, dimensions: List[Dimension] = None, bbox=None):

0 commit comments

Comments
 (0)