Skip to content

Commit 5e77f00

Browse files
committed
Merge branch 'issue752-load-stac-make-bands-metadata-flexible'
2 parents 4a3dff6 + 4eaa15a commit 5e77f00

File tree

5 files changed

+328
-23
lines changed

5 files changed

+328
-23
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1515

1616
### Changed
1717

18+
- When the bands provided to `Connection.load_stac(..., bands=[...])` do not fully match the bands the client extracted from the STAC metadata, a warning will be triggered, but the provided band names will still be used during the client-side preparation of the process graph. This is a pragmatic approach to bridge the gap between differing interpretations of band detection in STAC. Note that this might produce process graphs that are technically invalid and might not work on other backends or future versions of the backend you currently use. It is recommended to consult with the provider of the STAC metadata and openEO backend on the correct and future-proof band names. ([#752](https://github.com/Open-EO/openeo-python-client/issues/752))
19+
1820
### Removed
1921

2022
### Fixed

openeo/metadata.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,16 @@ def rename_labels(self, target, source) -> Dimension:
218218
def rename(self, name) -> Dimension:
219219
return BandDimension(name=name, bands=self.bands)
220220

221+
def contains_band(self, band: Union[int, str]) -> bool:
222+
"""
223+
Check if the given band name or index is present in the dimension.
224+
"""
225+
try:
226+
self.band_index(band)
227+
return True
228+
except ValueError:
229+
return False
230+
221231

222232
class GeometryDimension(Dimension):
223233
# TODO: how to model/store labels of geometry dimension?
@@ -411,6 +421,31 @@ def add_dimension(self, name: str, label: Union[str, float], type: Optional[str]
411421
dim = Dimension(type=type or "other", name=name)
412422
return self._clone_and_update(dimensions=self._dimensions + [dim])
413423

424+
def _ensure_band_dimension(
425+
self, *, name: Optional[str] = None, bands: List[Union[Band, str]], warning: str
426+
) -> CubeMetadata:
427+
"""
428+
Create new CubeMetadata object, ensuring a band dimension with given bands.
429+
This will override any existing band dimension, and is intended for
430+
special cases where pragmatism necessitates to ignore the original metadata.
431+
For example, to overrule badly/incomplete detected band names from STAC metadata.
432+
433+
.. note::
434+
It is required to specify a warning message as this method is only intended
435+
to be used as temporary stop-gap solution for use cases that are possibly not future-proof.
436+
Enforcing a warning should make that clear and avoid that users unknowingly depend on
437+
metadata handling behavior that is not guaranteed to be stable.
438+
"""
439+
_log.warning(warning or "ensure_band_dimension: overriding band dimension metadata with user-defined bands.")
440+
if name is None:
441+
# Preserve original band dimension name if possible
442+
name = self.band_dimension.name if self.has_band_dimension() else "bands"
443+
bands = [b if isinstance(b, Band) else Band(name=b) for b in bands]
444+
band_dimension = BandDimension(name=name, bands=bands)
445+
return self._clone_and_update(
446+
dimensions=[d for d in self._dimensions if not isinstance(d, BandDimension)] + [band_dimension]
447+
)
448+
414449
def drop_dimension(self, name: str = None) -> CubeMetadata:
415450
"""Create new CubeMetadata object without dropped dimension with given name"""
416451
dimension_names = self.dimension_names()
@@ -654,13 +689,13 @@ def is_band_asset(asset: pystac.Asset) -> bool:
654689
raise ValueError(stac_object)
655690

656691
# At least assume there are spatial dimensions
657-
# TODO: are there conditions in which we even should not assume the presence of spatial dimensions?
692+
# TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions?
658693
dimensions = [
659694
SpatialDimension(name="x", extent=[None, None]),
660695
SpatialDimension(name="y", extent=[None, None]),
661696
]
662697

663-
# TODO: conditionally include band dimension when there was actual indication of band metadata?
698+
# TODO #743: conditionally include band dimension when there was actual indication of band metadata?
664699
band_dimension = BandDimension(name="bands", bands=bands)
665700
dimensions.append(band_dimension)
666701

openeo/rest/datacube.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,27 @@ def load_stac(
441441
graph = PGNode("load_stac", arguments=arguments)
442442
try:
443443
metadata = metadata_from_stac(url)
444+
# TODO: also apply spatial/temporal filters to metadata?
445+
444446
if isinstance(bands, list):
445-
# TODO: also apply spatial/temporal filters to metadata?
446-
metadata = metadata.filter_bands(band_names=bands)
447-
except Exception:
447+
if metadata.has_band_dimension():
448+
unknown_bands = [b for b in bands if not metadata.band_dimension.contains_band(b)]
449+
if len(unknown_bands) == 0:
450+
# Ideal case: bands requested by user correspond with bands extracted from metadata.
451+
metadata = metadata.filter_bands(band_names=bands)
452+
else:
453+
metadata = metadata._ensure_band_dimension(
454+
bands=bands,
455+
warning=f"The specified bands {bands} in `load_stac` are not a subset of the bands {metadata.band_dimension.band_names} found in the STAC metadata (unknown bands: {unknown_bands}). Working with specified bands as is.",
456+
)
457+
else:
458+
metadata = metadata._ensure_band_dimension(
459+
name="bands",
460+
bands=bands,
461+
warning=f"Bands {bands} were specified in `load_stac`, but no band dimension was detected in the STAC metadata. Working with band dimension and specified bands.",
462+
)
463+
464+
except Exception as e:
448465
log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
449466
metadata = None
450467
return cls(graph=graph, connection=connection, metadata=metadata)

tests/rest/test_connection.py

Lines changed: 193 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,13 @@
1919
from openeo import BatchJob
2020
from openeo.api.process import Parameter
2121
from openeo.internal.graph_building import FlatGraphableMixin, PGNode
22-
from openeo.metadata import _PYSTAC_1_9_EXTENSION_INTERFACE, TemporalDimension
22+
from openeo.metadata import (
23+
_PYSTAC_1_9_EXTENSION_INTERFACE,
24+
Band,
25+
BandDimension,
26+
CubeMetadata,
27+
TemporalDimension,
28+
)
2329
from openeo.rest import (
2430
CapabilitiesException,
2531
OpenEoApiError,
@@ -2737,6 +2743,21 @@ def test_load_result_filters(requests_mock):
27372743

27382744

27392745
class TestLoadStac:
2746+
@pytest.fixture
2747+
def build_stac_ref(self, tmp_path) -> typing.Callable[[dict], str]:
2748+
"""
2749+
Helper to dump (STAC) data to a temp file and return the path.
2750+
"""
2751+
# TODO #738 instead of working with local files: real request mocking of STAC resources compatible with pystac?
2752+
2753+
def dump(data) -> str:
2754+
stac_path = tmp_path / "stac.json"
2755+
with stac_path.open("w", encoding="utf8") as f:
2756+
json.dump(data, fp=f)
2757+
return str(stac_path)
2758+
2759+
return dump
2760+
27402761
def test_basic(self, con120):
27412762
cube = con120.load_stac("https://provider.test/dataset")
27422763
assert cube.flat_graph() == {
@@ -2890,20 +2911,19 @@ def test_load_stac_from_job_empty_result(self, con120, requests_mock):
28902911
}
28912912

28922913
@pytest.mark.parametrize("temporal_dim", ["t", "datezz"])
2893-
def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim):
2894-
stac_path = tmp_path / "stac.json"
2895-
stac_data = StacDummyBuilder.collection(
2896-
cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}}
2914+
def test_load_stac_reduce_temporal(self, con120, build_stac_ref, temporal_dim):
2915+
stac_ref = build_stac_ref(
2916+
StacDummyBuilder.collection(
2917+
cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}}
2918+
)
28972919
)
2898-
# TODO #738 real request mocking of STAC resources compatible with pystac?
2899-
stac_path.write_text(json.dumps(stac_data))
29002920

2901-
cube = con120.load_stac(str(stac_path))
2921+
cube = con120.load_stac(stac_ref)
29022922
reduced = cube.reduce_temporal("max")
29032923
assert reduced.flat_graph() == {
29042924
"loadstac1": {
29052925
"process_id": "load_stac",
2906-
"arguments": {"url": str(stac_path)},
2926+
"arguments": {"url": stac_ref},
29072927
},
29082928
"reducedimension1": {
29092929
"process_id": "reduce_dimension",
@@ -2957,19 +2977,174 @@ def test_load_stac_no_cube_extension_temporal_dimension(self, con120, tmp_path,
29572977
cube = con120.load_stac(str(stac_path))
29582978
assert cube.metadata.temporal_dimension == TemporalDimension(name="t", extent=dim_extent)
29592979

2960-
def test_load_stac_band_filtering(self, con120, tmp_path):
2961-
stac_path = tmp_path / "stac.json"
2962-
stac_data = StacDummyBuilder.collection(
2963-
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
2980+
def test_load_stac_default_band_handling(self, dummy_backend, build_stac_ref):
2981+
stac_ref = build_stac_ref(
2982+
StacDummyBuilder.collection(
2983+
# TODO #586 also cover STAC 1.1 style "bands"
2984+
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
2985+
)
29642986
)
2965-
# TODO #738 real request mocking of STAC resources compatible with pystac?
2966-
stac_path.write_text(json.dumps(stac_data))
29672987

2968-
cube = con120.load_stac(str(stac_path))
2988+
cube = dummy_backend.connection.load_stac(stac_ref)
29692989
assert cube.metadata.band_names == ["B01", "B02", "B03"]
29702990

2971-
cube = con120.load_stac(str(stac_path), bands=["B03", "B02"])
2972-
assert cube.metadata.band_names == ["B03", "B02"]
2991+
cube.execute()
2992+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
2993+
"url": stac_ref,
2994+
}
2995+
2996+
@pytest.mark.parametrize(
2997+
"bands, expected_warning",
2998+
[
2999+
(
3000+
["B04"],
3001+
"The specified bands ['B04'] in `load_stac` are not a subset of the bands ['B01', 'B02', 'B03'] found in the STAC metadata (unknown bands: ['B04']). Working with specified bands as is.",
3002+
),
3003+
(
3004+
["B03", "B04", "B05"],
3005+
"The specified bands ['B03', 'B04', 'B05'] in `load_stac` are not a subset of the bands ['B01', 'B02', 'B03'] found in the STAC metadata (unknown bands: ['B04', 'B05']). Working with specified bands as is.",
3006+
),
3007+
(["B03", "B02"], None),
3008+
(["B01", "B02", "B03"], None),
3009+
],
3010+
)
3011+
def test_load_stac_band_filtering(self, dummy_backend, build_stac_ref, caplog, bands, expected_warning):
3012+
stac_ref = build_stac_ref(
3013+
StacDummyBuilder.collection(
3014+
# TODO #586 also cover STAC 1.1 style "bands"
3015+
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
3016+
)
3017+
)
3018+
3019+
caplog.set_level(logging.WARNING)
3020+
# Test with non-existing bands in the collection metadata
3021+
cube = dummy_backend.connection.load_stac(stac_ref, bands=bands)
3022+
assert cube.metadata.band_names == bands
3023+
if expected_warning is None:
3024+
assert caplog.text == ""
3025+
else:
3026+
assert expected_warning in caplog.text
3027+
3028+
cube.execute()
3029+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3030+
"url": stac_ref,
3031+
"bands": bands,
3032+
}
3033+
3034+
def test_load_stac_band_filtering_no_band_metadata_default(self, dummy_backend, build_stac_ref, caplog):
3035+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3036+
3037+
cube = dummy_backend.connection.load_stac(stac_ref)
3038+
# TODO #743: what should the default list of bands be?
3039+
assert cube.metadata.band_names == []
3040+
3041+
cube.execute()
3042+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3043+
"url": stac_ref,
3044+
}
3045+
3046+
@pytest.mark.parametrize(
3047+
["bands", "has_band_dimension", "expected_pg_args", "expected_warning"],
3048+
[
3049+
(None, False, {}, None),
3050+
(
3051+
["B02", "B03"],
3052+
True,
3053+
{"bands": ["B02", "B03"]},
3054+
"Bands ['B02', 'B03'] were specified in `load_stac`, but no band dimension was detected in the STAC metadata. Working with band dimension and specified bands.",
3055+
),
3056+
],
3057+
)
3058+
def test_load_stac_band_filtering_no_band_dimension(
3059+
self, dummy_backend, build_stac_ref, bands, has_band_dimension, expected_pg_args, expected_warning, caplog
3060+
):
3061+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3062+
3063+
# This is a temporary mock.patch hack to make metadata_from_stac return metadata without a band dimension
3064+
# TODO #743: Do this properly through appropriate STAC metadata
3065+
from openeo.metadata import metadata_from_stac as orig_metadata_from_stac
3066+
3067+
def metadata_from_stac(url: str):
3068+
metadata = orig_metadata_from_stac(url=url)
3069+
assert metadata.has_band_dimension()
3070+
metadata = metadata.drop_dimension("bands")
3071+
assert not metadata.has_band_dimension()
3072+
return metadata
3073+
3074+
with mock.patch("openeo.rest.datacube.metadata_from_stac", new=metadata_from_stac):
3075+
cube = dummy_backend.connection.load_stac(stac_ref, bands=bands)
3076+
3077+
assert cube.metadata.has_band_dimension() == has_band_dimension
3078+
3079+
cube.execute()
3080+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3081+
**expected_pg_args,
3082+
"url": stac_ref,
3083+
}
3084+
3085+
if expected_warning:
3086+
assert expected_warning in caplog.text
3087+
else:
3088+
assert not caplog.text
3089+
3090+
def test_load_stac_band_filtering_no_band_metadata(self, dummy_backend, build_stac_ref, caplog):
3091+
caplog.set_level(logging.WARNING)
3092+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3093+
3094+
cube = dummy_backend.connection.load_stac(stac_ref, bands=["B01", "B02"])
3095+
assert cube.metadata.band_names == ["B01", "B02"]
3096+
assert (
3097+
# TODO: better warning than confusing "not a subset of the bands []" ?
3098+
"The specified bands ['B01', 'B02'] in `load_stac` are not a subset of the bands [] found in the STAC metadata (unknown bands: ['B01', 'B02']). Working with specified bands as is."
3099+
in caplog.text
3100+
)
3101+
3102+
cube.execute()
3103+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3104+
"url": stac_ref,
3105+
"bands": ["B01", "B02"],
3106+
}
3107+
3108+
@pytest.mark.parametrize(
3109+
["bands", "expected_pg_args", "expected_warning"],
3110+
[
3111+
(None, {}, None),
3112+
(
3113+
["B02", "B03"],
3114+
{"bands": ["B02", "B03"]},
3115+
"The specified bands ['B02', 'B03'] in `load_stac` are not a subset of the bands ['Bz1', 'Bz2'] found in the STAC metadata (unknown bands: ['B02', 'B03']). Working with specified bands as is",
3116+
),
3117+
],
3118+
)
3119+
def test_load_stac_band_filtering_custom_band_dimension(
3120+
self, dummy_backend, build_stac_ref, bands, expected_pg_args, expected_warning, caplog
3121+
):
3122+
stac_ref = build_stac_ref(StacDummyBuilder.collection())
3123+
3124+
# This is a temporary mock.patch hack to make metadata_from_stac return metadata with a custom band dimension
3125+
# TODO #743: Do this properly through appropriate STAC metadata
3126+
from openeo.metadata import metadata_from_stac as orig_metadata_from_stac
3127+
3128+
def metadata_from_stac(url: str):
3129+
return CubeMetadata(dimensions=[BandDimension(name="bandzz", bands=[Band("Bz1"), Band("Bz2")])])
3130+
3131+
with mock.patch("openeo.rest.datacube.metadata_from_stac", new=metadata_from_stac):
3132+
cube = dummy_backend.connection.load_stac(stac_ref, bands=bands)
3133+
3134+
assert cube.metadata.has_band_dimension()
3135+
assert cube.metadata.band_dimension.name == "bandzz"
3136+
3137+
cube.execute()
3138+
assert dummy_backend.get_pg("load_stac")["arguments"] == {
3139+
**expected_pg_args,
3140+
"url": stac_ref,
3141+
}
3142+
3143+
if expected_warning:
3144+
assert expected_warning in caplog.text
3145+
else:
3146+
assert not caplog.text
3147+
29733148

29743149
@pytest.mark.parametrize(
29753150
"bands",

0 commit comments

Comments
 (0)