Skip to content

Commit 58358ac

Browse files
committed
Merge branch 'issue394-load_stac-resample'
2 parents 3289ec4 + 7d06ea0 commit 58358ac

File tree

10 files changed

+398
-25
lines changed

10 files changed

+398
-25
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ and start a new "In Progress" section above it.
2323

2424
- Introduce `asset_url` option to allow backend implementations to have custom code for retrieving assets. Default
2525
behavior remains unchanged.
26+
- Improve data cube dimension detection in `load_stac` dry-run ([#394](https://github.com/Open-EO/openeo-python-driver/issues/394))
27+
2628

2729
## 0.133.0
2830

openeo_driver/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.134.0a1"
1+
__version__ = "0.134.0a2"

openeo_driver/datacube.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import shapely.ops
1919
import xarray
2020
from geopandas import GeoDataFrame, GeoSeries
21-
from openeo.metadata import CollectionMetadata
21+
from openeo.metadata import CollectionMetadata, CubeMetadata
2222
from openeo.util import ensure_dir, str_truncate
2323
from pyproj import CRS
2424

@@ -55,10 +55,12 @@ def run_udf(self, udf: str, *, runtime: str = "Python", context: Optional[dict]
5555
class DriverDataCube:
5656
"""Base class for "driver" side raster data cubes."""
5757

58-
def __init__(self, metadata: CollectionMetadata = None):
59-
self.metadata = (
60-
metadata if isinstance(metadata, CollectionMetadata) else CollectionMetadata(metadata=metadata or {})
61-
)
58+
def __init__(self, metadata: Optional[CubeMetadata] = None):
59+
if isinstance(metadata, dict):
60+
# TODO: remove this security net once we're sure it's not necessary anymore
61+
log.warning("DriverDataCube: deprecated dict-based metadata usage")
62+
metadata = CollectionMetadata(metadata=metadata)
63+
self.metadata: CubeMetadata = metadata or CubeMetadata()
6264

6365
def __eq__(self, o: object) -> bool:
6466
if o.__class__ == self.__class__:

openeo_driver/dry_run.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
GeometryBufferer,
7272
geojson_to_geometry, reproject_geometry,
7373
)
74+
import openeo_driver.stac.datacube
7475
from openeo_driver.utils import EvalEnv, to_hashable
7576

7677
_log = logging.getLogger(__name__)
@@ -279,9 +280,10 @@ def load_collection(
279280
self, collection_id: str, arguments: dict, metadata: dict = None, env: EvalEnv = EvalEnv()
280281
) -> "DryRunDataCube":
281282
"""Create a DryRunDataCube from a `load_collection` process."""
282-
# TODO #275 avoid VITO/Terrascope specific handling here?
283+
metadata = CollectionMetadata(metadata=metadata)
283284
properties = {
284-
**CollectionMetadata(metadata).get("_vito", "properties", default={}),
285+
# TODO #275 avoid VITO/Terrascope specific handling here?
286+
**metadata.get("_vito", "properties", default={}),
285287
**arguments.get("properties", {}),
286288
}
287289

@@ -339,15 +341,20 @@ def load_stac(self, url: str, arguments: dict, env: EvalEnv = EvalEnv()) -> "Dry
339341
trace = DataSource.load_stac(url=url, properties=properties, bands=arguments.get("bands", []), env=env)
340342
self.add_trace(trace)
341343

342-
metadata = CollectionMetadata(
343-
{},
344-
dimensions=[
345-
SpatialDimension(name="x", extent=[]),
346-
SpatialDimension(name="y", extent=[]),
347-
TemporalDimension(name="t", extent=[]),
348-
BandDimension(name="bands", bands=[Band("unknown")]),
349-
],
350-
)
344+
try:
345+
metadata = openeo_driver.stac.datacube.stac_to_cube_metadata(stac_ref=url)
346+
except Exception as e:
347+
_log.exception(
348+
f"Dry-run load_stac: failed to parse cube metadata from {url!r} ({e!r}). Falling back in generic metadata"
349+
)
350+
metadata = CubeMetadata(
351+
dimensions=[
352+
SpatialDimension(name="x", extent=[]),
353+
SpatialDimension(name="y", extent=[]),
354+
TemporalDimension(name="t", extent=[]),
355+
BandDimension(name="bands", bands=[Band("unknown")]),
356+
]
357+
)
351358

352359
cube = DryRunDataCube(traces=[trace], data_tracer=self, metadata=metadata)
353360
if "temporal_extent" in arguments:
@@ -534,7 +541,9 @@ class DryRunDataCube(DriverDataCube):
534541
estimate memory/cpu usage, ...
535542
"""
536543

537-
def __init__(self, traces: List[DataTraceBase], data_tracer: DryRunDataTracer, metadata: CubeMetadata = None):
544+
def __init__(
545+
self, traces: List[DataTraceBase], data_tracer: DryRunDataTracer, metadata: Optional[CubeMetadata] = None
546+
):
538547
super(DryRunDataCube, self).__init__(metadata=metadata)
539548
self._traces = traces or []
540549
self._data_tracer = data_tracer

openeo_driver/dummy/dummy_backend.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
CollectionMetadata,
2727
SpatialDimension,
2828
TemporalDimension,
29+
CubeMetadata,
2930
)
3031
from openeo.util import rfc3339
3132
from openeo.utils.version import ComparableVersion
@@ -202,7 +203,7 @@ def mock_side_effect(fun):
202203

203204
class DummyDataCube(DriverDataCube):
204205

205-
def __init__(self, metadata: CollectionMetadata = None):
206+
def __init__(self, metadata: Optional[CubeMetadata] = None):
206207
super(DummyDataCube, self).__init__(metadata=metadata)
207208

208209
# TODO #47: remove this non-standard process?
@@ -227,15 +228,15 @@ def __init__(self, metadata: CollectionMetadata = None):
227228
def reduce_dimension(
228229
self, reducer, *, dimension: str, context: Optional[dict] = None, env: EvalEnv
229230
) -> "DummyDataCube":
230-
return DummyDataCube(self.metadata.reduce_dimension(dimension_name=dimension))
231+
return DummyDataCube(metadata=self.metadata.reduce_dimension(dimension_name=dimension))
231232

232233
@mock_side_effect
233234
def add_dimension(self, name: str, label, type: str = "other") -> 'DummyDataCube':
234-
return DummyDataCube(self.metadata.add_dimension(name=name, label=label, type=type))
235+
return DummyDataCube(metadata=self.metadata.add_dimension(name=name, label=label, type=type))
235236

236237
@mock_side_effect
237238
def drop_dimension(self, name: str) -> 'DriverDataCube':
238-
return DummyDataCube(self.metadata.drop_dimension(name=name))
239+
return DummyDataCube(metadata=self.metadata.drop_dimension(name=name))
239240

240241
@mock_side_effect
241242
def dimension_labels(self, dimension: str) -> 'DriverDataCube':
@@ -1068,8 +1069,7 @@ def load_disk_data(
10681069
self, format: str, glob_pattern: str, options: dict, load_params: LoadParameters, env: EvalEnv
10691070
) -> DummyDataCube:
10701071
_register_load_collection_call(glob_pattern, load_params)
1071-
metadata = CollectionMetadata(
1072-
{},
1072+
metadata = CubeMetadata(
10731073
dimensions=[
10741074
SpatialDimension(name="x", extent=[]),
10751075
SpatialDimension(name="y", extent=[]),

openeo_driver/stac/__init__.py

Whitespace-only changes.

openeo_driver/stac/datacube.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
Utilities for working with STAC datacube extension
3+
4+
Based on pystac, with adaption layer to compensate for incompatibilities
5+
between available pystac version and actual STAC metadata.
6+
"""
7+
# TODO: move this functionality to the openeo-python-client for better reuse and client-server consistency?
8+
9+
import logging
10+
from typing import Dict, Union
11+
from pathlib import Path
12+
13+
import pystac
14+
import pystac.extensions.datacube
15+
16+
import openeo.metadata
17+
18+
_log = logging.getLogger(__name__)
19+
20+
StacRef = Union[pystac.STACObject, str, Path]
21+
22+
23+
def as_stac_object(stac_ref: StacRef) -> pystac.STACObject:
    """
    Resolve a STAC reference to a :py:class:`pystac.STACObject`.

    :param stac_ref: an already loaded STAC object (returned as-is),
        or a reference that is handed to :py:func:`pystac.read_file`.
    :return: the given object, or whatever :py:func:`pystac.read_file` produced.
    """
    if not isinstance(stac_ref, pystac.STACObject):
        return pystac.read_file(stac_ref)
    return stac_ref
28+
29+
30+
def _get_dimensions(stac_ref: StacRef) -> Dict[str, pystac.extensions.datacube.Dimension]:
    """
    Get the datacube extension dimensions of a STAC object.

    :param stac_ref: STAC object or reference, resolved through ``as_stac_object``.
    :return: the ``dimensions`` mapping of the pystac datacube extension.
    :raises ValueError: if pystac does not detect the datacube extension and
        the object declares no extension URI starting with
        ``https://stac-extensions.github.io/datacube/``.
    """
    stac_obj: pystac.STACObject = as_stac_object(stac_ref)
    datacube_ext = pystac.extensions.datacube.DatacubeExtension

    # TODO #396 update this to new pystac extension API
    if not datacube_ext.has_extension(stac_obj):
        declares_datacube = any(
            uri.startswith("https://stac-extensions.github.io/datacube/") for uri in stac_obj.stac_extensions
        )
        if not declares_datacube:
            raise ValueError(f"No datacube extension found in STAC object {stac_ref=}")
        # TODO #370/#396 as we're currently stuck on an old pystac version that
        #   doesn't support current versions of the datacube extension,
        #   we need workarounds like this
        _log.warning("Forcing pystac datacube extension on possibly unsupported metadata")
        datacube_ext.add_to(stac_obj)

    return datacube_ext.ext(stac_obj).dimensions
47+
48+
49+
def stac_to_cube_metadata(stac_ref: StacRef) -> openeo.metadata.CubeMetadata:
    """
    Parse STAC metadata and convert it to :py:class:`openeo.metadata.CubeMetadata`.

    Horizontal spatial dimensions, temporal dimensions and additional
    dimensions of type ``"bands"`` are converted to their ``openeo.metadata``
    counterparts; any other dimension is logged and left out.

    :param stac_ref: STAC object or reference to read the datacube dimensions from.
    :return: cube metadata holding the converted dimensions.
    """
    datacube = pystac.extensions.datacube

    converted = []
    for name, dim in _get_dimensions(stac_ref=stac_ref).items():
        if isinstance(dim, datacube.HorizontalSpatialDimension):
            openeo_dim = openeo.metadata.SpatialDimension(
                name=name,
                extent=dim.extent,
                crs=dim.reference_system,
                step=dim.step,
            )
        elif isinstance(dim, datacube.TemporalDimension):
            openeo_dim = openeo.metadata.TemporalDimension(name=name, extent=dim.extent)
        elif isinstance(dim, datacube.AdditionalDimension) and dim.dim_type == "bands":
            band_list = [openeo.metadata.Band(name=label) for label in dim.values]
            openeo_dim = openeo.metadata.BandDimension(name=name, bands=band_list)
        else:
            _log.info("Ignoring dimension %s of type %s", name, type(dim))
            continue
        converted.append(openeo_dim)

    return openeo.metadata.CubeMetadata(dimensions=converted)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
"reretry~=0.11.8",
7474
"markdown>3.4",
7575
"traceback-with-variables==2.0.4",
76-
"pystac~=1.8.0",
76+
"pystac~=1.8.0", # TODO #370/#396 bump to more recent pystac version
7777
],
7878
extras_require={
7979
"dev": tests_require,

tests/stac/test_datacube.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import json
2+
3+
import pytest
4+
from openeo.testing.stac import StacDummyBuilder
5+
6+
from openeo_driver.stac.datacube import stac_to_cube_metadata
7+
8+
9+
def test_stac_to_cube_metadata_basic(tmp_path):
    """Dimensions declared in a dummy STAC collection file are found back in the cube metadata."""
    time_range = ["2015-06-23T00:00:00Z", "2019-07-10T13:44:56Z"]
    cube_dimensions = {
        "x": {"type": "spatial", "axis": "x", "extent": [-180, 180], "reference_system": 4326},
        "y": {"type": "spatial", "axis": "y", "extent": [-56, 83], "reference_system": 4326},
        "t": {"type": "temporal", "extent": time_range, "step": "P5D"},
        "bands": {"type": "bands", "values": ["B1", "B2"]},
    }
    collection = StacDummyBuilder.collection(cube_dimensions=cube_dimensions)

    path = tmp_path / "collection.json"
    path.write_text(json.dumps(collection))

    metadata = stac_to_cube_metadata(stac_ref=path)

    dim_x, dim_y = metadata.spatial_dimensions
    assert (dim_x.name, dim_x.extent, dim_x.crs, dim_x.step) == ("x", [-180, 180], 4326, None)
    assert (dim_y.name, dim_y.extent, dim_y.crs, dim_y.step) == ("y", [-56, 83], 4326, None)

    dim_t = metadata.temporal_dimension
    assert (dim_t.name, dim_t.extent) == ("t", ["2015-06-23T00:00:00Z", "2019-07-10T13:44:56Z"])

    dim_bands = metadata.band_dimension
    assert (dim_bands.name, dim_bands.band_names) == ("bands", ["B1", "B2"])
39+
40+
41+
@pytest.mark.parametrize(
    "datacube_extension",
    [
        "https://stac-extensions.github.io/datacube/v2.0.0/schema.json",
        "https://stac-extensions.github.io/datacube/v2.2.0/schema.json",
    ],
)
def test_stac_to_cube_metadata_version_compatibility(tmp_path, datacube_extension):
    """Dimensions are parsed for each of the parametrized datacube extension schema URIs."""
    time_range = ["2015-06-23T00:00:00Z", "2019-07-10T13:44:56Z"]
    collection = {
        "type": "Collection",
        "stac_version": "1.0.0",
        "stac_extensions": [datacube_extension],
        "id": "collection123",
        "description": "Collection 123",
        "license": "proprietary",
        "extent": {
            "spatial": {"bbox": [[3, 4, 5, 6]]},
            "temporal": {"interval": [["2024-01-01", "2024-05-05"]]},
        },
        "cube:dimensions": {
            "x": {"type": "spatial", "axis": "x", "extent": [-180, 180], "reference_system": 4326},
            "y": {"type": "spatial", "axis": "y", "extent": [-56, 83], "reference_system": 4326},
            "t": {"type": "temporal", "extent": time_range, "step": "P5D"},
            "bands": {"type": "bands", "values": ["B1", "B2"]},
        },
        "links": [],
    }

    path = tmp_path / "collection.json"
    path.write_text(json.dumps(collection))

    metadata = stac_to_cube_metadata(stac_ref=path)

    dim_x, dim_y = metadata.spatial_dimensions
    assert (dim_x.name, dim_x.extent, dim_x.crs, dim_x.step) == ("x", [-180, 180], 4326, None)
    assert (dim_y.name, dim_y.extent, dim_y.crs, dim_y.step) == ("y", [-56, 83], 4326, None)

    dim_t = metadata.temporal_dimension
    assert (dim_t.name, dim_t.extent) == ("t", ["2015-06-23T00:00:00Z", "2019-07-10T13:44:56Z"])

    dim_bands = metadata.band_dimension
    assert (dim_bands.name, dim_bands.band_names) == ("bands", ["B1", "B2"])

0 commit comments

Comments
 (0)