Skip to content

Commit 032de2c

Browse files
author
dsamaey
committed
Merge remote-tracking branch 'origin/master' into 747-robustranged-download-support
2 parents d414361 + c6bc1c9 commit 032de2c

File tree

14 files changed

+437
-16
lines changed

14 files changed

+437
-16
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- Support `collection_property` based property filtering in `load_stac` ([#246](https://github.com/Open-EO/openeo-python-client/issues/246))
13+
- Add `validate()` method to `SaveResult`, `VectorCube`, `MlModel` and `StacResource` classes ([#766](https://github.com/Open-EO/openeo-python-client/issues/766))
1314

1415
### Changed
1516

1617
- Eliminate deprecated `utcnow` usage patterns. Introduce `Rfc3339.now_utc()` method (as replacement for deprecated `utcnow()` method) to simplify finding deprecated `utcnow` usage in user code. ([#760](https://github.com/Open-EO/openeo-python-client/issues/760))
18+
- `Connection.list_jobs()`: change default `limit` to 100 (instead of fake "unlimited" which was arbitrarily capped in practice anyway) ([#677](https://github.com/Open-EO/openeo-python-client/issues/677))
1719

1820
### Removed
1921

docs/examples/udf/udf_modify_spatial.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88

99
def apply_metadata(input_metadata: CollectionMetadata, context: dict) -> CollectionMetadata:
10-
1110
xstep = input_metadata.get("x", "step")
1211
ystep = input_metadata.get("y", "step")
1312
new_metadata = {
@@ -24,8 +23,6 @@ def fancy_upsample_function(array: np.array, factor: int = 2) -> np.array:
2423

2524

2625
def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
27-
array: xarray.DataArray = cube.get_array()
28-
2926
cubearray: xarray.DataArray = cube.get_array().copy() + 60
3027

3128
# We make prediction and transform numpy array back to datacube
@@ -37,7 +34,7 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
3734
if cubearray.data.ndim == 4 and cubearray.data.shape[0] == 1:
3835
cubearray = cubearray[0]
3936
predicted_array = fancy_upsample_function(cubearray.data, 2)
40-
inspect(predicted_array, "test message")
37+
inspect(data=predicted_array, message="predicted array")
4138
coord_x = np.linspace(
4239
start=cube.get_array().coords["x"].min(),
4340
stop=cube.get_array().coords["x"].max() + init_pixel_size_x,
@@ -50,6 +47,10 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
5047
num=predicted_array.shape[-1],
5148
endpoint=False,
5249
)
53-
predicted_cube = xarray.DataArray(predicted_array, dims=["bands", "x", "y"], coords=dict(x=coord_x, y=coord_y))
50+
predicted_cube = xarray.DataArray(
51+
predicted_array,
52+
dims=["bands", "x", "y"],
53+
coords=dict(x=coord_x, y=coord_y),
54+
)
5455

5556
return XarrayDataCube(predicted_cube)

docs/udf.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,9 @@ instead of the original digital number range (thousands):
294294

295295
UDF's that transform cube metadata
296296
==================================
297-
This is a new/experimental feature so may still be subject to change.
297+
298+
.. warning::
299+
This is a new/experimental feature so may still be subject to change.
298300

299301
In some cases, a UDF can have impact on the metadata of a cube, but this can not always
300302
be easily inferred by process graph evaluation logic without running the actual
@@ -314,9 +316,9 @@ To invoke a UDF like this, the apply_neighborhood method is most suitable:
314316

315317
.. code-block:: python
316318
317-
udf_code = Path("udf_modify_spatial.py").read_text()
319+
udf = openeo.UDF.from_file("udf_modify_spatial.py", runtime="Python-Jep")
318320
cube_updated = cube.apply_neighborhood(
319-
lambda data: data.run_udf(udf=udf_code, runtime="Python-Jep", context=dict()),
321+
udf,
320322
size=[
321323
{"dimension": "x", "value": 128, "unit": "px"},
322324
{"dimension": "y", "value": 128, "unit": "px"},
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import abc
2+
import math
3+
from typing import Dict, List, NamedTuple, Optional, Union
4+
5+
import shapely
6+
from shapely.geometry import MultiPolygon, Polygon
7+
8+
from openeo.util import normalize_crs
9+
10+
11+
class JobSplittingFailure(Exception):
    """Raised when job splitting fails, e.g. on an unsupported geometry type."""
13+
14+
15+
class _BoundingBox(NamedTuple):
    """
    Simple NamedTuple container for a bounding box.

    Holds the ``west``, ``south``, ``east`` and ``north`` bounds
    and the ``crs`` (4326 by default).
    """

    # TODO: this should be moved to more general utility module, and/or merged with existing BBoxDict

    west: float
    south: float
    east: float
    north: float
    crs: int = 4326

    @classmethod
    def from_dict(cls, d: Dict) -> "_BoundingBox":
        """
        Create a bounding box from a dictionary.

        :param d: dictionary with keys "west", "south", "east", "north" and optionally "crs".
            Other keys are ignored. The dictionary itself is left untouched.
        :return: bounding box with normalized CRS
            (the default CRS is used when "crs" is missing or ``None``)
        :raises KeyError: when one of the required bounds is missing
        """
        # Fields without a default are required (plain indexing raises KeyError),
        # fields with a default are only picked up when present.
        kwargs = {k: d[k] for k in cls._fields if k not in cls._field_defaults or k in d}
        if kwargs.get("crs") is not None:
            # Normalize on the local copy instead of writing back into the caller's dict.
            kwargs["crs"] = normalize_crs(kwargs["crs"])
        else:
            # Missing or explicit None: fall back to the field default, like `from_polygon` does.
            kwargs.pop("crs", None)
        return cls(**kwargs)

    @classmethod
    def from_polygon(cls, polygon: Union[MultiPolygon, Polygon], crs: Optional[int] = None) -> "_BoundingBox":
        """
        Create a bounding box from a shapely Polygon or MultiPolygon.

        :param polygon: geometry to take the bounds of
        :param crs: CRS of the geometry (the default CRS is used when ``None``)
        :return: bounding box of the given geometry
        """
        crs = normalize_crs(crs)
        return cls(*polygon.bounds, crs=cls._field_defaults["crs"] if crs is None else crs)

    def as_dict(self) -> Dict:
        """Get bounding box as a dictionary, keyed by field name."""
        return self._asdict()

    def as_polygon(self) -> Polygon:
        """Get bounding box as a shapely Polygon (the CRS is not carried over)."""
        return shapely.geometry.box(minx=self.west, miny=self.south, maxx=self.east, maxy=self.north)
45+
46+
47+
class _TileGridInterface(metaclass=abc.ABCMeta):
    """Interface for tile grid classes"""

    # TODO: is it intentional that this method returns a list of non-multi polygons even if the input can be multi-polygon?
    # TODO: typehint states that geometry can be a dict too, but that is very liberal, it's probably just about bounding box kind of dicts?
    @abc.abstractmethod
    def get_tiles(self, geometry: Union[Dict, MultiPolygon, Polygon]) -> List[Polygon]:
        """
        Calculate tiles to cover given bounding box.

        :param geometry: area to cover, as a dict or a shapely Polygon or MultiPolygon
        :return: list of tiles (polygons)
        """
        ...
56+
57+
58+
class _SizeBasedTileGrid(_TileGridInterface):
    """
    Specification of a tile grid, parsed from a size and a projection.
    The size is in m for UTM projections or degrees for WGS84.
    """

    def __init__(self, *, epsg: int, size: float):
        """
        :param epsg: projection of the grid (passed through ``normalize_crs``)
        :param size: tile size, in unit of measure of the projection
        """
        # TODO: normalize_crs does not necessarily return an int (could also be a WKT2 string, or even None), but further logic seems to assume it's an int
        self.epsg = normalize_crs(epsg)
        self.size = size

    @classmethod
    def from_size_projection(cls, *, size: float, projection: str) -> "_SizeBasedTileGrid":
        """Create a tile grid from size and projection"""
        # TODO: the constructor also does normalize_crs, so this factory looks like overkill at the moment
        return cls(epsg=normalize_crs(projection), size=size)

    def _epsg_is_meters(self) -> bool:
        """
        Check if the projection unit is in meters. (EPSG:3857 or UTM)

        :raises JobSplittingFailure: when the normalized projection is not an integer EPSG code
        """
        # TODO: this is a bit misleading: this code just checks some EPSG ranges (UTM and 3857) and calls all the rest to be not in meters.
        #       It would be better to raise an exception on unknown EPSG codes than claiming they're not in meter
        if not isinstance(self.epsg, int):
            # The range checks below only make sense for integer EPSG codes:
            # fail with a clear message instead of an obscure comparison TypeError.
            raise JobSplittingFailure(f"Tile grid projection must be an integer EPSG code, but got {self.epsg!r}")
        return 32601 <= self.epsg <= 32660 or 32701 <= self.epsg <= 32760 or self.epsg == 3857

    @staticmethod
    def _split_bounding_box(to_cover: _BoundingBox, x_offset: float, tile_size: float) -> List[Polygon]:
        """
        Split a bounding box into tiles of given size and projection.

        :param to_cover: bounding box (with fields "west", "south", "east", "north", "crs")
        :param x_offset: offset to apply to the west and east coordinates
        :param tile_size: size of tiles in unit of measure of the projection
        :return: list of tiles (polygons), clipped to the bounds of ``to_cover``
        :raises JobSplittingFailure: when ``tile_size`` is not a positive number
        """
        if not tile_size > 0:
            # Guard the divisions below (and also rejects NaN and negative sizes).
            raise JobSplittingFailure(f"Tile size must be a positive number, but got {tile_size!r}")

        # Index range (inclusive) of the grid cells that overlap the bounding box.
        xmin = int(math.floor((to_cover.west - x_offset) / tile_size))
        xmax = int(math.ceil((to_cover.east - x_offset) / tile_size)) - 1
        ymin = int(math.floor(to_cover.south / tile_size))
        ymax = int(math.ceil(to_cover.north / tile_size)) - 1

        tiles = []
        for x in range(xmin, xmax + 1):
            for y in range(ymin, ymax + 1):
                # max/min: clip the grid cell to the bounding box to cover.
                tiles.append(
                    _BoundingBox(
                        west=max(x * tile_size + x_offset, to_cover.west),
                        south=max(y * tile_size, to_cover.south),
                        east=min((x + 1) * tile_size + x_offset, to_cover.east),
                        north=min((y + 1) * tile_size, to_cover.north),
                    ).as_polygon()
                )

        return tiles

    def get_tiles(self, geometry: Union[Dict, MultiPolygon, Polygon]) -> List[Polygon]:
        """
        Calculate tiles to cover the bounding box of the given geometry.

        :param geometry: bounding box dict or shapely Polygon/MultiPolygon
        :return: list of tiles (polygons)
        :raises JobSplittingFailure: when the geometry is of an unsupported type
        """
        if isinstance(geometry, dict):
            bbox = _BoundingBox.from_dict(geometry)
        elif isinstance(geometry, (Polygon, MultiPolygon)):
            bbox = _BoundingBox.from_polygon(geometry, crs=self.epsg)
        else:
            raise JobSplittingFailure("geometry must be a dict or a shapely.geometry.Polygon or MultiPolygon")

        # TODO: being a meter based EPSG does not imply that offset should be 500_000
        x_offset = 500_000 if self._epsg_is_meters() else 0

        return self._split_bounding_box(to_cover=bbox, x_offset=x_offset, tile_size=self.size)
125+
126+
127+
def split_area(
    aoi: Union[Dict, MultiPolygon, Polygon], projection: str = "EPSG:3857", tile_size: float = 20_000.0
) -> List[Polygon]:
    """
    Split area of interest into tiles of given size and projection.

    :param aoi: area of interest (bounding box or shapely polygon)
    :param projection: projection to use for splitting. Default is web mercator (EPSG:3857).
        When ``aoi`` is a dict with a (non-``None``) "crs" entry, that CRS is used instead.
    :param tile_size: size of tiles in unit of measure of the projection
    :return: list of tiles (polygons).
    """
    # TODO EPSG 3857 is probably not a good default projection. Probably better to make it a required parameter
    if isinstance(aoi, dict) and aoi.get("crs") is not None:
        # TODO: this possibly overwrites the given projection without the user noticing, making usage confusing
        # Only override on an actual CRS: an explicit `"crs": None` must not wipe out `projection`.
        projection = aoi["crs"]

    tile_grid = _SizeBasedTileGrid.from_size_projection(size=tile_size, projection=projection)
    return tile_grid.get_tiles(aoi)

openeo/rest/_datacube.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
)
2020
from openeo.internal.warnings import UserDeprecationWarning
2121
from openeo.rest import OpenEoClientException
22+
from openeo.rest.models.general import ValidationResponse
2223
from openeo.util import dict_no_none, str_truncate
2324

2425
if typing.TYPE_CHECKING:
@@ -104,6 +105,16 @@ def _build_pgnode(
104105

105106
# TODO #278 also move process graph "execution" methods here: `download`, `execute`, `execute_batch`, `create_job`, `save_udf`, ...
106107

108+
def validate(self) -> ValidationResponse:
109+
"""
110+
Validate a process graph without executing it.
111+
112+
:return: container of validation errors (dictionaries with "code" and "message" fields)
113+
114+
.. versionadded:: 0.41.0
115+
"""
116+
return self._connection.validate_process_graph(self)
117+
107118
def _repr_html_(self):
108119
process = {"process_graph": self.flat_graph()}
109120
parameters = {

openeo/rest/connection.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -889,11 +889,13 @@ def describe_process(self, id: str, namespace: Optional[str] = None) -> dict:
889889

890890
raise OpenEoClientException("Process does not exist.")
891891

892-
def list_jobs(self, limit: Union[int, None] = None) -> JobListingResponse:
892+
def list_jobs(self, limit: Union[int, None] = 100) -> JobListingResponse:
893893
"""
894894
Lists (batch) jobs metadata of the authenticated user.
895895
896-
:param limit: maximum number of jobs to return. Setting this limit enables pagination.
896+
:param limit: maximum number of jobs to return (with pagination).
897+
Can be set to ``None`` to disable pagination,
898+
but note that the backend might still silently cap the listing in practice.
897899
898900
:return: job_list: Dict of all jobs of the user.
899901
@@ -903,6 +905,9 @@ def list_jobs(self, limit: Union[int, None] = None) -> JobListingResponse:
903905
.. versionchanged:: 0.38.0
904906
Returns a :py:class:`~openeo.rest.models.general.JobListingResponse` object
905907
instead of simple ``List[dict]``.
908+
909+
.. versionchanged:: 0.41.0
910+
Change default value of ``limit`` to 100 (instead of unlimited).
906911
"""
907912
# TODO: Parse the result so that Job classes returned?
908913
# TODO: when pagination is enabled: how to expose link to next page?
@@ -994,7 +999,11 @@ def validate_process_graph(
994999
a local file path or URL to a JSON representation,
9951000
a :py:class:`~openeo.rest.multiresult.MultiResult` object, ...
9961001
997-
:return: list of errors (dictionaries with "code" and "message" fields)
1002+
:return: container of validation errors (dictionaries with "code" and "message" fields)
1003+
1004+
.. versionchanged:: 0.38.0
1005+
returns a :py:class:`~openeo.rest.models.general.ValidationResponse` object
1006+
instead of a simple list of error dictionaries.
9981007
"""
9991008
pg_with_metadata = self._build_request_with_process_graph(process_graph)["process"]
10001009
data = self.post(path="/validation", json=pg_with_metadata, expected_status=200).json()

openeo/rest/datacube.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
from openeo.rest.graph_building import CollectionProperty
7575
from openeo.rest.job import BatchJob, RESTJob
7676
from openeo.rest.mlmodel import MlModel
77+
from openeo.rest.models.general import ValidationResponse
7778
from openeo.rest.result import SaveResult
7879
from openeo.rest.service import Service
7980
from openeo.rest.udp import RESTUserDefinedProcess
@@ -2499,13 +2500,20 @@ def download(
24992500
on_response_headers=on_response_headers,
25002501
)
25012502

2502-
def validate(self) -> List[dict]:
2503+
def validate(self) -> ValidationResponse:
25032504
"""
25042505
Validate a process graph without executing it.
25052506
2506-
:return: list of errors (dictionaries with "code" and "message" fields)
2507+
:return: container of validation errors (dictionaries with "code" and "message" fields)
2508+
2509+
.. versionchanged:: 0.38.0
2510+
returns a :py:class:`~openeo.rest.models.general.ValidationResponse` object
2511+
instead of a simple list of error dictionaries.
25072512
"""
2508-
return self._connection.validate_process_graph(self.flat_graph())
2513+
# TODO this method implementation does not really override something
2514+
# it is just kept to override the doc.
2515+
# At some point this should be removed for simplicity.
2516+
return super().validate()
25092517

25102518
def tiled_viewing_service(self, type: str, **kwargs) -> Service:
25112519
return self._connection.create_service(self.flat_graph(), type=type, **kwargs)

0 commit comments

Comments
 (0)