-
Notifications
You must be signed in to change notification settings - Fork 41
First version of tile based job splitter #756
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
VincentVerelst
wants to merge
6
commits into
master
Choose a base branch
from
issue745-tile-splitter
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+299
−0
Open
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
016dd2e
First version of tile based job splitter #745
VincentVerelst dc7f57d
Make type hints python 3.8 compatible #745
VincentVerelst bc00789
changed some more type hints to be python3.8 compatible #745
VincentVerelst e2da1b3
fixed typo in projection #745
VincentVerelst 28e0de8
Merge branch 'master' into issue745-tile-splitter
VincentVerelst 05ccf7f
pr review changes #745
VincentVerelst File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import abc | ||
import math | ||
from typing import Dict, List, NamedTuple, Optional, Union | ||
|
||
import shapely | ||
from shapely.geometry import MultiPolygon, Polygon | ||
|
||
from openeo.util import normalize_crs | ||
|
||
|
||
class JobSplittingFailure(Exception):
    """Error raised by the job splitting helpers, e.g. for an unsupported geometry type."""
|
||
class _BoundingBox(NamedTuple):
    """Simple immutable container for a bounding box: four extents plus a CRS code."""

    west: float
    south: float
    east: float
    north: float
    # CRS of the extents; 4326 is used when none is given.
    crs: int = 4326

    @classmethod
    def from_dict(cls, d: Dict) -> "_BoundingBox":
        """
        Create a bounding box from a dictionary.

        :param d: mapping with the keys "west", "south", "east", "north" and optionally "crs".
            Additional keys are ignored. The mapping is left untouched.
        :return: bounding box with the "crs" value passed through ``normalize_crs``;
            the class default is used when "crs" is absent or ``None``
        :raises KeyError: when one of the four extent keys is missing
        """
        kwargs = {}
        # Normalize into a local value: writing it back into `d` would silently
        # change the caller's dictionary.
        raw_crs = d.get("crs")
        if raw_crs is not None:
            crs = normalize_crs(raw_crs)
            # Same fallback as `from_polygon`: no usable CRS means the default applies.
            if crs is not None:
                kwargs["crs"] = crs
        for name in cls._fields:
            if name not in cls._field_defaults:
                kwargs[name] = d[name]
        return cls(**kwargs)

    @classmethod
    def from_polygon(cls, polygon: Union[MultiPolygon, Polygon], crs: Optional[int] = None) -> "_BoundingBox":
        """
        Create a bounding box from the bounds of a shapely Polygon or MultiPolygon.

        :param polygon: geometry whose ``bounds`` provide west, south, east, north (in that order)
        :param crs: CRS of the geometry's coordinates; the class default is used when ``None``
        :return: bounding box enclosing the geometry
        """
        crs = normalize_crs(crs)
        return cls(*polygon.bounds, crs=cls._field_defaults["crs"] if crs is None else crs)

    def as_dict(self) -> Dict:
        """Get bounding box as a dictionary keyed by field name."""
        return self._asdict()

    def as_polygon(self) -> Polygon:
        """Get bounding box as a shapely Polygon (the CRS is not carried over)."""
        return shapely.geometry.box(minx=self.west, miny=self.south, maxx=self.east, maxy=self.north)
|
||
|
||
class _TileGridInterface(metaclass=abc.ABCMeta):
    """Abstract contract that tile grid implementations have to fulfil."""

    @abc.abstractmethod
    def get_tiles(self, geometry: Union[Dict, MultiPolygon, Polygon]) -> List[Polygon]:
        """
        Calculate the tiles that cover the given geometry.

        :param geometry: bounding box dictionary or shapely Polygon/MultiPolygon to cover
        :return: list of tiles (polygons)
        """
|
||
|
||
class _SizeBasedTileGrid(_TileGridInterface):
    """
    Specification of a tile grid, parsed from a size and a projection.

    The size is expressed in the unit of the projection: meters for UTM and
    EPSG:3857, degrees for WGS84. Coordinates of the geometry to cover are used
    as given: this class does not reproject them to the grid's projection.
    """

    def __init__(self, epsg: int, size: float):
        """
        :param epsg: projection of the grid, passed through ``normalize_crs``
        :param size: edge length of a tile, in the unit of the projection
        """
        self.epsg = normalize_crs(epsg)
        self.size = size

    @classmethod
    def from_size_projection(cls, size: float, projection: str) -> "_SizeBasedTileGrid":
        """Create a tile grid from size and projection (e.g. ``"EPSG:3857"``)."""
        return cls(normalize_crs(projection), size)

    def _epsg_is_meters(self) -> bool:
        """Check if the projection unit is in meters. (EPSG:3857 or UTM)"""
        return 32601 <= self.epsg <= 32660 or 32701 <= self.epsg <= 32760 or self.epsg == 3857

    @staticmethod
    def _split_bounding_box(to_cover: _BoundingBox, x_offset: float, tile_size: float) -> List[Polygon]:
        """
        Split a bounding box into tiles of given size.

        Tiles are aligned to a regular grid (shifted by ``x_offset`` along x)
        and clipped to the extent of ``to_cover``.

        :param to_cover: bounding box to split
        :param x_offset: offset of the grid origin along the x axis
        :param tile_size: size of tiles in unit of measure of the projection, must be positive
        :return: list of tiles (polygons), ordered by column (x) and then row (y)
        :raises JobSplittingFailure: when ``tile_size`` is not a positive number
        """
        # Zero would fail with a bare ZeroDivisionError below and a negative size
        # would silently produce no tiles at all; `not ... > 0` also rejects NaN.
        if not tile_size > 0:
            raise JobSplittingFailure(f"tile size must be a positive number, got {tile_size!r}")

        # Inclusive index range of the grid cells touched by the bounding box.
        x_first = int(math.floor((to_cover.west - x_offset) / tile_size))
        x_last = int(math.ceil((to_cover.east - x_offset) / tile_size)) - 1
        y_first = int(math.floor(to_cover.south / tile_size))
        y_last = int(math.ceil(to_cover.north / tile_size)) - 1

        return [
            _BoundingBox(
                # max/min clip the outermost tiles to the bounding box itself.
                west=max(x * tile_size + x_offset, to_cover.west),
                south=max(y * tile_size, to_cover.south),
                east=min((x + 1) * tile_size + x_offset, to_cover.east),
                north=min((y + 1) * tile_size, to_cover.north),
            ).as_polygon()
            for x in range(x_first, x_last + 1)
            for y in range(y_first, y_last + 1)
        ]

    def get_tiles(self, geometry: Union[Dict, MultiPolygon, Polygon]) -> List[Polygon]:
        """
        Calculate the tiles that cover the bounding box of the given geometry.

        :param geometry: bounding box dictionary (keys "west", "south", "east", "north",
            optionally "crs") or shapely Polygon/MultiPolygon
        :return: list of tiles (polygons)
        :raises JobSplittingFailure: when the geometry is of an unsupported type
            or the tile size is not positive
        """
        if isinstance(geometry, dict):
            bbox = _BoundingBox.from_dict(geometry)
        elif isinstance(geometry, (Polygon, MultiPolygon)):
            bbox = _BoundingBox.from_polygon(geometry, crs=self.epsg)
        else:
            raise JobSplittingFailure("geometry must be a dict or a shapely.geometry.Polygon or MultiPolygon")

        # NOTE(review): 500 km shift for metric projections, presumably mirroring
        # the UTM false easting - confirm the intent for EPSG:3857.
        x_offset = 500_000 if self._epsg_is_meters() else 0

        return self._split_bounding_box(bbox, x_offset, self.size)
|
||
|
||
def split_area(
    aoi: Union[Dict, MultiPolygon, Polygon], projection: str = "EPSG:3857", tile_size: float = 20_000.0
) -> List[Polygon]:
    """
    Split area of interest into tiles of given size and projection.

    :param aoi: area of interest (bounding box dictionary or shapely polygon)
    :param projection: projection to use for splitting. Default is web mercator (EPSG:3857).
        When ``aoi`` is a dictionary with a non-``None`` "crs" entry, that entry is used instead.
    :param tile_size: size of tiles in unit of measure of the projection
    :return: list of tiles (polygons).
    """
    if isinstance(aoi, dict):
        # A "crs" key that is present but None must not override the projection:
        # the tile grid cannot work without one.
        aoi_crs = aoi.get("crs")
        if aoi_crs is not None:
            projection = aoi_crs

    tile_grid = _SizeBasedTileGrid.from_size_projection(tile_size, projection)
    return tile_grid.get_tiles(aoi)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
import pytest | ||
import shapely | ||
|
||
from openeo.extra.job_management.job_splitting import ( | ||
JobSplittingFailure, | ||
_BoundingBox, | ||
_SizeBasedTileGrid, | ||
split_area, | ||
) | ||
|
||
|
||
@pytest.fixture
def mock_polygon_wgs():
    """Unit square polygon (0..1 on both axes), used with the WGS84 tile grid."""
    unit_square = shapely.geometry.box(0.0, 0.0, 1.0, 1.0)
    return unit_square
|
||
|
||
@pytest.fixture
def mock_polygon_utm():
    """Square polygon of 100 000 units per side, used with the metric (EPSG:3857) tile grid."""
    side = 100_000.0
    return shapely.geometry.box(0.0, 0.0, side, side)
|
||
|
||
@pytest.fixture
def mock_dict_no_crs():
    """Bounding box dictionary of the unit square without a "crs" entry."""
    return dict(west=0.0, south=0.0, east=1.0, north=1.0)
|
||
|
||
@pytest.fixture
def mock_dict_with_crs_utm():
    """Bounding box dictionary of 100 000 units per side with an explicit "crs" (EPSG:3857)."""
    return dict(west=0.0, south=0.0, east=100_000.0, north=100_000.0, crs="EPSG:3857")
|
||
|
||
|
||
class TestBoundingBox:
    """Unit tests for the ``_BoundingBox`` container."""

    def test_basic(self):
        """Positional construction fills the extents and defaults the CRS to 4326."""
        box = _BoundingBox(1, 2, 3, 4)
        assert (box.west, box.south, box.east, box.north, box.crs) == (1, 2, 3, 4, 4326)

    def test_from_dict(self):
        """A CRS string in the dictionary ends up as an integer EPSG code."""
        spec = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": "epsg:32633"}
        box = _BoundingBox.from_dict(spec)
        assert box.west == 1
        assert box.south == 2
        assert box.east == 3
        assert box.north == 4
        assert box.crs == 32633

    def test_from_dict_defaults(self):
        """Without a "crs" key the default CRS 4326 is used."""
        spec = {"west": 1, "south": 2, "east": 3, "north": 4}
        box = _BoundingBox.from_dict(spec)
        assert box.west == 1
        assert box.south == 2
        assert box.east == 3
        assert box.north == 4
        assert box.crs == 4326

    def test_from_dict_underspecified(self):
        """Missing extent keys result in a KeyError."""
        incomplete = {"west": 1, "south": 2, "color": "red"}
        with pytest.raises(KeyError):
            _BoundingBox.from_dict(incomplete)

    def test_from_dict_overspecified(self):
        """Unknown keys in the dictionary are ignored."""
        spec = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": "EPSG:4326", "color": "red"}
        box = _BoundingBox.from_dict(spec)
        assert box.west == 1
        assert box.south == 2
        assert box.east == 3
        assert box.north == 4
        assert box.crs == 4326

    def test_from_polygon(self):
        """The bounds of a polygon become the extents; the CRS defaults to 4326."""
        source = shapely.geometry.box(1, 2, 3, 4)
        box = _BoundingBox.from_polygon(source)
        assert box.west == 1
        assert box.south == 2
        assert box.east == 3
        assert box.north == 4
        assert box.crs == 4326

    def test_as_dict(self):
        """The dictionary view contains all five fields."""
        expected = {"west": 1, "south": 2, "east": 3, "north": 4, "crs": 4326}
        assert _BoundingBox(1, 2, 3, 4).as_dict() == expected

    def test_as_polygon(self):
        """The polygon view is a shapely Polygon with the four corners of the box."""
        result = _BoundingBox(1, 2, 3, 4).as_polygon()
        assert isinstance(result, shapely.geometry.Polygon)
        corners = set(result.exterior.coords)
        assert corners == {(1, 2), (3, 2), (3, 4), (1, 4)}
|
||
|
||
class TestSizeBasedTileGrid:
    """Unit tests for ``_SizeBasedTileGrid``."""

    def test_from_size_projection(self):
        """The factory stores the EPSG code as integer and keeps the size."""
        grid = _SizeBasedTileGrid.from_size_projection(0.1, "EPSG:4326")
        assert grid.epsg == 4326
        assert grid.size == 0.1

    def test_get_tiles_raises_exception(self):
        """get_tiles fails when the geometry is neither a dict nor a shapely (Multi)Polygon."""
        grid = _SizeBasedTileGrid.from_size_projection(0.1, "EPSG:4326")
        with pytest.raises(JobSplittingFailure):
            grid.get_tiles("invalid_geometry")

    def test_simple_get_tiles_dict(self, mock_dict_with_crs_utm, mock_polygon_utm):
        """Dict input, tile size equal to the geometry size: the original geometry is returned as polygon."""
        grid = _SizeBasedTileGrid.from_size_projection(100_000, "EPSG:3857")
        result = grid.get_tiles(mock_dict_with_crs_utm)
        assert len(result) == 1
        assert result[0] == mock_polygon_utm

    def test_multiple_get_tile_dict(self, mock_dict_with_crs_utm):
        """Dict input, tile size smaller than the geometry size: the geometry is split into multiple tiles."""
        grid = _SizeBasedTileGrid.from_size_projection(20_000, "EPSG:3857")
        result = grid.get_tiles(mock_dict_with_crs_utm)
        assert len(result) == 25
        first_tile = shapely.geometry.box(0.0, 0.0, 20_000.0, 20_000.0)
        assert result[0] == first_tile

    def test_larger_get_tile_dict(self, mock_dict_with_crs_utm, mock_polygon_utm):
        """Dict input, tile size larger than the geometry size: the original geometry is returned."""
        grid = _SizeBasedTileGrid.from_size_projection(200_000, "EPSG:3857")
        result = grid.get_tiles(mock_dict_with_crs_utm)
        assert len(result) == 1
        assert result[0] == mock_polygon_utm

    def test_get_tiles_polygon_wgs(self, mock_polygon_wgs):
        """Polygon input in WGS84 with a WGS84 tile grid."""
        grid = _SizeBasedTileGrid.from_size_projection(0.1, "EPSG:4326")
        result = grid.get_tiles(mock_polygon_wgs)
        assert len(result) == 100
        first_tile = shapely.geometry.box(0.0, 0.0, 0.1, 0.1)
        assert result[0] == first_tile

    def test_simple_get_tiles_polygon(self, mock_polygon_utm):
        """Polygon input, tile size equal to the geometry size: the original geometry is returned."""
        grid = _SizeBasedTileGrid.from_size_projection(100_000.0, "EPSG:3857")
        result = grid.get_tiles(mock_polygon_utm)
        assert len(result) == 1
        assert result[0] == mock_polygon_utm

    def test_larger_get_tiles_polygon(self, mock_polygon_utm):
        """Polygon input, tile size larger than the geometry size: the original geometry is returned."""
        grid = _SizeBasedTileGrid.from_size_projection(200_000.0, "EPSG:3857")
        result = grid.get_tiles(mock_polygon_utm)
        assert len(result) == 1
        assert result[0] == mock_polygon_utm
|
||
|
||
def test_split_area_default():
    """split_area with default projection and tile size returns a single 20 km tile."""
    bbox = {"west": 0.0, "south": 0.0, "east": 20_000.0, "north": 20_000.0, "crs": "EPSG:3857"}
    result = split_area(bbox)
    expected_tile = shapely.geometry.box(0.0, 0.0, 20_000.0, 20_000.0)
    assert len(result) == 1
    assert result[0] == expected_tile
|
||
|
||
def test_split_area_custom():
    """split_area with an explicit WGS84 projection and a tile size of one degree."""
    bbox = {"west": 0.0, "south": 0.0, "east": 1.0, "north": 1.0, "crs": "EPSG:4326"}
    result = split_area(bbox, "EPSG:4326", 1.0)
    expected_tile = shapely.geometry.box(0.0, 0.0, 1.0, 1.0)
    assert len(result) == 1
    assert result[0] == expected_tile
|
||
|
||
def test_split_area_custom_no_crs_specified():
    """split_area without a projection argument: the "crs" of the dictionary should be used."""
    bbox = {"west": 0.0, "south": 0.0, "east": 1.0, "north": 1.0, "crs": "EPSG:4326"}
    result = split_area(aoi=bbox, tile_size=1.0)
    expected_tile = shapely.geometry.box(0.0, 0.0, 1.0, 1.0)
    assert len(result) == 1
    assert result[0] == expected_tile
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess it's a matter of taste, but these four fixtures
mock_polygon_wgs
, ...mock_dict_with_crs_utm
feel like overkill to me. I guess the intent is to be DRY, but in testing I think this extra level of indirection for these simple constructs is worse for readability than violating DRY.
Also, if you still want to be DRY, I think you should just use constants instead of pytest fixtures (you are not really using them as fixtures, in terms of setup/teardown).