diff --git a/cubedash/_stac.py b/cubedash/_stac.py index e94f2d59e..55eea131e 100644 --- a/cubedash/_stac.py +++ b/cubedash/_stac.py @@ -283,11 +283,10 @@ def as_stac_collection(res: CollectionItem) -> pystac.Collection: providers=[], extent=Extent( pystac.SpatialExtent( - bboxes=[ - res.footprint_wgs84.bounds - if res.footprint_wgs84 - else [-180.0, -90.0, 180.0, 90.0] - ] + # TODO: Find a nicer way to make the typechecker happier + # pystac is too specific in wanting a list[float | int] + # odc-geo BoundingBox class is a Sequence[float] + bboxes=[list(res.bbox) if res.bbox else [-180.0, -90.0, 180.0, 90.0]] ), temporal=pystac.TemporalExtent( intervals=[ diff --git a/cubedash/index/api.py b/cubedash/index/api.py index a7d732507..54bc06888 100644 --- a/cubedash/index/api.py +++ b/cubedash/index/api.py @@ -140,10 +140,6 @@ def put_summary( @abstractmethod def product_summary_cols(self, product_name: str) -> Row: ... - @abstractmethod - def collection_cols(self) -> Select: - """Get all columns necessary for creating a Collection""" - @abstractmethod def collections_search_query( self, diff --git a/cubedash/index/postgis/_api.py b/cubedash/index/postgis/_api.py index 75ee71c52..39373a300 100644 --- a/cubedash/index/postgis/_api.py +++ b/cubedash/index/postgis/_api.py @@ -42,7 +42,7 @@ union_all, update, ) -from sqlalchemy.dialects.postgresql import TSTZRANGE, insert +from sqlalchemy.dialects.postgresql import TSTZRANGE, array, insert from sqlalchemy.orm import aliased from sqlalchemy.sql import ColumnElement from sqlalchemy.types import TIMESTAMP @@ -346,27 +346,6 @@ def product_time_summary( ) ) - @override - def collection_cols(self) -> Select: - product_overview = ( - select( - ProductSpatial.name, - TimeOverview.footprint_geometry, - ProductSpatial.time_earliest, - ProductSpatial.time_latest, - TimeOverview.period_type, - ) - .select_from(TimeOverview) - .join(ProductSpatial) - .cte("product_overview") - ) - - return ( - select(ODC_PRODUCT.definition, 
product_overview) - .select_from(product_overview) - .join(ODC_PRODUCT, product_overview.c.name == ODC_PRODUCT.name) - ) - @override def collections_search_query( self, @@ -376,31 +355,59 @@ def collections_search_query( bbox: tuple[float, float, float, float] | None = None, time: tuple[datetime, datetime] | None = None, q: Sequence[str] | None = None, - ) -> Result: - collection = self.collection_cols().subquery() - query = select(collection).where(collection.c.period_type == "all") - + ) -> list[Row]: + # STAC Collections only hold a bounding box in EPSG:4326, no polygons + # Calculate the bounding box on the server, it's far more efficient + + # The Cubedash Product (which maps to a STAC Collection) doesn't have + # any bounding box or geometry attached, all the geometries are in the + # TimeOverview table, grouped by different `period_types`. In this case + # we use the `period_type=="all"` to get the one that covers all time. + collection_bbox = func.Box2D( + func.ST_Transform(TimeOverview.footprint_geometry, 4326) + ) + bbox_array = array( + [ + func.ST_XMin(collection_bbox), + func.ST_YMin(collection_bbox), + func.ST_XMax(collection_bbox), + func.ST_YMax(collection_bbox), + ] + ) + query = ( + select( + ODC_PRODUCT.definition, + ProductSpatial.name, + case( + (func.ST_XMin(collection_bbox).is_(None), None), else_=bbox_array + ).label("bbox"), + ProductSpatial.time_earliest, + ProductSpatial.time_latest, + ) + .select_from(TimeOverview) + .join(ProductSpatial, ProductSpatial.id == TimeOverview.product_ref) + .join(ODC_PRODUCT, ProductSpatial.name == ODC_PRODUCT.name) + .where(TimeOverview.period_type == "all") + ) if name: - query = query.where(collection.c.name == name) + query = query.where(ProductSpatial.name == name) if bbox: query = query.where( - collection.c.footprint_geometry.intersects(func.ST_MakeEnvelope(*bbox)) + TimeOverview.footprint_geometry.intersects(func.ST_MakeEnvelope(*bbox)) ) if time: query = query.where( - and_( - default_utc(time[0]) 
<= default_utc(collection.c.time_latest), - default_utc(collection.c.time_earliest) <= default_utc(time[1]), - ) + default_utc(time[0]) <= ProductSpatial.time_latest, + ProductSpatial.time_earliest <= default_utc(time[1]), ) if q: title = SimpleDocField( name="title", description="product title", - alchemy_column=collection.c.definition, + alchemy_column=ODC_PRODUCT.definition, indexed=False, offset=("metadata", "title"), ) @@ -408,9 +415,9 @@ def collections_search_query( description = SimpleDocField( name="description", description="product description", - alchemy_column=collection.c.definition, + alchemy_column=ODC_PRODUCT.definition, indexed=False, - offset=("description"), + offset=("description",), ) expressions = [] @@ -419,7 +426,7 @@ def collections_search_query( [ title.alchemy_expression.icontains(value), description.alchemy_expression.icontains(value), - collection.c.name.icontains(value), + ODC_PRODUCT.name.icontains(value), ] ) query = query.where(or_(*expressions)) diff --git a/cubedash/index/postgres/_api.py b/cubedash/index/postgres/_api.py index 58fd9b191..e1449e95c 100644 --- a/cubedash/index/postgres/_api.py +++ b/cubedash/index/postgres/_api.py @@ -41,7 +41,7 @@ text, union_all, ) -from sqlalchemy.dialects.postgresql import TSTZRANGE, insert +from sqlalchemy.dialects.postgresql import TSTZRANGE, array, insert from sqlalchemy.sql import ColumnElement import cubedash.summary._schema as _schema @@ -392,26 +392,6 @@ def product_time_summary( ) ) - @override - def collection_cols(self) -> Select: - product_overview = ( - select( - PRODUCT.c.name, - TIME_OVERVIEW.c.footprint_geometry, - PRODUCT.c.time_earliest, - PRODUCT.c.time_latest, - TIME_OVERVIEW.c.period_type, - ) - .select_from(TIME_OVERVIEW.join(PRODUCT)) - .cte("product_overview") - ) - - return select(ODC_PRODUCT.c.definition, product_overview).select_from( - product_overview.join( - ODC_PRODUCT, product_overview.c.name == ODC_PRODUCT.c.name - ) - ) - @override def collections_search_query( 
self, @@ -422,30 +402,64 @@ def collections_search_query( time: tuple[datetime, datetime] | None = None, q: Sequence[str] | None = None, ) -> Result: - collection = self.collection_cols().alias("collection") - query = select(collection).where(collection.c.period_type == "all") + # STAC Collections only hold a bounding box in EPSG:4326, no polygons + # Calculate the bounding box on the server, it's far more efficient. + + # The Cubedash Product (which maps to a STAC Collection) doesn't have + # any bounding box or geometry attached, all the geometries are in the + # TimeOverview table, grouped by different `period_types`. In this case + # we use the `period_type=="all"` to get the one that covers all time. + + collection_bbox = func.Box2D( + func.ST_Transform(TIME_OVERVIEW.c.footprint_geometry, 4326) + ) + bbox_array = array( + [ + func.ST_XMin(collection_bbox), + func.ST_YMin(collection_bbox), + func.ST_XMax(collection_bbox), + func.ST_YMax(collection_bbox), + ] + ) + query = ( + select( + ODC_PRODUCT.c.definition, + PRODUCT.c.name, + case( + (func.ST_XMin(collection_bbox).is_(None), None), else_=bbox_array + ).label("bbox"), + PRODUCT.c.time_earliest, + PRODUCT.c.time_latest, + ) + .select_from( + TIME_OVERVIEW.join( + PRODUCT, PRODUCT.c.id == TIME_OVERVIEW.c.product_ref + ).join(ODC_PRODUCT, PRODUCT.c.name == ODC_PRODUCT.c.name) + ) + .where(TIME_OVERVIEW.c.period_type == "all") + ) if name: - query = query.where(collection.c.name == name) + query = query.where(PRODUCT.c.name == name) if bbox: query = query.where( - collection.c.footprint_geometry.intersects(func.ST_MakeEnvelope(*bbox)) + TIME_OVERVIEW.c.footprint_geometry.intersects( + func.ST_MakeEnvelope(*bbox) + ) ) if time: query = query.where( - and_( - default_utc(time[0]) <= default_utc(collection.c.time_latest), - default_utc(collection.c.time_earliest) <= default_utc(time[1]), - ) + default_utc(time[0]) <= PRODUCT.c.time_latest, + PRODUCT.c.time_earliest <= default_utc(time[1]), ) if q: title = 
SimpleDocField( name="title", description="product title", - alchemy_column=collection.c.definition, + alchemy_column=ODC_PRODUCT.c.definition, indexed=False, offset=("metadata", "title"), ) @@ -453,9 +467,9 @@ def collections_search_query( description = SimpleDocField( name="description", description="product description", - alchemy_column=collection.c.definition, + alchemy_column=ODC_PRODUCT.c.definition, indexed=False, - offset=("description"), + offset=("description",), ) expressions = [] @@ -464,7 +478,7 @@ def collections_search_query( [ title.alchemy_expression.icontains(value), description.alchemy_expression.icontains(value), - collection.c.name.icontains(value), + ODC_PRODUCT.c.name.icontains(value), ] ) query = query.where(or_(*expressions)) diff --git a/cubedash/summary/_stores.py b/cubedash/summary/_stores.py index e065b0828..4d83a3ffb 100644 --- a/cubedash/summary/_stores.py +++ b/cubedash/summary/_stores.py @@ -17,13 +17,12 @@ from geoalchemy2 import WKBElement from geoalchemy2 import shape as geo_shape from geoalchemy2.shape import from_shape, to_shape -from odc.geo import MaybeCRS +from odc.geo import BoundingBox, MaybeCRS from pygeofilter.backends.sqlalchemy.evaluate import ( SQLAlchemyFilterEvaluator as FilterEvaluator, ) from pygeofilter.parsers.cql2_json import parse as parse_cql2_json from pygeofilter.parsers.cql2_text import parse as parse_cql2_text -from shapely.geometry import MultiPolygon from shapely.geometry.base import BaseGeometry from sqlalchemy import Row, RowMapping, func, select from sqlalchemy.dialects.postgresql import TSTZRANGE @@ -187,22 +186,7 @@ class CollectionItem: definition: dict[str, Any] time_earliest: datetime | None time_latest: datetime | None - footprint_geometry: Geometry | None - footprint_crs: str | None - - @property - def footprint_wgs84(self) -> MultiPolygon | None: - if not self.footprint_geometry: - return None - if not self.footprint_crs: - _LOG.warning(f"Geometry without a crs for {self.name}", 
stacklevel=2) - return None - - return ( - Geometry(self.footprint_geometry, crs=self.footprint_crs) - .to_crs("EPSG:4326", wrapdateline=True) - .geom - ) + bbox: BoundingBox | None @property def title(self) -> str: @@ -1602,6 +1586,9 @@ def _summary_from_row( def _row_to_collection( res: Row, grouping_timezone: tzinfo = default_timezone ) -> CollectionItem: + # the 'res' at the moment has + # ('definition', 'name', 'bbox', 'time_earliest', 'time_latest') + return CollectionItem( name=res.name, time_earliest=res.time_earliest.astimezone(grouping_timezone) @@ -1610,16 +1597,7 @@ def _row_to_collection( time_latest=res.time_latest.astimezone(grouping_timezone) if res.time_latest else None, - footprint_geometry=( - None - if res.footprint_geometry is None - else geo_shape.to_shape(res.footprint_geometry) - ), - footprint_crs=( - None - if res.footprint_geometry is None or res.footprint_geometry.srid == -1 - else "EPSG:{}".format(res.footprint_geometry.srid) - ), + bbox=res.bbox, definition=res.definition, ) diff --git a/integration_tests/asserts.py b/integration_tests/asserts.py index b13dae343..2b4f010d5 100644 --- a/integration_tests/asserts.py +++ b/integration_tests/asserts.py @@ -101,7 +101,7 @@ def get_json(client: FlaskClient, url: str, expect_status_code=200) -> dict: f"Expected status {expect_status_code} not {rv.status_code}." 
f"\nGot:\n{indent(rv.data.decode('utf-8'), ' ' * 6)}" ) - assert rv.is_json, "Expected json content type in response" + assert rv.is_json, "Expected JSON content type in response" data = rv.json assert data is not None, "Empty response from server" except AssertionError: diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index ff261405d..c1586e2f1 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -707,6 +707,38 @@ def test_stac_collection(stac_client: FlaskClient): validate_items(_iter_items_across_pages(stac_client, item_links), expect_count=306) +# TODO +# We probably should check the conformance classes being returned. +# They're in the root /stac/ response under the `conformsTo` item. +# They are also served up at /stac/conformance in recent releases + +####### +# Tests for STAC API - Features/Collections +# https://api.stacspec.org/v1.0.0/ogcapi-features/ +# https://github.com/radiantearth/stac-api-spec/tree/release/v1.0.0/ogcapi-features +# +# These cover the responses available at /stac/collections/* +# The spec also offers a cut-down version called 'Collections', but Explorer +# supports the full 'Features' specification. 
+ + +@pytest.mark.parametrize("env_name", ("default",), indirect=True) +@pytest.mark.benchmark +def test_stac_collection_toplevel(stac_client: FlaskClient) -> None: + # Retrieve the top level response and check that it's JSON + res = get_json(stac_client, "/stac/collections") + # The response must include some links + assert "links" in res + # Must have a root and self link + link_rels = set(link["rel"] for link in res["links"]) + assert "root" in link_rels + assert "self" in link_rels + # The response must include a set of collections + assert "collections" in res + for collection in res["collections"]: + assert_collection(collection) + + @pytest.mark.parametrize("env_name", ("default",), indirect=True) def test_stac_collection_query(stac_client: FlaskClient) -> None: res = get_json(stac_client, "/stac/collections?q=ard")