-
Notifications
You must be signed in to change notification settings - Fork 33
Speedup STAC collections endpoint #871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
edb409e
ec2c78d
e1c7817
85bbc66
9e270b9
950a435
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,7 +42,7 @@ | |
union_all, | ||
update, | ||
) | ||
from sqlalchemy.dialects.postgresql import TSTZRANGE, insert | ||
from sqlalchemy.dialects.postgresql import TSTZRANGE, array, insert | ||
from sqlalchemy.orm import aliased | ||
from sqlalchemy.sql import ColumnElement | ||
from sqlalchemy.types import TIMESTAMP | ||
|
@@ -346,27 +346,6 @@ def product_time_summary( | |
) | ||
) | ||
|
||
@override | ||
def collection_cols(self) -> Select: | ||
product_overview = ( | ||
select( | ||
ProductSpatial.name, | ||
TimeOverview.footprint_geometry, | ||
ProductSpatial.time_earliest, | ||
ProductSpatial.time_latest, | ||
TimeOverview.period_type, | ||
) | ||
.select_from(TimeOverview) | ||
.join(ProductSpatial) | ||
.cte("product_overview") | ||
) | ||
|
||
return ( | ||
select(ODC_PRODUCT.definition, product_overview) | ||
.select_from(product_overview) | ||
.join(ODC_PRODUCT, product_overview.c.name == ODC_PRODUCT.name) | ||
) | ||
|
||
@override | ||
def collections_search_query( | ||
self, | ||
|
@@ -376,41 +355,69 @@ def collections_search_query( | |
bbox: tuple[float, float, float, float] | None = None, | ||
time: tuple[datetime, datetime] | None = None, | ||
q: Sequence[str] | None = None, | ||
) -> Result: | ||
collection = self.collection_cols().subquery() | ||
query = select(collection).where(collection.c.period_type == "all") | ||
|
||
) -> list[Row]: | ||
# STAC Collections only hold a bounding box in EPSG:4326, no polygons | ||
# Calculate the bounding box on the server, it's far more efficient | ||
|
||
# The Cubedash Product (which maps to a STAC Collection) doesn't have | ||
# any bounding box or geometry attached, all the geometries are in the | ||
# TimeOverview table, grouped by different `period_types`. In this case | ||
# we use the `period_type=="all"` to get the one that covers all time. | ||
collection_bbox = func.Box2D( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The RHS takes its data from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm. I might add some comments. The Cubedash Product (which maps to a STAC Collection) doesn't have any bounding box or geometry attached, all the geometries are in the TimeOverview table, grouped by different |
||
func.ST_Transform(TimeOverview.footprint_geometry, 4326) | ||
) | ||
bbox_array = array( | ||
[ | ||
func.ST_XMin(collection_bbox), | ||
omad marked this conversation as resolved.
Show resolved
Hide resolved
|
||
func.ST_YMin(collection_bbox), | ||
func.ST_XMax(collection_bbox), | ||
func.ST_YMax(collection_bbox), | ||
] | ||
) | ||
query = ( | ||
select( | ||
ODC_PRODUCT.definition, | ||
ProductSpatial.name, | ||
case( | ||
(func.ST_XMin(collection_bbox).is_(None), None), else_=bbox_array | ||
).label("bbox"), | ||
ProductSpatial.time_earliest, | ||
ProductSpatial.time_latest, | ||
) | ||
.select_from(TimeOverview) | ||
.join(ProductSpatial, ProductSpatial.id == TimeOverview.product_ref) | ||
pjonsson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.join(ODC_PRODUCT, ProductSpatial.name == ODC_PRODUCT.name) | ||
.where(TimeOverview.period_type == "all") | ||
) | ||
if name: | ||
query = query.where(collection.c.name == name) | ||
query = query.where(ProductSpatial.name == name) | ||
|
||
if bbox: | ||
query = query.where( | ||
collection.c.footprint_geometry.intersects(func.ST_MakeEnvelope(*bbox)) | ||
TimeOverview.footprint_geometry.intersects(func.ST_MakeEnvelope(*bbox)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no test coverage for most of these if-branches, but I'll trust you know what you are doing with these changes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, there's a least one bug that I found in these if-branches. The 'time' filter was crashing by trying to apply a python datetime function to a SQLAlchemy Column. I only added one test in this PR. There's room for more... In this case, |
||
) | ||
|
||
if time: | ||
query = query.where( | ||
and_( | ||
default_utc(time[0]) <= default_utc(collection.c.time_latest), | ||
default_utc(collection.c.time_earliest) <= default_utc(time[1]), | ||
) | ||
default_utc(time[0]) <= ProductSpatial.time_latest, | ||
ProductSpatial.time_earliest <= default_utc(time[1]), | ||
) | ||
|
||
if q: | ||
title = SimpleDocField( | ||
name="title", | ||
description="product title", | ||
alchemy_column=collection.c.definition, | ||
alchemy_column=ODC_PRODUCT.definition, | ||
indexed=False, | ||
offset=("metadata", "title"), | ||
) | ||
|
||
description = SimpleDocField( | ||
name="description", | ||
description="product description", | ||
alchemy_column=collection.c.definition, | ||
alchemy_column=ODC_PRODUCT.definition, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The corresponding postgres change also fixes the type of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that This is ugly though, I'll make them match. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I fixed type annotations around that (and a bunch of usages) in datacube-core, I'm quite sure you need the tuple. A bit surprised the type checker wasn't complaining about these lines. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! pyright was complaining, but mypy wasn't for some reason. It's fixed now regardless. |
||
indexed=False, | ||
offset=("description"), | ||
offset=("description",), | ||
) | ||
|
||
expressions = [] | ||
|
@@ -419,7 +426,7 @@ def collections_search_query( | |
[ | ||
title.alchemy_expression.icontains(value), | ||
description.alchemy_expression.icontains(value), | ||
collection.c.name.icontains(value), | ||
ODC_PRODUCT.name.icontains(value), | ||
pjonsson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
] | ||
) | ||
query = query.where(or_(*expressions)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's not going to help right now, but it's possible pystac would accept a PR that changes the type annotation from list to Sequence.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, I was thinking that's the best solution too.