Skip to content

Commit 23b57ab

Browse files
authored
feat: switch to collections for config (#12)
* feat: switch to collections for config
* fix: validation script
1 parent b992a84 commit 23b57ab

File tree

12 files changed

+130
-76
lines changed

12 files changed

+130
-76
lines changed

.env.local

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
STAC_FASTAPI_GEOPARQUET_HREF = "data/naip.parquet"
1+
STAC_FASTAPI_COLLECTIONS_HREF = "data/collections.json"

data/collections.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
[
2+
{
3+
"type": "Collection",
4+
"stac_version": "1.1.0",
5+
"id": "naip",
6+
"description": "This collection was generated by rustac v0.12.0 from 10000 items",
7+
"license": "other",
8+
"extent": {
9+
"spatial": {
10+
"bbox": [
11+
[
12+
-109.130426,
13+
36.933639,
14+
-101.993445,
15+
41.067413
16+
]
17+
]
18+
},
19+
"temporal": {
20+
"interval": [
21+
[
22+
"2019-09-19T00:00:00Z",
23+
"2022-08-27T16:00:00Z"
24+
]
25+
]
26+
}
27+
},
28+
"links": [],
29+
"assets": {
30+
"data": {
31+
"href": "./naip.parquet",
32+
"type": "application/vnd.apache.parquet"
33+
}
34+
}
35+
}
36+
]

data/config.toml

Lines changed: 0 additions & 1 deletion
This file was deleted.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ dependencies = [
99
"fastapi>=0.115.8",
1010
"geojson-pydantic>=1.2.0",
1111
"pydantic>=2.10.4",
12-
"rustac==0.7.0b1",
12+
"rustac @ git+https://github.com/stac-utils/rustac-py",
1313
"stac-fastapi-api>=5.0.2",
1414
"stac-fastapi-extensions>=5.0.2",
1515
"stac-fastapi-types>=5.0.2",

scripts/dev

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22

33
set -e
44

5-
STAC_FASTAPI_GEOPARQUET_HREF=data/naip.parquet uv run fastapi dev src/stac_fastapi/geoparquet/main.py
5+
STAC_FASTAPI_COLLECTIONS_HREF=data/collections.json uv run fastapi dev src/stac_fastapi/geoparquet/main.py

scripts/generate-collections

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/env python3
2+
3+
import asyncio
4+
import json
5+
import sys
6+
from pathlib import Path
7+
8+
import pystac.client
9+
import pystac.utils
10+
import rustac
11+
12+
OUTPUT_PATH = Path(__file__).parents[1] / "data" / "collections.json"
13+
14+
15+
async def main() -> None:
16+
if len(sys.argv) == 1:
17+
raise Exception("ERROR: must provide at least one href on the command line")
18+
19+
collections = []
20+
for href in sys.argv[1:]:
21+
items = await rustac.read(href)
22+
collection = rustac.collection_from_id_and_items(
23+
Path(href).stem, items["features"]
24+
)
25+
collection["links"] = []
26+
collection["assets"] = {
27+
"data": {
28+
"href": pystac.utils.make_relative_href(
29+
str(Path(href).absolute()), OUTPUT_PATH
30+
),
31+
"type": "application/vnd.apache.parquet",
32+
}
33+
}
34+
collections.append(collection)
35+
36+
with open(OUTPUT_PATH, "w") as f:
37+
json.dump(collections, f, indent=2)
38+
39+
40+
if __name__ == "__main__":
41+
asyncio.run(main())

src/stac_fastapi/geoparquet/api.py

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
import json
2+
from collections import defaultdict
13
from contextlib import asynccontextmanager
2-
from pathlib import Path
34
from typing import Any, AsyncIterator, TypedDict
45

5-
import tomllib
6-
from fastapi import FastAPI
6+
import pystac.utils
7+
from fastapi import FastAPI, HTTPException
78
from rustac import DuckdbClient
89

910
import stac_fastapi.api.models
@@ -12,7 +13,9 @@
1213

1314
from .client import Client
1415
from .search import SearchGetRequest, SearchPostRequest
15-
from .settings import Settings, StacFastapiGeoparquetSettings
16+
from .settings import Settings
17+
18+
GEOPARQUET_MEDIA_TYPE = "application/vnd.apache.parquet"
1619

1720
GetSearchRequestModel = stac_fastapi.api.models.create_request_model(
1821
model_name="SearchGetRequest",
@@ -40,7 +43,7 @@ class State(TypedDict):
4043
collections: dict[str, dict[str, Any]]
4144
"""A mapping of collection id to collection."""
4245

43-
hrefs: dict[str, str]
46+
hrefs: dict[str, list[str]]
4447
"""A mapping of collection id to geoparquet href."""
4548

4649

@@ -57,41 +60,36 @@ def create(
5760
stac_fastapi_description="A stac-fastapi server backend by stac-geoparquet",
5861
)
5962

60-
if settings.stac_fastapi_geoparquet_href.endswith(".toml"):
61-
with open(settings.stac_fastapi_geoparquet_href, "rb") as f:
62-
data = tomllib.load(f)
63-
stac_fastapi_geoparquet_settings = StacFastapiGeoparquetSettings.model_validate(
64-
data
65-
)
66-
config_directory = Path(settings.stac_fastapi_geoparquet_href).parent
67-
hrefs = []
68-
for href in stac_fastapi_geoparquet_settings.hrefs:
69-
if Path(href).is_absolute():
70-
hrefs.append(href)
71-
else:
72-
hrefs.append(str(config_directory.joinpath(href).resolve()))
73-
else:
74-
hrefs = [settings.stac_fastapi_geoparquet_href]
63+
with open(settings.stac_fastapi_collections_href, "rb") as f:
64+
collections = json.load(f)
7565

7666
@asynccontextmanager
7767
async def lifespan(app: FastAPI) -> AsyncIterator[State]:
7868
client = app.extra["duckdb_client"]
79-
collections = dict()
80-
href_dict = dict()
81-
for href in hrefs:
82-
for collection in client.get_collections(href):
83-
if collection["id"] in collections:
84-
raise ValueError(
85-
"cannot have two items in the same collection in different "
86-
"geoparquet files"
69+
settings: Settings = app.extra["settings"]
70+
collections = app.extra["collections"]
71+
collection_dict = dict()
72+
hrefs = defaultdict(list)
73+
for collection in collections:
74+
if collection["id"] in collection_dict:
75+
raise HTTPException(
76+
500, f"two collections with the same id: {collection.id}"
77+
)
78+
else:
79+
collection_dict[collection["id"]] = collection
80+
for key, asset in collection["assets"].items():
81+
if asset["type"] == GEOPARQUET_MEDIA_TYPE:
82+
hrefs[collection["id"]].append(
83+
pystac.utils.make_absolute_href(
84+
asset["href"],
85+
settings.stac_fastapi_collections_href,
86+
start_is_dir=False,
87+
)
8788
)
88-
else:
89-
collections[collection["id"]] = collection
90-
href_dict[collection["id"]] = href
9189
yield {
9290
"client": client,
93-
"collections": collections,
94-
"hrefs": href_dict,
91+
"collections": collection_dict,
92+
"hrefs": hrefs,
9593
}
9694

9795
api = StacApi(
@@ -102,6 +100,8 @@ async def lifespan(app: FastAPI) -> AsyncIterator[State]:
102100
openapi_url=settings.openapi_url,
103101
docs_url=settings.docs_url,
104102
redoc_url=settings.docs_url,
103+
settings=settings,
104+
collections=collections,
105105
duckdb_client=duckdb_client,
106106
),
107107
search_get_request_model=GetSearchRequestModel,

src/stac_fastapi/geoparquet/client.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,16 @@ async def search(
162162
**kwargs: Any,
163163
) -> ItemCollection:
164164
client = cast(DuckdbClient, request.state.client)
165-
hrefs = cast(dict[str, str], request.state.hrefs)
165+
hrefs = cast(dict[str, list[str]], request.state.hrefs)
166166

167-
hrefs_to_search = set()
167+
hrefs_to_search = []
168168
if search.collections:
169169
for collection in search.collections:
170-
if href := hrefs.get(collection):
171-
hrefs_to_search.add(href)
170+
if collection_hrefs := hrefs.get(collection):
171+
hrefs_to_search.extend(collection_hrefs)
172172
else:
173-
hrefs_to_search.update(hrefs.values())
173+
for collection_hrefs in hrefs.values():
174+
hrefs_to_search.extend(collection_hrefs)
174175

175176
if len(hrefs) > 1:
176177
raise ValidationError(
src/stac_fastapi/geoparquet/settings.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,10 @@
1-
from pydantic import BaseModel
2-
31
from stac_fastapi.types.config import ApiSettings
42

53

64
class Settings(ApiSettings): # type: ignore
75
"""stac-fastapi-geoparquet settings"""
86

9-
stac_fastapi_geoparquet_href: str
10-
"""This can either be the href of a single geoparquet file, or the href of a TOML
11-
configuration file.
12-
"""
13-
14-
stac_fastapi_duckdb_extension_directory: str | None = None
15-
"""DuckDB extension directory (if none, the default will be used)"""
16-
7+
stac_fastapi_collections_href: str
8+
"""A file containing JSON list of collections.
179
18-
class StacFastapiGeoparquetSettings(BaseModel):
19-
hrefs: list[str]
20-
"""Geoparquet hrefs"""
10+
Any parquet assets on the collection will be loaded into the server."""

tests/conftest.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,16 @@
33

44
import pytest
55
from fastapi.testclient import TestClient
6-
from pytest import FixtureRequest
76

87
import stac_fastapi.geoparquet.api
98
from stac_fastapi.geoparquet import Settings
109

11-
GEOPARQUET_FILE = Path(__file__).parents[1] / "data" / "naip.parquet"
12-
TOML_FILE = Path(__file__).parents[1] / "data" / "config.toml"
10+
COLLECTIONS_PATH = Path(__file__).parents[1] / "data" / "collections.json"
1311

1412

15-
@pytest.fixture(params=[GEOPARQUET_FILE, TOML_FILE])
16-
async def client(request: FixtureRequest) -> AsyncIterator[TestClient]:
17-
settings = Settings(stac_fastapi_geoparquet_href=str(request.param))
13+
@pytest.fixture
14+
async def client() -> AsyncIterator[TestClient]:
15+
settings = Settings(stac_fastapi_collections_href=str(COLLECTIONS_PATH))
1816
api = stac_fastapi.geoparquet.api.create(settings)
1917
with TestClient(api.app) as client:
2018
yield client

tests/test_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import stac_fastapi.geoparquet.api
88
from stac_fastapi.geoparquet import Settings
99

10-
from .conftest import GEOPARQUET_FILE
10+
from .conftest import COLLECTIONS_PATH
1111

1212

1313
@pytest.fixture
@@ -17,7 +17,7 @@ def extension_directory() -> Path:
1717

1818
def test_create(extension_directory: Path) -> None:
1919
duckdb_client = DuckdbClient(extension_directory=str(extension_directory))
20-
settings = Settings(stac_fastapi_geoparquet_href=str(GEOPARQUET_FILE))
20+
settings = Settings(stac_fastapi_collections_href=str(COLLECTIONS_PATH))
2121
api = stac_fastapi.geoparquet.api.create(
2222
duckdb_client=duckdb_client, settings=settings
2323
)

uv.lock

Lines changed: 2 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)