Skip to content

Commit 25b8e69

Browse files
authored
docs: talk about collections (#14)
* docs: talk about collections * deps: update rustac
1 parent 2a34f39 commit 25b8e69

File tree

7 files changed

+117
-332
lines changed

7 files changed

+117
-332
lines changed

README.md

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
A [stac-fastapi](https://github.com/stac-utils/stac-fastapi) with a [stac-geoparquet](https://github.com/stac-utils/stac-geoparquet/blob/main/spec/stac-geoparquet-spec.md) backend.
88

9-
**stac-fastapi-geoparquet** can serve a full-featured STAC API from a **stac-geoparquet** file located (e.g.) in blob storage — no database required.
9+
**stac-fastapi-geoparquet** can serve a full-featured STAC API from one or more **stac-geoparquet** files located (e.g.) in blob storage — no database required.
1010

1111
> [!WARNING]
1212
> 👷 This project is under active development and may change and break at any time.
@@ -24,7 +24,32 @@ INFO: Application startup complete.
2424
INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
2525
```
2626

27-
To explore the API, you can use [stac browser](https://radiantearth.github.io/stac-browser/#/external/http:/127.0.0.1:8000/?.language=en).
27+
This will start the server on <http://127.0.0.1:8000>.
28+
One or more collections will be auto-generated from the items in the **stac-geoparquet** file.
29+
30+
### Using collections
31+
32+
Instead of providing the href to a single file, you can provide the href to a file containing a JSON list of collections.
33+
Any [collection assets](https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#assets) with an [application/vnd.apache.parquet](https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md#media-type) `type` field will be added to the server as sources of items.
34+
For an example, see [data/collections.json](./data/collections.json).
35+
36+
To start a server with one or more collections:
37+
38+
```shell
39+
$ STAC_FASTAPI_COLLECTIONS_HREF=data/collections.json uvicorn stac_fastapi.geoparquet.main:app
40+
INFO: Started server process [47920]
41+
INFO: Waiting for application startup.
42+
INFO: Application startup complete.
43+
INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
44+
```
45+
46+
To auto-generate the collections file, we provide a [script](./scripts/generate-collections):
47+
48+
```shell
49+
scripts/generate-collections s3://my-bucket/a.parquet s3://my-bucket/b.parquet
50+
```
51+
52+
This will update `./data/collections.json`.
2853

2954
## Development
3055

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ name = "stac-fastapi-geoparquet"
33
version = "0.0.1"
44
description = "A stac-fastapi implementation with a stac-geoparquet backend"
55
readme = "README.md"
6-
requires-python = ">=3.10"
6+
requires-python = ">=3.11"
77
dependencies = [
88
"attr>=0.3.2",
99
"fastapi>=0.115.8",
1010
"geojson-pydantic>=1.2.0",
1111
"pydantic>=2.10.4",
12-
"rustac==0.7.0b2",
12+
"rustac==0.7.0b3",
1313
"stac-fastapi-api>=5.0.2",
1414
"stac-fastapi-extensions>=5.0.2",
1515
"stac-fastapi-types>=5.0.2",

src/stac_fastapi/geoparquet/api.py

Lines changed: 54 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import pystac.utils
77
from fastapi import FastAPI, HTTPException
8-
from rustac import DuckdbClient
8+
from rustac import Collection, DuckdbClient
99

1010
import stac_fastapi.api.models
1111
from stac_fastapi.api.app import StacApi
@@ -47,6 +47,36 @@ class State(TypedDict):
4747
"""A mapping of collection id to geoparquet href."""
4848

4949

50+
@asynccontextmanager
51+
async def lifespan(app: FastAPI) -> AsyncIterator[State]:
52+
client = app.extra["duckdb_client"]
53+
settings: Settings = app.extra["settings"]
54+
collections = app.extra["collections"]
55+
collection_dict = dict()
56+
hrefs = defaultdict(list)
57+
for collection in collections:
58+
if collection["id"] in collection_dict:
59+
raise HTTPException(
60+
500, f"two collections with the same id: {collection.id}"
61+
)
62+
else:
63+
collection_dict[collection["id"]] = collection
64+
for key, asset in collection["assets"].items():
65+
if asset.get("type") == GEOPARQUET_MEDIA_TYPE:
66+
hrefs[collection["id"]].append(
67+
pystac.utils.make_absolute_href(
68+
asset["href"],
69+
settings.stac_fastapi_collections_href,
70+
start_is_dir=False,
71+
)
72+
)
73+
yield {
74+
"client": client,
75+
"collections": collection_dict,
76+
"hrefs": hrefs,
77+
}
78+
79+
5080
def create(
5181
settings: Settings | None = None,
5282
duckdb_client: DuckdbClient | None = None,
@@ -60,37 +90,19 @@ def create(
6090
stac_fastapi_description="A stac-fastapi server backend by stac-geoparquet",
6191
)
6292

63-
with open(settings.stac_fastapi_collections_href, "rb") as f:
64-
collections = json.load(f)
65-
66-
@asynccontextmanager
67-
async def lifespan(app: FastAPI) -> AsyncIterator[State]:
68-
client = app.extra["duckdb_client"]
69-
settings: Settings = app.extra["settings"]
70-
collections = app.extra["collections"]
71-
collection_dict = dict()
72-
hrefs = defaultdict(list)
73-
for collection in collections:
74-
if collection["id"] in collection_dict:
75-
raise HTTPException(
76-
500, f"two collections with the same id: {collection.id}"
77-
)
78-
else:
79-
collection_dict[collection["id"]] = collection
80-
for key, asset in collection["assets"].items():
81-
if asset["type"] == GEOPARQUET_MEDIA_TYPE:
82-
hrefs[collection["id"]].append(
83-
pystac.utils.make_absolute_href(
84-
asset["href"],
85-
settings.stac_fastapi_collections_href,
86-
start_is_dir=False,
87-
)
88-
)
89-
yield {
90-
"client": client,
91-
"collections": collection_dict,
92-
"hrefs": hrefs,
93-
}
93+
if settings.stac_fastapi_collections_href:
94+
with open(settings.stac_fastapi_collections_href, "rb") as f:
95+
collections = json.load(f)
96+
else:
97+
collections = []
98+
99+
if settings.stac_fastapi_geoparquet_href:
100+
collections.extend(
101+
collections_from_geoparquet_href(
102+
settings.stac_fastapi_geoparquet_href,
103+
duckdb_client,
104+
)
105+
)
94106

95107
api = StacApi(
96108
settings=settings,
@@ -108,3 +120,13 @@ async def lifespan(app: FastAPI) -> AsyncIterator[State]:
108120
search_post_request_model=PostSearchRequestModel,
109121
)
110122
return api
123+
124+
125+
def collections_from_geoparquet_href(
126+
href: str, duckdb_client: DuckdbClient
127+
) -> list[Collection]:
128+
collections = duckdb_client.get_collections(href)
129+
for collection in collections:
130+
collection["links"] = []
131+
collection["assets"] = {"data": {"href": href, "type": GEOPARQUET_MEDIA_TYPE}}
132+
return collections

src/stac_fastapi/geoparquet/settings.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
class Settings(ApiSettings): # type: ignore
55
"""stac-fastapi-geoparquet settings"""
66

7-
stac_fastapi_collections_href: str
8-
"""A file containing JSON list of collections.
7+
stac_fastapi_collections_href: str | None = None
8+
"""The href of a file containing JSON list of collections.
99
1010
Any parquet assets on the collection will be loaded into the server."""
11+
12+
stac_fastapi_geoparquet_href: str | None = None
13+
"""The href of a stac-geoparquet file.
14+
15+
The items in the file will be used to auto-generate one or more collections."""

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from stac_fastapi.geoparquet import Settings
99

1010
COLLECTIONS_PATH = Path(__file__).parents[1] / "data" / "collections.json"
11+
NAIP_PATH = Path(__file__).parents[1] / "data" / "naip.parquet"
1112

1213

1314
@pytest.fixture

tests/test_api.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import stac_fastapi.geoparquet.api
88
from stac_fastapi.geoparquet import Settings
99

10-
from .conftest import COLLECTIONS_PATH
10+
from .conftest import COLLECTIONS_PATH, NAIP_PATH
1111

1212

1313
@pytest.fixture
@@ -24,3 +24,11 @@ def test_create(extension_directory: Path) -> None:
2424
with TestClient(api.app) as client:
2525
response = client.get("/search")
2626
assert response.status_code == 200
27+
28+
29+
def test_create_from_parquet_file() -> None:
30+
settings = Settings(stac_fastapi_geoparquet_href=str(NAIP_PATH))
31+
api = stac_fastapi.geoparquet.api.create(settings=settings)
32+
with TestClient(api.app) as client:
33+
response = client.get("/search")
34+
assert response.status_code == 200

0 commit comments

Comments
 (0)