Commit 5ed701e

Break out mypy to separate check (#44)

Author: Tom Augspurger

* Break out mypy

1 parent: 96fe45d

File tree

9 files changed: 47 additions, 41 deletions

.github/workflows/continuous-integration.yml
.pre-commit-config.yaml
pyproject.toml
stac_geoparquet/cli.py
stac_geoparquet/from_arrow.py
stac_geoparquet/pc_runner.py
stac_geoparquet/pgstac_reader.py
stac_geoparquet/stac_geoparquet.py
stac_geoparquet/to_arrow.py


.github/workflows/continuous-integration.yml

Lines changed: 4 additions & 1 deletion

@@ -28,4 +28,7 @@ jobs:
         run: pytest tests -v

       - name: Lint
-        run: pre-commit run --all-files
+        run: pre-commit run --all-files
+
+      - name: Type check
+        run: mypy .
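
Note: splitting the type check out of pre-commit (the hook is removed below) means mypy now runs in the CI job's regular Python environment, where the package and its test dependencies are installed, rather than in pre-commit's isolated hook environment. The local equivalent is presumably just "mypy ." from the repository root with the test extras installed.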

.pre-commit-config.yaml

Lines changed: 0 additions & 16 deletions

@@ -18,19 +18,3 @@ repos:
     hooks:
       - id: flake8
         language_version: python3
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.9.0
-    hooks:
-      - id: mypy
-        # Override default --ignore-missing-imports
-        args: []
-        additional_dependencies:
-          # Type stubs
-          - types-PyYAML
-          - types-requests
-          - types-python-dateutil
-          # Typed libraries
-          - numpy
-          - pystac
-          - azure-data-tables
-          - pytest
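
With the hook gone, the pinned rev and the hand-maintained additional_dependencies list (type stubs plus typed runtime libraries) no longer have to be kept in sync inside pre-commit's isolated environment. Two of the stub packages (types-python-dateutil, types-requests) reappear in the test extra in pyproject.toml below; the typed libraries are already part of the environment CI installs, so mypy picks them up directly.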

pyproject.toml

Lines changed: 7 additions & 0 deletions

@@ -41,6 +41,9 @@ test = [
     "pre-commit",
     "stac-geoparquet[pgstac]",
     "stac-geoparquet[pc]",
+    "types-python-dateutil",
+    "types-requests",
+    "mypy",
 ]


@@ -72,3 +75,7 @@ module = [
 ]

 ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = "stac_geoparquet.*"
+disallow_untyped_defs = true
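
The new override turns on disallow_untyped_defs for every module in the package: any function without a complete signature annotation becomes a mypy error. A minimal sketch of the effect (hypothetical function names, not from this repository):

def summarize(records):  # error: Function is missing a type annotation
    return len(records)

def summarize_typed(records: list[dict]) -> int:  # OK: fully annotated
    return len(records)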

stac_geoparquet/cli.py

Lines changed: 7 additions & 5 deletions

@@ -2,12 +2,14 @@
 import logging
 import sys
 import os
+from typing import List, Optional
+
 from stac_geoparquet import pc_runner

 logger = logging.getLogger("stac_geoparquet.pgstac_reader")


-def parse_args(args=None):
+def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--output-protocol",

@@ -47,7 +49,7 @@ def parse_args(args=None):
     return parser.parse_args(args)


-def setup_logging():
+def setup_logging() -> None:
     import logging
     import warnings
     import rich.logging

@@ -88,10 +90,10 @@ def setup_logging():
 }


-def main(args=None):
+def main(inp: Optional[List[str]] = None) -> int:
     import azure.data.tables

-    args = parse_args(args)
+    args = parse_args(inp)

     skip = set(SKIP)
     if args.extra_skip:

@@ -112,7 +114,7 @@ def main(args=None):
         "credential": args.storage_options_credential,
     }

-    def f(config):
+    def f(config: pc_runner.CollectionConfig) -> None:
         config.export_collection(
             args.connection_info,
             args.output_protocol,
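
The main() parameter is renamed from args to inp because the old code re-bound args from Optional[List[str]] to the argparse.Namespace returned by parse_args, and once the parameter is annotated, mypy rejects that re-binding. A minimal sketch of the pattern the rename avoids:

import argparse
from typing import List, Optional

def parse_args(args: Optional[List[str]] = None) -> argparse.Namespace:
    return argparse.ArgumentParser().parse_args(args)

def main(args: Optional[List[str]] = None) -> int:
    # mypy: Incompatible types in assignment (expression has type
    # "Namespace", variable has type "Optional[List[str]]")
    args = parse_args(args)
    return 0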

stac_geoparquet/from_arrow.py

Lines changed: 3 additions & 2 deletions

@@ -1,15 +1,16 @@
 """Convert STAC Items in Arrow Table format to JSON Lines or Python dicts."""

+import os
 import json
-from typing import Iterable, List
+from typing import Iterable, List, Union

 import numpy as np
 import pyarrow as pa
 import pyarrow.compute as pc
 import shapely


-def stac_table_to_ndjson(table: pa.Table, dest: str):
+def stac_table_to_ndjson(table: pa.Table, dest: Union[str, os.PathLike[str]]) -> None:
     """Write a STAC Table to a newline-delimited JSON file."""
     with open(dest, "w") as f:
         for item_dict in stac_table_to_items(table):
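
Widening dest to Union[str, os.PathLike[str]] lets callers pass a pathlib.Path as well as a plain string, matching what the underlying open() call already accepts. A self-contained sketch of the same pattern (hypothetical helper, not part of the library):

import os
from pathlib import Path
from typing import Union

def write_text(dest: Union[str, os.PathLike[str]], text: str) -> None:
    # open() accepts str and PathLike alike, so the wider type costs nothing
    with open(dest, "w") as f:
        f.write(text)

write_text("a.txt", "hello")        # str still works
write_text(Path("b.txt"), "hello")  # Path now type-checks too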

stac_geoparquet/pc_runner.py

Lines changed: 11 additions & 5 deletions

@@ -1,6 +1,7 @@
 from __future__ import annotations

 import json
+from typing import Any

 import azure.data.tables
 import requests

@@ -79,7 +80,7 @@
 }


-def build_render_config(render_params, assets):
+def build_render_config(render_params: dict[str, Any], assets: dict[str, Any]) -> str:
     flat = []
     if assets:
         for asset in assets:

@@ -93,7 +94,9 @@ def build_render_config(render_params, assets):
     return urllib.parse.urlencode(flat)


-def generate_configs_from_storage_table(table_client: azure.data.tables.TableClient):
+def generate_configs_from_storage_table(
+    table_client: azure.data.tables.TableClient,
+) -> dict[str, CollectionConfig]:
     configs = {}
     for entity in table_client.list_entities():
         collection_id = entity["RowKey"]

@@ -109,7 +112,7 @@ def generate_configs_from_storage_table(table_client: azure.data.tables.TableCli
     return configs


-def generate_configs_from_api(url):
+def generate_configs_from_api(url: str) -> dict[str, CollectionConfig]:
     configs = {}
     r = requests.get(url)
     r.raise_for_status()

@@ -131,7 +134,7 @@ def generate_configs_from_api(url):

 def merge_configs(
     table_configs: dict[str, CollectionConfig], api_configs: dict[str, CollectionConfig]
-):
+) -> dict[str, CollectionConfig]:
     # what a mess. Get partitioning config from the API, render from the table.
     configs = {}
     for k in table_configs.keys() | api_configs.keys():

@@ -142,9 +145,12 @@ def merge_configs(
         if api_config:
             config.partition_frequency = api_config.partition_frequency
         configs[k] = config
+    return configs


-def get_configs(table_client):
+def get_configs(
+    table_client: azure.data.tables.TableClient,
+) -> dict[str, CollectionConfig]:
     table_configs = generate_configs_from_storage_table(table_client)
     api_configs = generate_configs_from_api(
         "https://planetarycomputer.microsoft.com/api/stac/v1/collections"

stac_geoparquet/pgstac_reader.py

Lines changed: 9 additions & 8 deletions

@@ -46,7 +46,7 @@ class CollectionConfig:
     should_inject_dynamic_properties: bool = True
     render_config: str | None = None

-    def __post_init__(self):
+    def __post_init__(self) -> None:
         self._collection: pystac.Collection | None = None

     @property

@@ -146,8 +146,8 @@ def export_partition(
         output_protocol: str,
         output_path: str,
         storage_options: dict[str, Any] | None = None,
-        rewrite=False,
-        skip_empty_partitions=False,
+        rewrite: bool = False,
+        skip_empty_partitions: bool = False,
     ) -> str | None:
         storage_options = storage_options or {}
         az_fs = fsspec.filesystem(output_protocol, **storage_options)

@@ -157,6 +157,7 @@

         db = pypgstac.db.PgstacDB(conninfo)
         with db:
+            assert db.connection is not None
             db.connection.execute("set statement_timeout = 300000;")
             # logger.debug("Reading base item")
             # TODO: proper escaping

@@ -169,7 +170,7 @@
             logger.debug("No records found for query %s.", query)
             return None

-        items = self.make_pgstac_items(records, base_item)
+        items = self.make_pgstac_items(records, base_item)  # type: ignore[arg-type]
         df = to_geodataframe(items)
         filesystem = pyarrow.fs.PyFileSystem(pyarrow.fs.FSSpecHandler(az_fs))
         df.to_parquet(output_path, index=False, filesystem=filesystem)

@@ -184,8 +185,8 @@ def export_partition_for_endpoints(
         storage_options: dict[str, Any],
         part_number: int | None = None,
         total: int | None = None,
-        rewrite=False,
-        skip_empty_partitions=False,
+        rewrite: bool = False,
+        skip_empty_partitions: bool = False,
     ) -> str | None:
         """
         Export results for a pair of endpoints.

@@ -221,8 +222,8 @@ def export_collection(
         output_protocol: str,
         output_path: str,
         storage_options: dict[str, Any],
-        rewrite=False,
-        skip_empty_partitions=False,
+        rewrite: bool = False,
+        skip_empty_partitions: bool = False,
     ) -> list[str | None]:
         base_query = textwrap.dedent(
             f"""\

stac_geoparquet/stac_geoparquet.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@
 SELF_LINK_COLUMN = "self_link"


-def _fix_array(v):
+def _fix_array(v: Any) -> Any:
     if isinstance(v, np.ndarray):
         v = v.tolist()
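
Annotating the helper as Any in, Any out is the lightest change that satisfies the new disallow_untyped_defs rule: mypy now counts _fix_array as typed, while callers remain unconstrained, which fits a function that genuinely accepts arbitrary values.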

stac_geoparquet/to_arrow.py

Lines changed: 5 additions & 3 deletions

@@ -4,7 +4,7 @@
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Sequence, Union
+from typing import Any, Dict, List, Optional, Sequence, Union, Generator

 import ciso8601
 import numpy as np

@@ -14,7 +14,9 @@
 import shapely.geometry


-def _chunks(lst: Sequence[Dict[str, Any]], n: int):
+def _chunks(
+    lst: Sequence[Dict[str, Any]], n: int
+) -> Generator[Sequence[Dict[str, Any]], None, None]:
     """Yield successive n-sized chunks from lst."""
     for i in range(0, len(lst), n):
         yield lst[i : i + n]

@@ -67,7 +69,7 @@ def parse_stac_ndjson_to_arrow(
     *,
     chunk_size: int = 8192,
     schema: Optional[pa.Schema] = None,
-):
+) -> pa.Table:
     # Define outside of if/else to make mypy happy
     items: List[dict] = []
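
The Generator[Sequence[Dict[str, Any]], None, None] annotation spells out a generator's three type parameters: the yield type, the send type, and the return type; the latter two are None for a plain for-loop generator like _chunks. A self-contained sketch (hypothetical function):

from typing import Generator, Sequence

def chunks(lst: Sequence[int], n: int) -> Generator[Sequence[int], None, None]:
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i : i + n]

print(list(chunks([1, 2, 3, 4, 5, 6, 7], 3)))  # [[1, 2, 3], [4, 5, 6], [7]]

Since nothing is sent into or returned from the generator, Iterator[Sequence[...]] would be an equivalent, shorter spelling.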
