Skip to content

Commit a1bced1

Browse files
authored
Merge pull request #228 from tokern/athena
feature: Athena
2 parents f89670a + b5911ca commit a1bced1

File tree

4 files changed

+36
-11
lines changed

4 files changed

+36
-11
lines changed

piicatcher/dbinfo.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ def get_sample_query(
143143

144144

145145
class Athena(Postgres):
146-
pass
146+
_sample_query_template = "SELECT {column_list} FROM {schema_name}.{table_name} ORDER BY RAND() LIMIT {num_rows}"
147147

148148

149149
def get_dbinfo(source_type: str, *args, **kwargs) -> DbInfo:

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "piicatcher"
3-
version = "0.21.1"
3+
version = "0.21.2"
44
description = "Find PII data in databases"
55
authors = ["Tokern <info@tokern.io>"]
66
license = "Apache 2.0"
@@ -28,7 +28,7 @@ pyyaml = "*"
2828
click = "*"
2929
python-json-logger = "^2.0.2"
3030
commonregex-improved = "1.0.2"
31-
dbcat = "0.14.1"
31+
dbcat = "0.14.2"
3232
typer = "^0.4.0"
3333
goog-stats = "^0.1.2"
3434
tabulate = "^0.8.9"

tests/test_generators.py

Lines changed: 29 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -174,10 +174,6 @@ def test_get_sample_query(sqlalchemy_engine):
174174
'SELECT "column" FROM public.table ORDER BY RANDOM() LIMIT 1',
175175
),
176176
("snowflake", "SELECT column FROM public.table TABLESAMPLE BERNOULLI (1 ROWS)"),
177-
(
178-
"athena",
179-
'SELECT "column" FROM public.table TABLESAMPLE BERNOULLI (10) LIMIT 1',
180-
),
181177
],
182178
)
183179
def test_get_sample_query_redshift(mocker, source_type, expected_query):
@@ -257,6 +253,35 @@ def test_get_select_query_bigquery(mocker, source_type, expected_query):
257253
assert query == expected_query
258254

259255

256+
@pytest.mark.parametrize(
257+
("source_type", "expected_query"),
258+
[
259+
(
260+
"athena",
261+
'SELECT "column" FROM public.table ORDER BY RAND() LIMIT 1',
262+
),
263+
],
264+
)
265+
def test_get_sample_query_athena(mocker, source_type, expected_query):
266+
source = CatSource(name="src", source_type=source_type)
267+
schema = CatSchema(source=source, name="public")
268+
table = CatTable(schema=schema, name="table")
269+
column = CatColumn(table=table, name="column")
270+
271+
mocker.patch("piicatcher.generators._get_table_count", return_value=100)
272+
query = _get_query(
273+
schema=schema,
274+
table=table,
275+
column_list=[column],
276+
dbinfo=get_dbinfo(source.source_type, schema, table),
277+
connection=None,
278+
sample_size=1,
279+
source=source,
280+
)
281+
282+
assert query == expected_query
283+
284+
260285
def test_row_generator(sqlalchemy_engine):
261286
catalog, source, conn = sqlalchemy_engine
262287
schemata = catalog.search_schema(source_like=source.name, schema_like="%")

0 commit comments

Comments
 (0)