From 2764e0f9074a67cf0114907bd3262ed00093e688 Mon Sep 17 00:00:00 2001 From: "Peter A. Jonsson" Date: Tue, 2 Sep 2025 14:21:21 +0200 Subject: [PATCH 1/2] s3_to_dc: remove dead code The `fetcher` variable is assigned in both branches of the if-statement that follows, so the initial `fetcher = None` assignment is never read. --- apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py index 395f68e1..3b5a5e6c 100755 --- a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py @@ -286,7 +286,6 @@ def cli( "Any wildcard characters will be escaped." ) # Get a generator from supplied S3 Uri for candidate documents - fetcher = None # Grab the URL from the resulting S3 item if is_glob: fetcher = S3Fetcher(aws_unsigned=no_sign_request) From c2faafab1395b25074712d43dca48d5f9ab062b7 Mon Sep 17 00:00:00 2001 From: "Peter A. Jonsson" Date: Tue, 2 Sep 2025 14:06:34 +0200 Subject: [PATCH 2/2] Use function from datacube The parse_doc_stream function has been integrated into datacube with the 1.9.9 release. 
--- apps/dc_tools/odc/apps/dc_tools/_docs.py | 34 +-------------------- apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py | 2 +- apps/dc_tools/pyproject.toml | 2 +- tests/test-env.yml | 2 +- 4 files changed, 4 insertions(+), 36 deletions(-) diff --git a/apps/dc_tools/odc/apps/dc_tools/_docs.py b/apps/dc_tools/odc/apps/dc_tools/_docs.py index 30d0cd7e..40468c2e 100644 --- a/apps/dc_tools/odc/apps/dc_tools/_docs.py +++ b/apps/dc_tools/odc/apps/dc_tools/_docs.py @@ -1,12 +1,11 @@ """These should probably be in datacube library.""" -import json import sys from typing import Sequence, Union from uuid import UUID, uuid5 from datacube.index.hl import Doc2Dataset -from datacube.utils.documents import parse_yaml +from datacube.utils.documents import parse_doc_stream # Some random UUID to be ODC namespace ODC_NS = UUID("6f34c6f4-13d6-43c0-8e4e-42b6c13203af") @@ -73,37 +72,6 @@ def from_metadata_stream(metadata_stream, index, **kwargs): yield (None, f"Error: {uri}, {err}") -def parse_doc_stream(doc_stream, on_error=None, transform=None): - """ - Replace doc bytes/strings with parsed dicts. - - Stream[(uri, bytes)] -> Stream[(uri, dict)] - - - :param doc_stream: sequence of (uri, doc: bytes|string) - :param on_error: Callback uri, doc -> None - :param transform: dict -> dict if supplied also apply further transform on parsed document - - On output doc is replaced with python dict parsed from yaml, or with None - if parsing/transform error occurred. - """ - for uri, doc in doc_stream: - try: - if uri.endswith(".json"): - metadata = json.loads(doc) - else: - metadata = parse_yaml(doc) - - if transform is not None: - metadata = transform(metadata) - except Exception: # pylint: disable=broad-except - if on_error is not None: - on_error(uri, doc) - metadata = None - - yield uri, metadata - - def from_yaml_doc_stream(doc_stream, index, logger=None, transform=None, **kwargs): """ Stream of yaml documents to a stream of Dataset results. 
diff --git a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py index 3b5a5e6c..d2b04ef4 100755 --- a/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py +++ b/apps/dc_tools/odc/apps/dc_tools/s3_to_dc.py @@ -13,8 +13,8 @@ from datacube import Datacube from datacube.index.hl import Doc2Dataset from datacube.ui.click import environment_option, pass_config +from datacube.utils.documents import parse_doc_stream from odc.aio import S3Fetcher, s3_find_glob -from odc.apps.dc_tools._docs import parse_doc_stream from odc.apps.dc_tools.utils import ( IndexingException, SkippedException, diff --git a/apps/dc_tools/pyproject.toml b/apps/dc_tools/pyproject.toml index 7071de69..f945307c 100644 --- a/apps/dc_tools/pyproject.toml +++ b/apps/dc_tools/pyproject.toml @@ -7,7 +7,7 @@ authors = [ ] dependencies = [ "click", - "datacube>=1.9.6", + "datacube>=1.9.9", "datadog", "eodatasets3>=1.9", "fsspec", diff --git a/tests/test-env.yml b/tests/test-env.yml index 3f89422a..404458bb 100644 --- a/tests/test-env.yml +++ b/tests/test-env.yml @@ -10,7 +10,7 @@ dependencies: - python=3.12 # Datacube - - datacube>=1.9.0 + - datacube>=1.9.9 - sqlalchemy>=2.0 # odc.ui