Skip to content

Commit f928250

Browse files
author
Emma Ai
committed
unhacking lineage and metadata assemble
1 parent 3c91b5b commit f928250

File tree

1 file changed

+32
-49
lines changed

1 file changed

+32
-49
lines changed

odc/stats/model.py

Lines changed: 32 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import pystac
1313
import xarray as xr
1414
from datacube.model import Dataset
15+
from datacube.model.lineage import LineageTree
1516
from datacube.utils.dates import normalise_dt
1617
from odc.geo.geobox import GeoBox
1718
from ._text import split_and_check
@@ -20,7 +21,7 @@
2021
from rasterio.crs import CRS
2122
import warnings
2223

23-
from eodatasets3.assemble import DatasetAssembler, serialise
24+
from eodatasets3.assemble import DatasetAssembler, serialise, _validate_property_name
2425
from eodatasets3.images import GridSpec
2526

2627
from .plugins import StatsPluginInterface
@@ -313,14 +314,13 @@ def location(self) -> str:
313314
return "/".join([p1, p2, self.short_time])
314315

315316
def _lineage(self) -> tuple[UUID, ...]:
316-
ds, *_ = self.datasets
317-
318-
if ds.metadata_doc["properties"].get("fused", False):
319-
lineage = tuple({x for ds in self.datasets for x in ds.metadata.sources})
320-
else:
321-
lineage = tuple(ds.id for ds in self.datasets)
322-
323-
return lineage
317+
lineage = set()
318+
for ds in self.datasets:
319+
tree = LineageTree.from_eo3_doc(ds.metadata_doc)
320+
lineage |= (
321+
tree.child_datasets() if tree.child_datasets() else {tree.dataset_id}
322+
)
323+
return tuple(lineage)
324324

325325
def _prefix(self, relative_to: str = "dataset") -> str:
326326
product = self.product
@@ -386,7 +386,7 @@ def render_assembler_metadata(
386386
Put together metadata document for the output of this task. It needs the source_dataset to inherit
387387
several properties and lineages. It also needs the output_dataset to get the measurement information.
388388
"""
389-
# pylint:disable=too-many-branches
389+
# pylint:disable=too-many-branches,protected-access
390390
dataset_assembler = DatasetAssembler(
391391
naming_conventions=self.product.naming_conventions_values,
392392
dataset_location=Path(self.product.explorer_path),
@@ -398,47 +398,30 @@ def render_assembler_metadata(
398398

399399
platforms, instruments = ([], [])
400400

401+
_validate_property_name(self.product.classifier)
401402
for dataset in self.datasets:
402-
if dataset.metadata_doc["properties"].get("fused", False):
403-
if dataset.metadata_doc["properties"].get("eo:platform") is not None:
404-
platforms.append(dataset.metadata_doc["properties"]["eo:platform"])
405-
if dataset.metadata_doc["properties"].get("eo:instrument") is not None:
406-
if isinstance(
407-
dataset.metadata_doc["properties"]["eo:instrument"], list
408-
):
409-
instruments += dataset.metadata_doc["properties"][
410-
"eo:instrument"
411-
]
412-
else:
413-
instruments += [
414-
dataset.metadata_doc["properties"]["eo:instrument"]
415-
]
416-
dataset_assembler.note_source_datasets(
417-
self.product.classifier, *dataset.metadata.sources
418-
)
419-
else:
420-
dataset.metadata_doc.setdefault("$schema", "")
421-
source_datasetdoc = serialise.from_doc(
422-
dataset.metadata_doc, skip_validation=True
423-
)
424-
dataset_assembler.add_source_dataset(
425-
source_datasetdoc,
426-
classifier=self.product.classifier,
427-
auto_inherit_properties=True, # it will grab all useful input dataset preperties
428-
inherit_geometry=False,
429-
inherit_skip_properties=self.product.inherit_skip_properties,
430-
)
431-
432-
if source_datasetdoc.properties.get("eo:platform") is not None:
433-
platforms.append(source_datasetdoc.properties["eo:platform"])
434-
if source_datasetdoc.properties.get("eo:instrument") is not None:
435-
if isinstance(source_datasetdoc.properties["eo:instrument"], list):
436-
instruments += source_datasetdoc.properties["eo:instrument"]
437-
else:
438-
instruments.append(
439-
source_datasetdoc.properties["eo:instrument"]
440-
)
403+
if dataset.metadata_doc["properties"].get("eo:platform") is not None:
404+
platforms.append(dataset.metadata_doc["properties"]["eo:platform"])
405+
if dataset.metadata_doc["properties"].get("eo:instrument") is not None:
406+
if isinstance(
407+
dataset.metadata_doc["properties"]["eo:instrument"], list
408+
):
409+
instruments += dataset.metadata_doc["properties"]["eo:instrument"]
410+
else:
411+
instruments += [dataset.metadata_doc["properties"]["eo:instrument"]]
412+
413+
dataset.metadata_doc.setdefault("$schema", "")
414+
source_datasetdoc = serialise.from_doc(
415+
dataset.metadata_doc, skip_validation=True
416+
)
417+
# it will grab all useful input dataset properties
418+
dataset_assembler._inherit_properties_from(
419+
source_datasetdoc, self.product.inherit_skip_properties
420+
)
441421

422+
dataset_assembler.note_source_datasets(
423+
self.product.classifier, *self._lineage()
424+
)
442425
dataset_assembler.platform = ",".join(sorted(set(platforms)))
443426
dataset_assembler.instrument = "_".join(sorted(set(instruments)))
444427

0 commit comments

Comments
 (0)