Skip to content

Commit 302d50f

Browse files
issue 1111 merge new stac
1 parent 1e2f9a5 commit 302d50f

File tree

2 files changed

+24
-12
lines changed

2 files changed

+24
-12
lines changed

openeo_driver/workspace.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,29 +90,37 @@ def item_func(item: Item, parent_dir: str) -> str:
9090

9191
return CustomLayoutStrategy(collection_func=collection_func, item_func=item_func)
9292

93-
def replace_asset_href(asset_key: str, asset: Asset) -> Asset:
93+
def replace_asset_href(asset_key: str, asset: Asset, collection_href:str) -> Asset:
9494
if urlparse(asset.href).scheme not in ["", "file"]: # TODO: convenient place; move elsewhere?
9595
raise NotImplementedError(f"only importing files on disk is supported, found: {asset.href}")
9696

9797
# TODO: crummy way to export assets after STAC Collection has been written to disk with new asset hrefs;
9898
# it ends up in the asset metadata on disk
99-
asset.extra_fields["_original_absolute_href"] = asset.get_absolute_href()
100-
asset.href = Path(asset_key).name # asset key matches the asset filename, becomes the relative path
99+
asset_href = asset.get_absolute_href()
100+
asset.extra_fields["_original_absolute_href"] = asset_href
101+
if asset_href.startswith("s3"):
102+
asset.href = Path(asset_href).name
103+
else:
104+
common_path = os.path.commonpath([asset_href,collection_href])
105+
asset.href = os.path.relpath(asset_href,common_path)
101106
return asset
102107

108+
collection_href = new_collection.get_self_href()
103109
if not existing_collection:
104110
new_collection.normalize_hrefs(root_href=str(target.parent), strategy=href_layout_strategy())
105-
new_collection = new_collection.map_assets(replace_asset_href)
111+
new_collection = new_collection.map_assets(lambda k,v: replace_asset_href(k,v,collection_href))
106112
new_collection.save(CatalogType.SELF_CONTAINED)
107113

108114
for new_item in new_collection.get_items():
109115
for asset in new_item.get_assets().values():
116+
asset_path = Path(new_item.get_self_href()).parent / Path(asset.href).parent
117+
asset_path.mkdir(parents=True)
110118
file_operation(
111-
asset.extra_fields["_original_absolute_href"], str(Path(new_item.get_self_href()).parent)
119+
asset.extra_fields["_original_absolute_href"], str(asset_path)
112120
)
113121
else:
114122
merged_collection = _merge_collection_metadata(existing_collection, new_collection)
115-
new_collection = new_collection.map_assets(replace_asset_href)
123+
new_collection = new_collection.map_assets(lambda k,v: replace_asset_href(k,v,collection_href))
116124

117125
for new_item in new_collection.get_items():
118126
new_item.clear_links() # sever ties with previous collection
@@ -123,13 +131,15 @@ def replace_asset_href(asset_key: str, asset: Asset) -> Asset:
123131

124132
for new_item in new_collection.get_items():
125133
for asset in new_item.get_assets().values():
134+
asset_path = Path(new_item.get_self_href()).parent / Path(asset.href).parent
135+
asset_path.mkdir(parents=True)
126136
file_operation(
127-
asset.extra_fields["_original_absolute_href"], Path(new_item.get_self_href()).parent
137+
asset.extra_fields["_original_absolute_href"], str(asset_path)
128138
)
129139

130140
for item in new_collection.get_items():
131141
for asset in item.assets.values():
132-
workspace_uri = f"file:{Path(item.get_self_href()).parent / Path(asset.href).name}"
142+
workspace_uri = f"file:{Path(item.get_self_href()).parent / asset.href}"
133143
asset.extra_fields["alternate"] = {"file": workspace_uri}
134144

135145
return new_collection

tests/test_workspace.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_merge_from_disk_new(tmp_path):
6767
for asset_key, asset in item.get_assets().items()
6868
}
6969
assert asset_workspace_uris == {
70-
"asset.tif": f"file:{workspace.root_directory / 'path' / 'to' / 'collection.json_items' / 'asset.tif'}"
70+
"asset.tif": f"file:{workspace.root_directory / 'path' / 'to' / 'collection.json_items' / 'asset.tif' / 'asset.tif'}"
7171
}
7272

7373
# load it again
@@ -81,7 +81,8 @@ def test_merge_from_disk_new(tmp_path):
8181

8282
for item in exported_collection.get_items():
8383
for asset in item.get_assets().values():
84-
assert Path(item.get_self_href()).parent == Path(asset.get_absolute_href()).parent
84+
item_path_parts = Path(item.get_self_href()).parent.parts
85+
assert item_path_parts == Path(asset.get_absolute_href()).parts[0:len(item_path_parts)]
8586

8687

8788
def test_merge_from_disk_into_existing(tmp_path):
@@ -115,7 +116,7 @@ def test_merge_from_disk_into_existing(tmp_path):
115116
for asset_key, asset in item.get_assets().items()
116117
}
117118
assert asset_workspace_uris == {
118-
"asset2.tif": f"file:{workspace.root_directory / 'path' / 'to' / 'collection.json_items' / 'asset2.tif'}",
119+
"asset2.tif": f"file:{workspace.root_directory / 'path' / 'to' / 'collection.json_items'/ 'asset2.tif' / 'asset2.tif'}",
119120
}
120121

121122
# load it again
@@ -143,7 +144,8 @@ def test_merge_from_disk_into_existing(tmp_path):
143144

144145
for item in exported_collection.get_items():
145146
for asset in item.get_assets().values():
146-
assert Path(item.get_self_href()).parent == Path(asset.get_absolute_href()).parent
147+
item_path_parts = Path(item.get_self_href()).parent.parts
148+
assert item_path_parts == Path(asset.get_absolute_href()).parts[0:len(item_path_parts)]
147149

148150

149151
def test_adjacent_collections_do_not_have_interfering_items_and_assets(tmp_path):

0 commit comments

Comments
 (0)