@@ -84,35 +84,50 @@ def collection_func(_: Collection, parent_dir: str, is_root: bool) -> str:
84
84
return str (Path (parent_dir ) / target .name )
85
85
86
86
def item_func(item: Item, parent_dir: str) -> str:
    """Return the href of the JSON document for ``item`` below ``parent_dir``.

    The document is placed in a ``<target.name>_items`` directory, where
    ``target`` is taken from the enclosing scope.

    :param item: the item to lay out; only its ``id`` is used.
    :param parent_dir: directory of the parent Collection document.
    :return: ``<parent_dir>/<target.name>_items/<flattened item id>.json``
    """
    # item ID can be a relative asset path but does not have to be; flatten it
    # so the item document is a single file rather than a nested directory tree
    unique_item_filename = item.id.replace("/", "_")
    # prevent items/assets of 2 adjacent Collection documents from interfering with each other;
    # unlike an object storage object, a Collection file cannot act as a parent "directory" as well
    return f"{parent_dir}/{target.name}_items/{unique_item_filename}.json"
90
92
91
93
return CustomLayoutStrategy (collection_func = collection_func , item_func = item_func )
92
94
93
def replace_asset_href(asset: Asset, src_collection_path: Path) -> Asset:
    """Rewrite ``asset.href`` to a path relative to the source collection's directory.

    The asset's original absolute file system path is remembered in
    ``asset.extra_fields["_original_absolute_path"]`` so the underlying file
    can still be located after the href has been replaced.

    :param asset: the asset to update; it is modified in place.
    :param src_collection_path: path of the source Collection document; the
        new href is computed relative to this path's parent directory.
    :return: the same ``asset`` object.
    :raises NotImplementedError: if the asset's absolute href has a scheme
        other than none or ``file``.
    :raises ValueError: from ``Path.relative_to`` if the asset does not live
        below the source collection's directory.
    """
    from urllib.parse import unquote  # local import: only needed for file:// URIs

    # pystac will handle STAC but not underlying assets; set asset hrefs up front
    asset_uri_parts = urlparse(asset.get_absolute_href())
    if asset_uri_parts.scheme not in ["", "file"]:  # TODO: convenient place; move elsewhere?
        raise NotImplementedError(f"only importing files on disk is supported, found: {asset.href}")

    raw_path = asset_uri_parts.path
    if asset_uri_parts.scheme == "file":
        # a file:// URI is percent-encoded (e.g. "my%20file.tif"); decode it to
        # obtain the actual path on disk. Plain paths are left untouched because
        # a literal "%" in them is not an escape sequence.
        raw_path = unquote(raw_path)
    absolute_asset_path = Path(raw_path)

    # TODO: crummy way to export assets after STAC Collection has been written to disk with new asset hrefs;
    #  it ends up in the asset metadata on disk
    asset.extra_fields["_original_absolute_path"] = str(absolute_asset_path)
    relative_asset_path = absolute_asset_path.relative_to(src_collection_path.parent)
    asset.href = str(relative_asset_path)  # relative to item document
    return asset
102
108
109
+ new_collection_path = Path (new_collection .get_self_href ())
110
+
103
111
if not existing_collection :
104
112
new_collection .normalize_hrefs (root_href = str (target .parent ), strategy = href_layout_strategy ())
105
- new_collection = new_collection .map_assets (replace_asset_href )
113
+ new_collection = new_collection .map_assets (
114
+ lambda _ , asset : replace_asset_href (asset , src_collection_path = new_collection_path )
115
+ )
106
116
new_collection .save (CatalogType .SELF_CONTAINED )
107
117
108
118
for new_item in new_collection .get_items ():
109
119
for asset in new_item .get_assets ().values ():
110
- file_operation (
111
- asset .extra_fields ["_original_absolute_href" ], str (Path (new_item .get_self_href ()).parent )
112
- )
120
+ relative_asset_path = asset .href
121
+ asset_parent_dir = (
122
+ Path (new_collection .get_self_href ()).parent / f"{ target .name } _items" / relative_asset_path
123
+ ).parent
124
+ asset_parent_dir .mkdir (parents = True , exist_ok = True ) # asset might not end up next to item
125
+ file_operation (asset .extra_fields ["_original_absolute_path" ], str (asset_parent_dir ))
113
126
else :
114
127
merged_collection = _merge_collection_metadata (existing_collection , new_collection )
115
- new_collection = new_collection .map_assets (replace_asset_href )
128
+ new_collection = new_collection .map_assets (
129
+ lambda _ , asset : replace_asset_href (asset , src_collection_path = new_collection_path )
130
+ )
116
131
117
132
for new_item in new_collection .get_items ():
118
133
new_item .clear_links () # sever ties with previous collection
@@ -123,9 +138,14 @@ def replace_asset_href(asset_key: str, asset: Asset) -> Asset:
123
138
124
139
for new_item in new_collection .get_items ():
125
140
for asset in new_item .get_assets ().values ():
126
- file_operation (
127
- asset .extra_fields ["_original_absolute_href" ], Path (new_item .get_self_href ()).parent
128
- )
141
+ relative_asset_path = asset .href
142
+ asset_parent_dir = (
143
+ Path (merged_collection .get_self_href ()).parent
144
+ / f"{ target .name } _items"
145
+ / relative_asset_path
146
+ ).parent
147
+ asset_parent_dir .mkdir (parents = True , exist_ok = True )
148
+ file_operation (asset .extra_fields ["_original_absolute_path" ], str (asset_parent_dir ))
129
149
130
150
for item in new_collection .get_items ():
131
151
for asset in item .assets .values ():
0 commit comments