|
17 | 17 | from zarr.errors import ZarrUserWarning |
18 | 18 | from zarr.storage import FsspecStore |
19 | 19 | from zarr.storage._fsspec import _make_async |
20 | | -from zarr.storage._utils import normalize_path |
21 | 20 | from zarr.testing.store import StoreTests |
22 | 21 |
|
23 | 22 | if TYPE_CHECKING: |
@@ -287,25 +286,114 @@ def array_roundtrip(store: FsspecStore) -> None: |
287 | 286 | np.testing.assert_array_equal(arr[:], data) |
288 | 287 |
|
289 | 288 |
|
290 | | -@pytest.mark.skipif( |
291 | | - parse_version(fsspec.__version__) < parse_version("2024.12.0"), |
292 | | - reason="No AsyncFileSystemWrapper", |
| 289 | +@pytest.mark.parametrize( |
| 290 | + ("root", "key", "expected"), |
| 291 | + [ |
| 292 | + # `"/"` as root collapses so that bare-key backends (notably |
| 293 | + # ReferenceFileSystem) get the right key. Regression test for |
| 294 | + # https://github.com/zarr-developers/zarr-python/issues/3922 . |
| 295 | + ("/", "zarr.json", "zarr.json"), |
| 296 | + ("", "zarr.json", "zarr.json"), |
| 297 | + # Trailing slashes on the root are stripped before joining. |
| 298 | + ("foo/", "zarr.json", "foo/zarr.json"), |
| 299 | + ("foo", "zarr.json", "foo/zarr.json"), |
| 300 | + # Leading slashes on the root are preserved -- absolute filesystem |
| 301 | + # paths must stay absolute. Regression test for the titiler-xarray |
| 302 | + # breakage that #3924 introduced when `normalize_path` was applied to |
| 303 | + # `FsspecStore.path`. |
| 304 | + ("/home/runner/data.zarr", "zarr.json", "/home/runner/data.zarr/zarr.json"), |
| 305 | + ("/home/runner/data.zarr/", "zarr.json", "/home/runner/data.zarr/zarr.json"), |
| 306 | + # Multi-segment keys. |
| 307 | + ("/home/foo", "a/b/zarr.json", "/home/foo/a/b/zarr.json"), |
| 308 | + ("", "a/b/zarr.json", "a/b/zarr.json"), |
| 309 | + # Trailing slash on the result is stripped (relevant when key is ""). |
| 310 | + ("/home/foo", "", "/home/foo"), |
| 311 | + ], |
293 | 312 | ) |
294 | | -@pytest.mark.parametrize("path", ["", "/", "//", "foo", "foo/", "/foo", "/foo/", "//foo//"]) |
295 | | -def test_fsspec_store_path_normalization(path: str) -> None: |
296 | | - """`FsspecStore.path` is normalized to the canonical form, matching |
297 | | - `normalize_path`, regardless of the surface representation the caller |
298 | | - supplies. |
299 | | -
|
300 | | - Regression test for https://github.com/zarr-developers/zarr-python/issues/3922 |
301 | | - -- when a caller passed `path="/"` the leading slash flowed through |
302 | | - unmodified to subsequent `_join_paths([self.path, key])` calls, producing |
303 | | - `"//key"` and missing the underlying object. |
| 313 | +def test_dereference_path(root: str, key: str, expected: str) -> None: |
| 314 | + """Verify the contract `_dereference_path` provides for `FsspecStore`. |
| 315 | +
|
| 316 | + `FsspecStore.path` is stored verbatim; the join with a key must collapse a |
| 317 | + sentinel `"/"` root, strip trailing slashes, and preserve leading |
| 318 | + slashes on absolute paths. |
| 319 | + """ |
| 320 | + from zarr.storage._utils import _dereference_path |
| 321 | + |
| 322 | + assert _dereference_path(root, key) == expected |
| 323 | + |
| 324 | + |
| 325 | +async def test_fsspec_store_open_group_via_reference_filesystem() -> None: |
| 326 | + """End-to-end regression test for |
| 327 | + https://github.com/zarr-developers/zarr-python/issues/3922 . |
| 328 | +
|
| 329 | + ``ReferenceFileSystem`` keys its refs by bare strings like ``"zarr.json"``. |
| 330 | + The bug was that ``FsspecStore(fs=ref_fs, path="/")`` produced |
| 331 | + ``"//zarr.json"`` at the join site and failed to find the entry, raising |
| 332 | + ``GroupNotFoundError``. This test pins ``path="/"`` explicitly to keep |
| 333 | + coverage even if the default value changes later. |
| 334 | + """ |
| 335 | + import json |
| 336 | + |
| 337 | + from fsspec.implementations.reference import ReferenceFileSystem |
| 338 | + |
| 339 | + group_json = json.dumps({"zarr_format": 3, "node_type": "group", "attributes": {}}) |
| 340 | + fs = ReferenceFileSystem( |
| 341 | + fo={"version": 1, "refs": {"zarr.json": group_json}}, |
| 342 | + asynchronous=True, |
| 343 | + ) |
| 344 | + store = FsspecStore(fs=fs, path="/", read_only=True) |
| 345 | + group = await zarr.api.asynchronous.open_group(store, mode="r") |
| 346 | + assert group.metadata.zarr_format == 3 |
| 347 | + |
| 348 | + |
| 349 | +async def test_fsspec_store_read_array_chunk_via_reference_filesystem() -> None: |
| 350 | + """End-to-end regression test that exercises the byte-range read path |
| 351 | + against ``ReferenceFileSystem``. |
| 352 | +
|
| 353 | + Beyond opening a group (covered by |
| 354 | + ``test_fsspec_store_open_group_via_reference_filesystem``), this test |
| 355 | + constructs a small zarr v3 array whose chunk lives in the refs dict and |
| 356 | + reads it through the store. Path-handling bugs on the byte-range |
| 357 | + fetch path (used by kerchunk-style virtualization) would surface here |
| 358 | + rather than at metadata-open time. |
304 | 359 | """ |
305 | | - sync_fs = fsspec.filesystem("memory") |
306 | | - fs = _make_async(sync_fs) |
307 | | - store = FsspecStore(fs=fs, path=path) |
308 | | - assert store.path == normalize_path(path) |
| 360 | + import json |
| 361 | + |
| 362 | + import numpy as np |
| 363 | + from fsspec.implementations.reference import ReferenceFileSystem |
| 364 | + |
| 365 | + # Construct a minimal v3 zarr: a single 1-D uint8 array of length 4 with |
| 366 | + # one chunk of size 4. The chunk bytes are little-endian uint8s 1..4. |
| 367 | + array_meta = json.dumps( |
| 368 | + { |
| 369 | + "zarr_format": 3, |
| 370 | + "node_type": "array", |
| 371 | + "shape": [4], |
| 372 | + "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": [4]}}, |
| 373 | + "data_type": "uint8", |
| 374 | + "chunk_key_encoding": {"name": "default", "configuration": {"separator": "/"}}, |
| 375 | + "fill_value": 0, |
| 376 | + "codecs": [{"name": "bytes", "configuration": {"endian": "little"}}], |
| 377 | + "attributes": {}, |
| 378 | + } |
| 379 | + ) |
| 380 | + chunk_bytes = bytes([1, 2, 3, 4]) |
| 381 | + |
| 382 | + refs: dict[str, str] = { |
| 383 | + "zarr.json": array_meta, |
| 384 | + # ReferenceFileSystem accepts raw bytes via base64 encoding or |
| 385 | + # latin-1-decoded strings; latin-1 round-trips bytes 1:1. |
| 386 | + "c/0": chunk_bytes.decode("latin-1"), |
| 387 | + } |
| 388 | + |
| 389 | + fs = ReferenceFileSystem( |
| 390 | + fo={"version": 1, "refs": refs}, |
| 391 | + asynchronous=True, |
| 392 | + ) |
| 393 | + store = FsspecStore(fs=fs, path="/", read_only=True) |
| 394 | + array = await zarr.api.asynchronous.open_array(store=store, mode="r") |
| 395 | + data = await array.getitem(slice(None)) |
| 396 | + np.testing.assert_array_equal(data, np.array([1, 2, 3, 4], dtype="uint8")) |
309 | 397 |
|
310 | 398 |
|
311 | 399 | @pytest.mark.skipif( |
|
0 commit comments