Skip to content

Commit 2cfee9c

Browse files
authored
Zarr v3: support root path (#1085)
* Support path='/' for zarr v3 to create a root array or group.
  The v3 spec states:
    - path = '/' for arrays gives /meta/root.array.json
    - path = '/' for groups gives /meta/root.group.json
  In this implementation, path = None or path = '' will also result in a root array. Creation routines default to path=None, so the path argument no longer has to be specified manually.
* Revert change to normalize_storage_path; update additional tests.
* Fix.
* Update TestArrayV3 to inherit all tests from TestArray.
* Remove test bypass.
* Fix nbytes_stored for a v3 array without a path; update test_nbytes_stored to handle both the v3 and v2 cases properly.
* pep8
* Fix incorrect default value for at_root in _init_creation_kwargs; the previous behavior corresponded to at_root=True by default.
* flake8
1 parent 43266ee commit 2cfee9c

File tree

8 files changed

+143
-136
lines changed

8 files changed

+143
-136
lines changed

zarr/_storage/store.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -420,23 +420,23 @@ def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str
420420

421421
def _prefix_to_array_key(store: StoreLike, prefix: str) -> str:
422422
if getattr(store, "_store_version", 2) == 3:
423+
sfx = _get_metadata_suffix(store) # type: ignore
423424
if prefix:
424-
sfx = _get_metadata_suffix(store) # type: ignore
425425
key = meta_root + prefix.rstrip("/") + ".array" + sfx
426426
else:
427-
raise ValueError("prefix must be supplied to get a v3 array key")
427+
key = meta_root[:-1] + '.array' + sfx
428428
else:
429429
key = prefix + array_meta_key
430430
return key
431431

432432

433433
def _prefix_to_group_key(store: StoreLike, prefix: str) -> str:
434434
if getattr(store, "_store_version", 2) == 3:
435+
sfx = _get_metadata_suffix(store) # type: ignore
435436
if prefix:
436-
sfx = _get_metadata_suffix(store) # type: ignore
437437
key = meta_root + prefix.rstrip('/') + ".group" + sfx
438438
else:
439-
raise ValueError("prefix must be supplied to get a v3 group key")
439+
key = meta_root[:-1] + '.group' + sfx
440440
else:
441441
key = prefix + group_meta_key
442442
return key
@@ -449,7 +449,7 @@ def _prefix_to_attrs_key(store: StoreLike, prefix: str) -> str:
449449
if prefix:
450450
key = meta_root + prefix.rstrip('/') + ".array" + sfx
451451
else:
452-
raise ValueError("prefix must be supplied to get a v3 array key")
452+
key = meta_root[:-1] + '.array' + sfx
453453
else:
454454
key = prefix + attrs_key
455455
return key

zarr/creation.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ def create(shape, chunks=True, dtype=None, compressor='default',
163163
dimension_separator = normalize_dimension_separator(dimension_separator)
164164

165165
if zarr_version > 2 and path is None:
166-
raise ValueError("path must be supplied to initialize a zarr v3 array")
166+
path = '/'
167167

168168
# initialize array metadata
169169
init_array(store, shape=shape, chunks=chunks, dtype=dtype, compressor=compressor,

zarr/hierarchy.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1276,8 +1276,6 @@ def group(store=None, overwrite=False, chunk_store=None,
12761276
if zarr_version != 2:
12771277
assert_zarr_v3_api_available()
12781278

1279-
if zarr_version == 3 and path is None:
1280-
raise ValueError(f"path must be provided for a v{zarr_version} group")
12811279
path = normalize_storage_path(path)
12821280

12831281
if zarr_version == 2:
@@ -1366,10 +1364,6 @@ def open_group(store=None, mode='a', cache_attrs=True, synchronizer=None, path=N
13661364
"zarr_version of store and chunk_store must match"
13671365
)
13681366

1369-
store_version = getattr(store, '_store_version', 2)
1370-
if store_version == 3 and path is None:
1371-
raise ValueError("path must be supplied to initialize a zarr v3 group")
1372-
13731367
path = normalize_storage_path(path)
13741368

13751369
# ensure store is initialized

zarr/storage.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -230,8 +230,14 @@ def _getsize(store: BaseStore, path: Path = None) -> int:
230230
size = 0
231231
store_version = getattr(store, '_store_version', 2)
232232
if store_version == 3:
233-
members = store.list_prefix(data_root + path) # type: ignore
234-
members += store.list_prefix(meta_root + path) # type: ignore
233+
if path == '':
234+
# have to list the root folders without trailing / in this case
235+
members = store.list_prefix(data_root.rstrip('/')) # type: ignore
236+
members += store.list_prefix(meta_root.rstrip('/')) # type: ignore
237+
else:
238+
members = store.list_prefix(data_root + path) # type: ignore
239+
members += store.list_prefix(meta_root + path) # type: ignore
240+
# also include zarr.json?
235241
# members += ['zarr.json']
236242
else:
237243
members = listdir(store, path)

zarr/tests/test_convenience.py

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -73,11 +73,6 @@ def test_open_array(path_type, zarr_version):
7373
assert isinstance(z, Array)
7474
assert z.shape == (200,)
7575

76-
if zarr_version == 3:
77-
# cannot open a v3 array without path
78-
with pytest.raises(ValueError):
79-
open(store, mode='w', shape=200, zarr_version=3)
80-
8176
# open array, read-only
8277
z = open(store, mode='r', **kwargs)
8378
assert isinstance(z, Array)
@@ -108,11 +103,6 @@ def test_open_group(path_type, zarr_version):
108103
assert isinstance(g, Group)
109104
assert 'foo' not in g
110105

111-
if zarr_version == 3:
112-
# cannot open a v3 group without path
113-
with pytest.raises(ValueError):
114-
open(store, mode='w', zarr_version=3)
115-
116106
# open group, read-only
117107
g = open(store, mode='r', **kwargs)
118108
assert isinstance(g, Group)

zarr/tests/test_core.py

Lines changed: 64 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,6 @@
1919

2020
from zarr._storage.store import (
2121
v3_api_available,
22-
_prefix_to_array_key,
23-
_prefix_to_attrs_key,
24-
_prefix_to_group_key
2522
)
2623
from zarr.core import Array
2724
from zarr.errors import ArrayNotFoundError, ContainsGroupError
@@ -64,56 +61,64 @@
6461
class TestArray(unittest.TestCase):
6562

6663
version = 2
64+
root = ''
65+
KVStoreClass = KVStore
6766

6867
def test_array_init(self):
6968

7069
# normal initialization
71-
store = KVStore(dict())
70+
store = self.KVStoreClass(dict())
7271
init_array(store, shape=100, chunks=10, dtype="<f8")
73-
a = Array(store)
72+
a = Array(store, zarr_version=self.version)
7473
assert isinstance(a, Array)
7574
assert (100,) == a.shape
7675
assert (10,) == a.chunks
7776
assert '' == a.path
7877
assert a.name is None
7978
assert a.basename is None
8079
assert store is a.store
81-
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
80+
if self.version == 2:
81+
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
82+
else:
83+
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()
8284
store.close()
8385

8486
# initialize at path
85-
store = KVStore(dict())
87+
store = self.KVStoreClass(dict())
8688
init_array(store, shape=100, chunks=10, path='foo/bar', dtype='<f8')
87-
a = Array(store, path='foo/bar')
89+
a = Array(store, path='foo/bar', zarr_version=self.version)
8890
assert isinstance(a, Array)
8991
assert (100,) == a.shape
9092
assert (10,) == a.chunks
9193
assert 'foo/bar' == a.path
9294
assert '/foo/bar' == a.name
9395
assert 'bar' == a.basename
9496
assert store is a.store
95-
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
96-
97+
if self.version == 2:
98+
assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest()
99+
else:
100+
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == a.hexdigest()
97101
# store not initialized
98-
store = KVStore(dict())
102+
store = self.KVStoreClass(dict())
99103
with pytest.raises(ValueError):
100-
Array(store)
104+
Array(store, zarr_version=self.version)
101105

102106
# group is in the way
103-
store = KVStore(dict())
107+
store = self.KVStoreClass(dict())
104108
init_group(store, path='baz')
105109
with pytest.raises(ValueError):
106-
Array(store, path='baz')
110+
Array(store, path='baz', zarr_version=self.version)
107111

108112
def create_array(self, read_only=False, **kwargs):
109-
store = KVStore(dict())
113+
store = self.KVStoreClass(dict())
110114
kwargs.setdefault('compressor', Zlib(level=1))
111115
cache_metadata = kwargs.pop('cache_metadata', True)
112116
cache_attrs = kwargs.pop('cache_attrs', True)
113117
write_empty_chunks = kwargs.pop('write_empty_chunks', True)
114118
init_array(store, **kwargs)
115119
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
116-
cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks)
120+
cache_attrs=cache_attrs, write_empty_chunks=write_empty_chunks,
121+
zarr_version=self.version)
117122

118123
def test_store_has_text_keys(self):
119124
# Initialize array
@@ -162,15 +167,28 @@ def test_nbytes_stored(self):
162167

163168
# dict as store
164169
z = self.create_array(shape=1000, chunks=100)
165-
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
170+
if self.version == 3:
171+
expect_nbytes_stored = sum(
172+
buffer_size(v) for k, v in z.store.items() if k != 'zarr.json'
173+
)
174+
else:
175+
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
166176
assert expect_nbytes_stored == z.nbytes_stored
167177
z[:] = 42
168-
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
178+
if self.version == 3:
179+
expect_nbytes_stored = sum(
180+
buffer_size(v) for k, v in z.store.items() if k != 'zarr.json'
181+
)
182+
else:
183+
expect_nbytes_stored = sum(buffer_size(v) for v in z.store.values())
169184
assert expect_nbytes_stored == z.nbytes_stored
170185

171186
# mess with store
172187
try:
173-
z.store[z._key_prefix + 'foo'] = list(range(10))
188+
if self.version == 2:
189+
z.store[z._key_prefix + 'foo'] = list(range(10))
190+
else:
191+
z.store['meta/root/foo'] = list(range(10))
174192
assert -1 == z.nbytes_stored
175193
except TypeError:
176194
pass
@@ -1003,7 +1021,7 @@ def test_nchunks_initialized(self):
10031021

10041022
assert 0 == z.nchunks_initialized
10051023
# manually put something into the store to confuse matters
1006-
z.store['foo'] = b'bar'
1024+
z.store[self.root + 'foo'] = b'bar'
10071025
assert 0 == z.nchunks_initialized
10081026
z[:] = 42
10091027
assert 10 == z.nchunks_initialized
@@ -2703,36 +2721,25 @@ def test_read_from_all_blocks(self):
27032721
# StoreV3 test classes inheriting from the above below this point
27042722
####
27052723

2706-
# Start with TestArrayWithPathV3 not TestArrayV3 since path must be supplied
2707-
27082724
@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
2709-
class TestArrayV3(unittest.TestCase):
2725+
class TestArrayV3(TestArray):
27102726

27112727
version = 3
2728+
root = meta_root
2729+
KVStoreClass = KVStoreV3
27122730

2713-
def test_array_init(self):
2714-
2715-
# normal initialization
2716-
store = KVStoreV3(dict())
2717-
with pytest.raises(ValueError):
2718-
# cannot init_array for v3 without a path
2719-
init_array(store, shape=100, chunks=10, dtype="<f8")
2720-
2721-
init_array(store, path='x', shape=100, chunks=10, dtype="<f8")
2722-
with pytest.raises(ValueError):
2723-
# cannot initialize a v3 array without a path
2724-
Array(store)
2725-
2726-
def test_prefix_exceptions(self):
2727-
store = KVStoreV3(dict())
2728-
with pytest.raises(ValueError):
2729-
_prefix_to_array_key(store, '')
2730-
2731-
with pytest.raises(ValueError):
2732-
_prefix_to_group_key(store, '')
2731+
def expected(self):
2732+
# tests for array without path will not be run for v3 stores
2733+
assert self.version == 3
2734+
return [
2735+
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20",
2736+
"5fb3d02b8f01244721582929b3cad578aec5cea5",
2737+
"26b098bedb640846e18dc2fbc1c27684bb02b532",
2738+
"799a458c287d431d747bec0728987ca4fe764549",
2739+
"c780221df84eb91cb62f633f12d3f1eaa9cee6bd"
2740+
]
27332741

2734-
with pytest.raises(ValueError):
2735-
_prefix_to_attrs_key(store, '')
2742+
# TODO: fix test_nbytes_stored
27362743

27372744

27382745
@pytest.mark.skipif(not v3_api_available, reason="V3 is disabled")
@@ -2754,10 +2761,19 @@ def create_array(array_path='arr1', read_only=False, **kwargs):
27542761

27552762
def test_array_init(self):
27562763

2757-
# should not be able to initialize without a path in V3
27582764
store = KVStoreV3(dict())
2759-
with pytest.raises(ValueError):
2760-
init_array(store, shape=100, chunks=10, dtype="<f8")
2765+
# can initialize an array without a path
2766+
init_array(store, shape=100, chunks=10, dtype="<f8")
2767+
b = Array(store)
2768+
assert not b.is_view
2769+
assert isinstance(b, Array)
2770+
assert (100,) == b.shape
2771+
assert (10,) == b.chunks
2772+
assert '' == b.path
2773+
assert b.name is None
2774+
assert b.basename is None
2775+
assert store is b.store
2776+
assert "968dccbbfc0139f703ead2fd1d503ad6e44db307" == b.hexdigest()
27612777

27622778
# initialize at path
27632779
store = KVStoreV3(dict())
@@ -2797,11 +2813,6 @@ def test_array_init(self):
27972813
assert group_key not in store
27982814
assert (meta_root + path + '.array.json') in store
27992815

2800-
def test_array_no_path(self):
2801-
# passing path=None to init_array will raise an exception
2802-
with pytest.raises(ValueError):
2803-
self.create_array(shape=1000, chunks=100, array_path=None)
2804-
28052816
def expected(self):
28062817
return [
28072818
"73ab8ace56719a5c9308c3754f5e2d57bc73dc20",

0 commit comments

Comments
 (0)