Skip to content

Commit 280d969

Browse files
nhz2 and jakirkham authored
Allow reading utf-8 encoded json files (#1312)
* read utf-8 in json
* update release
* Update zarr/util.py (Co-authored-by: jakirkham <jakirkham@gmail.com>)
* allow str
--------- Co-authored-by: jakirkham <jakirkham@gmail.com>
1 parent 4dc6f1f commit 280d969

File tree

6 files changed

+24
-10
lines changed

6 files changed

+24
-10
lines changed

docs/release.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ Major changes
3131
Bug fixes
3232
~~~~~~~~~
3333

34+
* Allow reading utf-8 encoded json files
35+
By :user:`Nathan Zimmerberg <nhz2>` :issue:`1308`.
36+
3437
* Ensure contiguous data is given to ``FSStore``. Only copying if needed.
3538
By :user:`Mads R. B. Kristensen <madsbk>` :issue:`1285`.
3639
* NestedDirectoryStore.listdir now returns chunk keys with the correct '/' dimension_separator.

fixture/utf8attrs/.zattrs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"foo": "た"}

fixture/utf8attrs/.zgroup

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"zarr_format": 2
3+
}

zarr/meta.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class Metadata2:
9292
ZARR_FORMAT = ZARR_FORMAT
9393

9494
@classmethod
95-
def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
95+
def parse_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]:
9696

9797
# Here we allow that a store may return an already-parsed metadata object,
9898
# or a string of JSON that we will parse here. We allow for an already-parsed
@@ -110,7 +110,7 @@ def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
110110
return meta
111111

112112
@classmethod
113-
def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
113+
def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]:
114114
meta = cls.parse_metadata(s)
115115

116116
# check metadata format
@@ -198,7 +198,7 @@ def decode_dtype(cls, d) -> np.dtype:
198198
return np.dtype(d)
199199

200200
@classmethod
201-
def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
201+
def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]:
202202
meta = cls.parse_metadata(s)
203203

204204
# check metadata format version
@@ -351,7 +351,7 @@ def encode_dtype(cls, d):
351351
return get_extended_dtype_info(np.dtype(d))
352352

353353
@classmethod
354-
def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
354+
def decode_group_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]:
355355
meta = cls.parse_metadata(s)
356356
# 1 / 0
357357
# # check metadata format version
@@ -390,7 +390,7 @@ def encode_hierarchy_metadata(cls, meta=None) -> bytes:
390390

391391
@classmethod
392392
def decode_hierarchy_metadata(
393-
cls, s: Union[MappingType, str]
393+
cls, s: Union[MappingType, bytes, str]
394394
) -> MappingType[str, Any]:
395395
meta = cls.parse_metadata(s)
396396
# check metadata format
@@ -495,7 +495,7 @@ def _decode_storage_transformer_metadata(cls, meta: Mapping) -> "StorageTransfor
495495
return StorageTransformerCls.from_config(transformer_type, conf)
496496

497497
@classmethod
498-
def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]:
498+
def decode_array_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]:
499499
meta = cls.parse_metadata(s)
500500

501501
# extract array metadata fields

zarr/tests/test_attrs.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44

55
from zarr._storage.store import meta_root
66
from zarr.attrs import Attributes
7-
from zarr.storage import KVStore
7+
from zarr.storage import KVStore, DirectoryStore
88
from zarr._storage.v3 import KVStoreV3
99
from zarr.tests.util import CountingDict, CountingDictV3
10+
from zarr.hierarchy import group
1011

1112

1213
@pytest.fixture(params=[2, 3])
@@ -42,11 +43,17 @@ def test_storage(self, zarr_version):
4243
a['baz'] = 42
4344
assert attrs_key in store
4445
assert isinstance(store[attrs_key], bytes)
45-
d = json.loads(str(store[attrs_key], 'ascii'))
46+
d = json.loads(str(store[attrs_key], 'utf-8'))
4647
if zarr_version == 3:
4748
d = d['attributes']
4849
assert dict(foo='bar', baz=42) == d
4950

51+
def test_utf8_encoding(self, zarr_version):
52+
53+
# fixture data
54+
fixture = group(store=DirectoryStore('fixture'))
55+
assert fixture['utf8attrs'].attrs.asdict() == dict(foo='た')
56+
5057
def test_get_set_del_contains(self, zarr_version):
5158

5259
store = _init_store(zarr_version)

zarr/util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ def json_dumps(o: Any) -> bytes:
5656
separators=(',', ': '), cls=NumberEncoder).encode('ascii')
5757

5858

59-
def json_loads(s: str) -> Dict[str, Any]:
59+
def json_loads(s: Union[bytes, str]) -> Dict[str, Any]:
6060
"""Read JSON in a consistent way."""
61-
return json.loads(ensure_text(s, 'ascii'))
61+
return json.loads(ensure_text(s, 'utf-8'))
6262

6363

6464
def normalize_shape(shape) -> Tuple[int]:

0 commit comments

Comments
 (0)