@@ -111,9 +111,6 @@ def full_name(self):
111
111
return self .name
112
112
return f"{ self .category } /{ self .name } "
113
113
114
- # TODO add method here to choose a good set compressor and
115
- # filters default here for this field.
116
-
117
114
def smallest_dtype (self ):
118
115
"""
119
116
Returns the smallest dtype suitable for this field based
@@ -123,7 +120,13 @@ def smallest_dtype(self):
123
120
if self .vcf_type == "Float" :
124
121
ret = "f4"
125
122
elif self .vcf_type == "Integer" :
126
- ret = core .min_int_dtype (s .min_value , s .max_value )
123
+ if not math .isfinite (s .max_value ):
124
+ # All missing values; use i1. Note we should have some API to
125
+ # check more explicitly for missingness:
126
+ # https://github.com/sgkit-dev/bio2zarr/issues/131
127
+ ret = "i1"
128
+ else :
129
+ ret = core .min_int_dtype (s .min_value , s .max_value )
127
130
elif self .vcf_type == "Flag" :
128
131
ret = "bool"
129
132
elif self .vcf_type == "Character" :
@@ -1300,17 +1303,19 @@ def _choose_compressor_settings(self):
1300
1303
1301
1304
See https://github.com/pystatgen/bio2zarr/discussions/74
1302
1305
"""
1303
- dt = np .dtype (self .dtype )
1304
1306
# Default is to not shuffle, because autoshuffle isn't recognised
1305
1307
# by many Zarr implementations, and shuffling can lead to worse
1306
1308
# performance in some cases anyway. Turning on shuffle should be a
1307
1309
# deliberate choice.
1308
1310
shuffle = numcodecs .Blosc .NOSHUFFLE
1309
- if self .name == "call_genotype" and dt . itemsize == 1 :
1311
+ if self .name == "call_genotype" and self . dtype == "i1" :
1310
1312
# call_genotype gets BITSHUFFLE by default as it gets
1311
1313
# significantly better compression (at a cost of slower
1312
1314
# decoding)
1313
1315
shuffle = numcodecs .Blosc .BITSHUFFLE
1316
+ elif self .dtype == "bool" :
1317
+ shuffle = numcodecs .Blosc .BITSHUFFLE
1318
+
1314
1319
self .compressor ["shuffle" ] = shuffle
1315
1320
1316
1321
@@ -1440,7 +1445,6 @@ def fixed_field_spec(
1440
1445
shape = [m , n ]
1441
1446
chunks = [variants_chunk_size , samples_chunk_size ]
1442
1447
dimensions = ["variants" , "samples" ]
1443
-
1444
1448
colspecs .append (
1445
1449
ZarrColumnSpec .new (
1446
1450
vcf_field = None ,
0 commit comments