@@ -111,9 +111,6 @@ def full_name(self):
111111 return self .name
112112 return f"{ self .category } /{ self .name } "
113113
114- # TODO add method here to choose a good set compressor and
115- # filters default here for this field.
116-
117114 def smallest_dtype (self ):
118115 """
119116 Returns the smallest dtype suitable for this field based
@@ -123,7 +120,13 @@ def smallest_dtype(self):
123120 if self .vcf_type == "Float" :
124121 ret = "f4"
125122 elif self .vcf_type == "Integer" :
126- ret = core .min_int_dtype (s .min_value , s .max_value )
123+ if not math .isfinite (s .max_value ):
124+ # All missing values; use i1. Note we should have some API to
125+ # check more explicitly for missingness:
126+ # https://github.com/sgkit-dev/bio2zarr/issues/131
127+ ret = "i1"
128+ else :
129+ ret = core .min_int_dtype (s .min_value , s .max_value )
127130 elif self .vcf_type == "Flag" :
128131 ret = "bool"
129132 elif self .vcf_type == "Character" :
@@ -1300,17 +1303,19 @@ def _choose_compressor_settings(self):
13001303
13011304 See https://github.com/pystatgen/bio2zarr/discussions/74
13021305 """
1303- dt = np .dtype (self .dtype )
13041306 # Default is to not shuffle, because autoshuffle isn't recognised
13051307 # by many Zarr implementations, and shuffling can lead to worse
13061308 # performance in some cases anyway. Turning on shuffle should be a
13071309 # deliberate choice.
13081310 shuffle = numcodecs .Blosc .NOSHUFFLE
1309- if self .name == "call_genotype" and dt . itemsize == 1 :
1311+ if self .name == "call_genotype" and self . dtype == "i1" :
13101312 # call_genotype gets BITSHUFFLE by default as it gets
13111313 # significantly better compression (at a cost of slower
13121314 # decoding)
13131315 shuffle = numcodecs .Blosc .BITSHUFFLE
1316+ elif self .dtype == "bool" :
1317+ shuffle = numcodecs .Blosc .BITSHUFFLE
1318+
13141319 self .compressor ["shuffle" ] = shuffle
13151320
13161321
@@ -1440,7 +1445,6 @@ def fixed_field_spec(
14401445 shape = [m , n ]
14411446 chunks = [variants_chunk_size , samples_chunk_size ]
14421447 dimensions = ["variants" , "samples" ]
1443-
14441448 colspecs .append (
14451449 ZarrColumnSpec .new (
14461450 vcf_field = None ,
0 commit comments