Skip to content

Commit bd63b95

Browse files
Add min_int_dtype function
1 parent dddd068 commit bd63b95

File tree

4 files changed

+72
-13
lines changed

4 files changed

+72
-13
lines changed

bio2zarr/core.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,16 @@
1616
numcodecs.blosc.use_threads = False
1717

1818

19+
def min_int_dtype(min_value, max_value):
    """
    Returns the code of the narrowest signed integer dtype that can hold
    every value in the closed interval [min_value, max_value].

    The candidates "i1", "i2", "i4" and "i8" are tried in order of
    increasing width, and the first one whose ``np.iinfo`` bounds contain
    both ``min_value`` and ``max_value`` is returned as a string.

    Raises ValueError if ``min_value`` is greater than ``max_value``, and
    OverflowError if none of the candidate dtypes can hold the interval.
    """
    if min_value > max_value:
        raise ValueError("min_value must be <= max_value")
    # Candidates are listed narrowest first, so the first match is minimal.
    for a_dtype in ["i1", "i2", "i4", "i8"]:
        info = np.iinfo(a_dtype)
        if info.min <= min_value and max_value <= info.max:
            return a_dtype
    raise OverflowError("Integer cannot be represented")
27+
28+
1929
def chunk_aligned_slices(z, n, max_chunks=None):
2030
"""
2131
Returns at n slices in the specified zarr array, aligned

bio2zarr/vcf.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -123,13 +123,7 @@ def smallest_dtype(self):
123123
if self.vcf_type == "Float":
124124
ret = "f4"
125125
elif self.vcf_type == "Integer":
126-
dtype = "i4"
127-
for a_dtype in ["i1", "i2"]:
128-
info = np.iinfo(a_dtype)
129-
if info.min <= s.min_value and s.max_value <= info.max:
130-
dtype = a_dtype
131-
break
132-
ret = dtype
126+
ret = core.min_int_dtype(s.min_value, s.max_value)
133127
elif self.vcf_type == "Flag":
134128
ret = "bool"
135129
elif self.vcf_type == "Character":
@@ -187,6 +181,14 @@ def format_fields(self):
187181
fields.append(field)
188182
return fields
189183

184+
@property
def num_contigs(self):
    """
    The number of contigs, i.e. the length of ``self.contig_names``.
    """
    return len(self.contig_names)
187+
188+
@property
def num_filters(self):
    """
    The number of filters, i.e. the length of ``self.filters``.
    """
    return len(self.filters)
191+
190192
@property
191193
def num_records(self):
192194
return sum(self.contig_record_counts.values())
@@ -1388,24 +1390,22 @@ def fixed_field_spec(
13881390

13891391
alt_col = icf.columns["ALT"]
13901392
max_alleles = alt_col.vcf_field.summary.max_number + 1
1391-
num_filters = len(icf.metadata.filters)
13921393

1393-
# # FIXME get dtype from lookup table
13941394
colspecs = [
13951395
fixed_field_spec(
13961396
name="variant_contig",
1397-
dtype="i2", # FIXME
1397+
dtype=core.min_int_dtype(0, icf.metadata.num_contigs),
13981398
),
13991399
fixed_field_spec(
14001400
name="variant_filter",
14011401
dtype="bool",
1402-
shape=(m, num_filters),
1402+
shape=(m, icf.metadata.num_filters),
14031403
dimensions=["variants", "filters"],
14041404
),
14051405
fixed_field_spec(
14061406
name="variant_allele",
14071407
dtype="str",
1408-
shape=[m, max_alleles],
1408+
shape=(m, max_alleles),
14091409
dimensions=["variants", "alleles"],
14101410
),
14111411
fixed_field_spec(

tests/test_core.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,55 @@
55
from bio2zarr import core
66

77

8+
class TestMinIntDtype:
    """
    Tests for core.min_int_dtype: dtype selection at the boundaries of
    each signed integer width, and the two error paths.
    """

    @pytest.mark.parametrize(
        ("min_value", "max_value", "dtype"),
        [
            (0, 1, "i1"),
            (0, 0, "i1"),
            (0, 127, "i1"),
            (127, 128, "i2"),
            (-127, 0, "i1"),
            (-127, -126, "i1"),
            # Exact lower bound of i1, and the first value below it.
            (-128, 0, "i1"),
            (-129, 0, "i2"),
            (0, 2**15 - 1, "i2"),
            (-(2**15), 2**15 - 1, "i2"),
            (0, 2**15, "i4"),
            (-(2**15), 2**15, "i4"),
            (0, 2**31 - 1, "i4"),
            (-(2**31), 2**31 - 1, "i4"),
            (2**31 - 1, 2**31 - 1, "i4"),
            (0, 2**31, "i8"),
            (0, 2**32, "i8"),
            # The widest representable range must still resolve to i8.
            (0, 2**63 - 1, "i8"),
            (-(2**63), 2**63 - 1, "i8"),
        ],
    )
    def test_values(self, min_value, max_value, dtype):
        """The narrowest dtype containing the range is returned."""
        assert core.min_int_dtype(min_value, max_value) == dtype

    @pytest.mark.parametrize(
        ("min_value", "max_value"),
        [
            (0, 2**63),
            (-(2**63) - 1, 0),
            (0, 2**65),
        ],
    )
    def test_overflow(self, min_value, max_value):
        """Ranges exceeding i8 raise OverflowError."""
        with pytest.raises(OverflowError, match="Integer cannot"):
            core.min_int_dtype(min_value, max_value)

    @pytest.mark.parametrize(
        ("min_value", "max_value"),
        [
            (1, 0),
            (-1, -2),
            (2**31, 2**31 - 1),
        ],
    )
    def test_bad_min_max(self, min_value, max_value):
        """A minimum greater than the maximum raises ValueError."""
        with pytest.raises(ValueError, match="must be <="):
            core.min_int_dtype(min_value, max_value)
55+
56+
857
class TestParallelWorkManager:
958
@pytest.mark.parametrize("total", [1, 10, 2**63])
1059
@pytest.mark.parametrize("workers", [0, 1])

tests/test_vcf.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,7 @@ def test_filter_id(self, schema):
225225
def test_variant_contig(self, schema):
226226
assert schema["columns"]["variant_contig"] == {
227227
"name": "variant_contig",
228-
"dtype": "i2",
228+
"dtype": "i1",
229229
"shape": [9],
230230
"chunks": [10000],
231231
"dimensions": ["variants"],

0 commit comments

Comments
 (0)