Skip to content

Commit 9afe435

Browse files
tomwhite authored and jeromekelleher committed
Add filter_description field
1 parent 9a8745f commit 9afe435

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

bio2zarr/vcz.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ def init(
520520
# Doing this synchronously - this is fine surely
521521
self.encode_samples(root)
522522
if self.source.filters is not None:
523-
self.encode_filter_id(root)
523+
self.encode_filters(root)
524524
if self.source.contigs is not None:
525525
self.encode_contigs(root)
526526

@@ -581,9 +581,7 @@ def encode_contigs(self, root):
581581
)
582582
array.attrs["_ARRAY_DIMENSIONS"] = ["contigs"]
583583

584-
def encode_filter_id(self, root):
585-
# TODO need a way to store description also
586-
# https://github.com/sgkit-dev/vcf-zarr-spec/issues/19
584+
def encode_filters(self, root):
587585
filters = self.source.filters
588586
array = root.array(
589587
"filter_id",
@@ -593,6 +591,14 @@ def encode_filter_id(self, root):
593591
compressor=DEFAULT_ZARR_COMPRESSOR,
594592
)
595593
array.attrs["_ARRAY_DIMENSIONS"] = ["filters"]
594+
array = root.array(
595+
"filter_description",
596+
data=[filt.description for filt in filters],
597+
shape=len(filters),
598+
dtype="str",
599+
compressor=DEFAULT_ZARR_COMPRESSOR,
600+
)
601+
array.attrs["_ARRAY_DIMENSIONS"] = ["filters"]
596602

597603
def init_array(self, root, array_spec, variants_dim_size):
598604
kwargs = dict(zarr_utils.ZARR_FORMAT_KWARGS)

tests/test_vcf_examples.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ def ds(self, tmp_path_factory):
3030

3131
def test_filters(self, ds):
3232
nt.assert_array_equal(ds["filter_id"], ["PASS", "s50", "q10"])
33+
nt.assert_array_equal(
34+
ds["filter_description"],
35+
[
36+
"All filters passed",
37+
"Less than 50% of samples have data",
38+
"Quality below 10",
39+
],
40+
)
3341
nt.assert_array_equal(
3442
ds["variant_filter"],
3543
[
@@ -957,6 +965,7 @@ def test_info_fields(self, ds):
957965
"contig_id",
958966
"contig_length",
959967
"filter_id",
968+
"filter_description",
960969
"region_index",
961970
"sample_id",
962971
]

tests/test_vcz.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,7 @@ def test_vcz(self, zarr_path):
740740
"/sample_id",
741741
"/variant_id_mask",
742742
"/filter_id",
743+
"/filter_description",
743744
"/contig_id",
744745
]
745746
nt.assert_array_equal(sorted(df["name"]), sorted(fields))

0 commit comments

Comments
 (0)