Skip to content

Commit a7c9743

Browse files
Add explicit tests with indels
Closes #106
1 parent e8184f1 commit a7c9743

File tree

5 files changed

+12
-1
lines changed

5 files changed

+12
-1
lines changed

bio2zarr/vcf_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -441,9 +441,9 @@ def count_variants(self, region):
441441
return sum(1 for _ in self.variants(region))
442442

443443
def variants(self, region):
444-
# Need to filter because of indels overlapping the region
445444
start = 1 if region.start is None else region.start
446445
for var in self.vcf(str(region)):
446+
# Need to filter because of indels overlapping the region
447447
if var.POS >= start:
448448
yield var
449449

tests/data/vcf/chr_m_indels.vcf.gz

12.6 KB
Binary file not shown.
116 Bytes
Binary file not shown.

tests/test_vcf_examples.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -837,6 +837,7 @@ def test_duplicate_paths(self, tmp_path):
837837
"1kg_2020_chrM.vcf.gz",
838838
"field_type_combos.vcf.gz",
839839
"out_of_order_contigs.vcf.gz",
840+
"chr_m_indels.vcf.gz",
840841
],
841842
)
842843
def test_by_validating(name, tmp_path):

tests/test_vcf_utils.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,13 @@ def test_context_manager_error(self):
2929
with vcf_utils.IndexedVcf(data_path / "no-such-file.bcf"):
3030
pass
3131

32+
def test_indels_filtered(self):
33+
with vcf_utils.IndexedVcf(data_path / "chr_m_indels.vcf.gz") as vfile:
34+
# Hand-picked example that results in filtering
35+
region = vcf_utils.Region("chrM", 300, 314)
36+
pos = [var.POS for var in vfile.variants(region)]
37+
assert pos == [307, 308, 309, 312, 313, 314]
38+
3239
# values computed using bcftools index -s
3340
@pytest.mark.parametrize(
3441
("index_file", "expected"),
@@ -58,6 +65,7 @@ def test_context_manager_error(self):
5865
("1kg_2020_chr20_annotations.bcf.csi", {"chr20": 21}),
5966
("NA12878.prod.chr20snippet.g.vcf.gz.tbi", {"20": 301778}),
6067
("multi_contig.vcf.gz.tbi", {str(j): 933 for j in range(5)}),
68+
("chr_m_indels.vcf.gz.csi", {"chrM": 155}),
6169
],
6270
)
6371
def test_contig_record_counts(self, index_file, expected):
@@ -82,6 +90,7 @@ def test_contig_record_counts(self, index_file, expected):
8290
("1kg_2020_chr20_annotations.bcf.csi", ["chr20:60070-"]),
8391
("NA12878.prod.chr20snippet.g.vcf.gz.tbi", ["20:60001-"]),
8492
("multi_contig.vcf.gz.tbi", [f"{j}:1-" for j in range(5)]),
93+
("chr_m_indels.vcf.gz.csi", ["chrM:26-"]),
8594
],
8695
)
8796
def test_partition_into_one_part(self, index_file, expected):
@@ -106,6 +115,7 @@ def test_partition_into_one_part(self, index_file, expected):
106115
("1kg_2020_chr20_annotations.bcf.csi", 1, 21),
107116
("NA12878.prod.chr20snippet.g.vcf.gz.tbi", 59, 301778),
108117
("multi_contig.vcf.gz.tbi", 5, 5 * 933),
118+
("chr_m_indels.vcf.gz.csi", 1, 155),
109119
],
110120
)
111121
def test_partition_into_max_parts(self, index_file, num_expected, total_records):

0 commit comments

Comments
 (0)