Skip to content

Commit 635cac1

Browse files
committed
Check dimension sizes for named VCF Number fields
1 parent 8915f73 commit 635cac1

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

bio2zarr/icf.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1042,6 +1042,7 @@ def generate_schema(
10421042
if local_alleles is None:
10431043
local_alleles = False
10441044

1045+
max_alleles = max(self.fields["ALT"].vcf_field.summary.max_number + 1, 2)
10451046
dimensions = {
10461047
"variants": vcz.VcfZarrDimension(
10471048
size=m, chunk_size=variants_chunk_size or 1000
@@ -1050,9 +1051,8 @@ def generate_schema(
10501051
size=n, chunk_size=samples_chunk_size or 10000
10511052
),
10521053
# ploidy added conditionally below
1053-
"alleles": vcz.VcfZarrDimension(
1054-
size=max(self.fields["ALT"].vcf_field.summary.max_number + 1, 2)
1055-
),
1054+
"alleles": vcz.VcfZarrDimension(size=max_alleles),
1055+
"alt_alleles": vcz.VcfZarrDimension(size=max_alleles - 1),
10561056
"filters": vcz.VcfZarrDimension(size=self.metadata.num_filters),
10571057
}
10581058

bio2zarr/vcz.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -172,19 +172,29 @@ def from_field(
172172
array_name = prefix + vcf_field.name
173173

174174
max_number = vcf_field.max_number
175-
if (max_number > 0 and vcf_field.vcf_number in ("R", "A", "G")) or (
176-
max_number > 1 or vcf_field.full_name == "FORMAT/LAA"
177-
):
178-
# TODO we should really be checking this to see if the named dimensions
179-
# are actually correct.
180-
if vcf_field.vcf_number == "R":
175+
if vcf_field.vcf_number == "R":
176+
max_alleles = schema.dimensions["alleles"].size
177+
if max_number > max_alleles:
178+
raise ValueError(
179+
f"Max number of values {max_number} exceeds max alleles "
180+
f"{max_alleles} for {vcf_field.full_name}"
181+
)
182+
if max_alleles > 0:
181183
dimensions.append("alleles")
182-
elif vcf_field.vcf_number == "A":
184+
elif vcf_field.vcf_number == "A":
185+
max_alt_alleles = schema.dimensions["alt_alleles"].size
186+
if max_number > max_alt_alleles:
187+
raise ValueError(
188+
f"Max number of values {max_number} exceeds max alt alleles "
189+
f"{max_alt_alleles} for {vcf_field.full_name}"
190+
)
191+
if max_alt_alleles > 0:
183192
dimensions.append("alt_alleles")
184-
elif vcf_field.vcf_number == "G":
185-
dimensions.append("genotypes")
186-
else:
187-
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
193+
elif max_number > 0 and vcf_field.vcf_number == "G":
194+
# TODO: need max_genotypes
195+
dimensions.append("genotypes")
196+
elif max_number > 1 or vcf_field.full_name == "FORMAT/LAA":
197+
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
188198
if dimensions[-1] not in schema.dimensions:
189199
schema.dimensions[dimensions[-1]] = VcfZarrDimension(
190200
size=vcf_field.max_number

0 commit comments

Comments
 (0)