Skip to content

Commit df95589

Browse files
tomwhite authored and jeromekelleher committed
Check dimension sizes for named VCF Number fields
1 parent b32d656 commit df95589

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

bio2zarr/icf.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,6 +1056,7 @@ def generate_schema(
10561056
if local_alleles is None:
10571057
local_alleles = False
10581058

1059+
max_alleles = max(self.fields["ALT"].vcf_field.summary.max_number + 1, 2)
10591060
dimensions = {
10601061
"variants": vcz.VcfZarrDimension(
10611062
size=m, chunk_size=variants_chunk_size or vcz.DEFAULT_VARIANT_CHUNK_SIZE
@@ -1064,9 +1065,8 @@ def generate_schema(
10641065
size=n, chunk_size=samples_chunk_size or vcz.DEFAULT_SAMPLE_CHUNK_SIZE
10651066
),
10661067
# ploidy added conditionally below
1067-
"alleles": vcz.VcfZarrDimension(
1068-
size=max(self.fields["ALT"].vcf_field.summary.max_number + 1, 2)
1069-
),
1068+
"alleles": vcz.VcfZarrDimension(size=max_alleles),
1069+
"alt_alleles": vcz.VcfZarrDimension(size=max_alleles - 1),
10701070
"filters": vcz.VcfZarrDimension(size=self.metadata.num_filters),
10711071
}
10721072

bio2zarr/vcz.py

Lines changed: 21 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -170,19 +170,29 @@ def from_field(
170170
array_name = prefix + vcf_field.name
171171

172172
max_number = vcf_field.max_number
173-
if (max_number > 0 and vcf_field.vcf_number in ("R", "A", "G")) or (
174-
max_number > 1 or vcf_field.full_name == "FORMAT/LAA"
175-
):
176-
# TODO we should really be checking this to see if the named dimensions
177-
# are actually correct.
178-
if vcf_field.vcf_number == "R":
173+
if vcf_field.vcf_number == "R":
174+
max_alleles = schema.dimensions["alleles"].size
175+
if max_number > max_alleles:
176+
raise ValueError(
177+
f"Max number of values {max_number} exceeds max alleles "
178+
f"{max_alleles} for {vcf_field.full_name}"
179+
)
180+
if max_alleles > 0:
179181
dimensions.append("alleles")
180-
elif vcf_field.vcf_number == "A":
182+
elif vcf_field.vcf_number == "A":
183+
max_alt_alleles = schema.dimensions["alt_alleles"].size
184+
if max_number > max_alt_alleles:
185+
raise ValueError(
186+
f"Max number of values {max_number} exceeds max alt alleles "
187+
f"{max_alt_alleles} for {vcf_field.full_name}"
188+
)
189+
if max_alt_alleles > 0:
181190
dimensions.append("alt_alleles")
182-
elif vcf_field.vcf_number == "G":
183-
dimensions.append("genotypes")
184-
else:
185-
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
191+
elif max_number > 0 and vcf_field.vcf_number == "G":
192+
# TODO: need max_genotypes
193+
dimensions.append("genotypes")
194+
elif max_number > 1 or vcf_field.full_name == "FORMAT/LAA":
195+
dimensions.append(f"{vcf_field.category}_{vcf_field.name}_dim")
186196
if dimensions[-1] not in schema.dimensions:
187197
schema.dimensions[dimensions[-1]] = VcfZarrDimension(
188198
size=vcf_field.max_number

0 commit comments

Comments
 (0)