@@ -1050,12 +1050,32 @@ def generate_schema(
1050
1050
"samples" : vcz .VcfZarrDimension (
1051
1051
size = n , chunk_size = samples_chunk_size or 10000
1052
1052
),
1053
- # ploidy added conditionally below
1053
+ # ploidy and genotypes added conditionally below
1054
1054
"alleles" : vcz .VcfZarrDimension (size = max_alleles ),
1055
1055
"alt_alleles" : vcz .VcfZarrDimension (size = max_alleles - 1 ),
1056
1056
"filters" : vcz .VcfZarrDimension (size = self .metadata .num_filters ),
1057
1057
}
1058
1058
1059
+ # Add ploidy and genotypes dimensions only when needed
1060
+ gt_field = None
1061
+ max_genotypes = 0
1062
+ for field in self .metadata .format_fields :
1063
+ if field .name == "GT" :
1064
+ gt_field = field
1065
+ continue
1066
+ if field .vcf_number == "G" :
1067
+ max_genotypes = max (max_genotypes , field .summary .max_number )
1068
+ if gt_field is not None :
1069
+ ploidy = max (gt_field .summary .max_number - 1 , 1 )
1070
+ dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1071
+ max_genotypes = math .comb (max_alleles + ploidy - 1 , ploidy )
1072
+ dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1073
+ else :
1074
+ if max_genotypes > 0 :
1075
+ # there is no GT field, but there is at least one Number=G field,
1076
+ # so need to define genotypes dimension
1077
+ dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1078
+
1059
1079
schema_instance = vcz .VcfZarrSchema (
1060
1080
format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
1061
1081
dimensions = dimensions ,
@@ -1128,18 +1148,12 @@ def fixed_field_spec(name, dtype, source=None, dimensions=("variants",)):
1128
1148
[spec_from_field (field ) for field in self .metadata .info_fields ]
1129
1149
)
1130
1150
1131
- gt_field = None
1132
1151
for field in self .metadata .format_fields :
1133
1152
if field .name == "GT" :
1134
- gt_field = field
1135
1153
continue
1136
1154
array_specs .append (spec_from_field (field ))
1137
1155
1138
1156
if gt_field is not None and n > 0 :
1139
- ploidy = max (gt_field .summary .max_number - 1 , 1 )
1140
- # Add ploidy dimension only when needed
1141
- schema_instance .dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1142
-
1143
1157
array_specs .append (
1144
1158
vcz .ZarrArraySpec (
1145
1159
name = "call_genotype_phased" ,
0 commit comments