@@ -1050,12 +1050,31 @@ def generate_schema(
10501050 "samples" : vcz .VcfZarrDimension (
10511051 size = n , chunk_size = samples_chunk_size or 10000
10521052 ),
1053- # ploidy added conditionally below
1053+ # ploidy and genotypes added conditionally below
10541054 "alleles" : vcz .VcfZarrDimension (size = max_alleles ),
10551055 "alt_alleles" : vcz .VcfZarrDimension (size = max_alleles - 1 ),
10561056 "filters" : vcz .VcfZarrDimension (size = self .metadata .num_filters ),
10571057 }
10581058
1059+ # Add ploidy and genotypes dimensions only when needed
1060+ gt_field = None
1061+ max_genotypes = 0
1062+ for field in self .metadata .format_fields :
1063+ if field .name == "GT" :
1064+ gt_field = field
1065+ elif field .vcf_number == "G" :
1066+ max_genotypes = max (max_genotypes , field .summary .max_number )
1067+ if gt_field is not None :
1068+ ploidy = max (gt_field .summary .max_number - 1 , 1 )
1069+ dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1070+ max_genotypes = math .comb (max_alleles + ploidy - 1 , ploidy )
1071+ dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1072+ else :
1073+ if max_genotypes > 0 :
1074+ # there is no GT field, but there is at least one Number=G field,
1075+ # so need to define genotypes dimension
1076+ dimensions ["genotypes" ] = vcz .VcfZarrDimension (size = max_genotypes )
1077+
10591078 schema_instance = vcz .VcfZarrSchema (
10601079 format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
10611080 dimensions = dimensions ,
@@ -1128,18 +1147,12 @@ def fixed_field_spec(name, dtype, source=None, dimensions=("variants",)):
11281147 [spec_from_field (field ) for field in self .metadata .info_fields ]
11291148 )
11301149
1131- gt_field = None
11321150 for field in self .metadata .format_fields :
11331151 if field .name == "GT" :
1134- gt_field = field
11351152 continue
11361153 array_specs .append (spec_from_field (field ))
11371154
11381155 if gt_field is not None and n > 0 :
1139- ploidy = max (gt_field .summary .max_number - 1 , 1 )
1140- # Add ploidy dimension only when needed
1141- schema_instance .dimensions ["ploidy" ] = vcz .VcfZarrDimension (size = ploidy )
1142-
11431156 array_specs .append (
11441157 vcz .ZarrArraySpec (
11451158 name = "call_genotype_phased" ,
0 commit comments