@@ -822,6 +822,62 @@ def __exit__(self, exc_type, exc_val, exc_tb):
822822 return False
823823
824824
825+ def convert_local_allele_field_types (fields ):
826+ """
827+ Update the specified list of fields to include the LAA field, and to convert
828+ any supported localisable fields to the L* counterpart.
829+
830+ Note that we currently support only two ALT alleles per sample, and so the
831+ dimensions of these fields are fixed by that requirement. Later versions may
832+ use summary data storted in the ICF to make different choices, if information
833+ about subsequent alleles (not in the actual genotype calls) should also be
834+ stored.
835+ """
836+ fields_by_name = {field .name : field for field in fields }
837+ gt = fields_by_name ["call_genotype" ]
838+ if gt .shape [- 1 ] != 2 :
839+ raise ValueError ("Local alleles only supported on diploid data" )
840+
841+ # TODO check if LA is already in here
842+
843+ shape = gt .shape [:- 1 ]
844+ chunks = gt .chunks [:- 1 ]
845+ dimensions = gt .dimensions [:- 1 ]
846+
847+ la = schema .ZarrArraySpec .new (
848+ vcf_field = None ,
849+ name = "call_LA" ,
850+ dtype = "i1" ,
851+ shape = gt .shape ,
852+ chunks = gt .chunks ,
853+ dimensions = (* dimensions , "local_alleles" ),
854+ description = (
855+ "0-based indices into REF+ALT, indicating which alleles"
856+ " are relevant (local) for the current sample"
857+ ),
858+ )
859+ ad = fields_by_name .get ("call_AD" , None )
860+ if ad is not None :
861+ # TODO check if call_LAD is in the list already
862+ ad .name = "call_LAD"
863+ ad .vcf_field = None
864+ ad .shape = (* shape , 2 )
865+ ad .chunks = (* chunks , 2 )
866+ ad .dimensions = (* dimensions , "local_alleles" )
867+ ad .description += " (local-alleles)"
868+
869+ pl = fields_by_name .get ("call_PL" , None )
870+ if pl is not None :
871+ # TODO check if call_LPL is in the list already
872+ pl .name = "call_LPL"
873+ pl .vcf_field = None
874+ pl .shape = (* shape , 3 )
875+ pl .chunks = (* chunks , 3 )
876+ pl .description += " (local-alleles)"
877+ pl .dimensions = (* dimensions , "local_" + pl .dimensions [- 1 ])
878+ return [* fields , la ]
879+
880+
825881class IntermediateColumnarFormat (collections .abc .Mapping ):
826882 def __init__ (self , path ):
827883 self .path = pathlib .Path (path )
@@ -1110,8 +1166,6 @@ def fixed_field_spec(
11101166 )
11111167
11121168 if local_alleles :
1113- from bio2zarr .vcf2zarr .vcz import convert_local_allele_field_types
1114-
11151169 array_specs = convert_local_allele_field_types (array_specs )
11161170
11171171 return schema .VcfZarrSchema (
0 commit comments