5
5
import numpy as np
6
6
import zarr
7
7
8
- from bio2zarr import constants , schema , writer
8
+ from bio2zarr import constants , vcz
9
9
10
10
logger = logging .getLogger (__name__ )
11
11
@@ -58,12 +58,12 @@ def generate_schema(
58
58
m = self .bed .sid_count
59
59
logging .info (f"Scanned plink with { n } samples and { m } variants" )
60
60
61
- schema_instance = schema .VcfZarrSchema (
62
- format_version = schema .ZARR_SCHEMA_FORMAT_VERSION ,
61
+ schema_instance = vcz .VcfZarrSchema (
62
+ format_version = vcz .ZARR_SCHEMA_FORMAT_VERSION ,
63
63
samples_chunk_size = samples_chunk_size ,
64
64
variants_chunk_size = variants_chunk_size ,
65
65
fields = [],
66
- samples = [schema .Sample (id = sample ) for sample in self .bed .iid ],
66
+ samples = [vcz .Sample (id = sample ) for sample in self .bed .iid ],
67
67
contigs = [],
68
68
filters = [],
69
69
)
@@ -74,7 +74,7 @@ def generate_schema(
74
74
)
75
75
76
76
array_specs = [
77
- schema .ZarrArraySpec .new (
77
+ vcz .ZarrArraySpec .new (
78
78
vcf_field = "position" ,
79
79
name = "variant_position" ,
80
80
dtype = "i4" ,
@@ -83,7 +83,7 @@ def generate_schema(
83
83
chunks = [schema_instance .variants_chunk_size ],
84
84
description = None ,
85
85
),
86
- schema .ZarrArraySpec .new (
86
+ vcz .ZarrArraySpec .new (
87
87
vcf_field = None ,
88
88
name = "variant_allele" ,
89
89
dtype = "O" ,
@@ -92,7 +92,7 @@ def generate_schema(
92
92
chunks = [schema_instance .variants_chunk_size , 2 ],
93
93
description = None ,
94
94
),
95
- schema .ZarrArraySpec .new (
95
+ vcz .ZarrArraySpec .new (
96
96
vcf_field = None ,
97
97
name = "call_genotype_phased" ,
98
98
dtype = "bool" ,
@@ -104,7 +104,7 @@ def generate_schema(
104
104
],
105
105
description = None ,
106
106
),
107
- schema .ZarrArraySpec .new (
107
+ vcz .ZarrArraySpec .new (
108
108
vcf_field = None ,
109
109
name = "call_genotype" ,
110
110
dtype = "i1" ,
@@ -117,7 +117,7 @@ def generate_schema(
117
117
],
118
118
description = None ,
119
119
),
120
- schema .ZarrArraySpec .new (
120
+ vcz .ZarrArraySpec .new (
121
121
vcf_field = None ,
122
122
name = "call_genotype_mask" ,
123
123
dtype = "bool" ,
@@ -150,7 +150,7 @@ def convert(
150
150
samples_chunk_size = samples_chunk_size ,
151
151
)
152
152
zarr_path = pathlib .Path (zarr_path )
153
- vzw = writer .VcfZarrWriter (PlinkFormat , zarr_path )
153
+ vzw = vcz .VcfZarrWriter (PlinkFormat , zarr_path )
154
154
# Rough heuristic to split work up enough to keep utilisation high
155
155
target_num_partitions = max (1 , worker_processes * 4 )
156
156
vzw .init (
0 commit comments