@@ -39,6 +39,14 @@ def list_commands(self, ctx):
39
39
"zarr_path" , type = click .Path (file_okay = False , dir_okay = True )
40
40
)
41
41
42
# Positional argument naming a directory that must already exist
# (plain files are rejected).
zarr_path = click.argument(
    "zarr_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
)

# Positional argument: requested partition count, validated to be >= 1.
num_partitions = click.argument(
    "num_partitions",
    type=click.IntRange(min=1),
)

# Positional argument: partition number, validated to be >= 0.
partition = click.argument(
    "partition",
    type=click.IntRange(min=0),
)
49
+
42
50
verbose = click .option ("-v" , "--verbose" , count = True , help = "Increase verbosity" )
43
51
44
52
force = click .option (
@@ -92,6 +100,27 @@ def list_commands(self, ctx):
92
100
help = "Chunk size in the samples dimension" ,
93
101
)
94
102
103
# Optional path that must exist when given; None when the flag is omitted.
schema = click.option(
    "-s",
    "--schema",
    default=None,
    type=click.Path(exists=True),
)

# Optional integer limit on the number of chunks written in the variants
# dimension; None when the flag is omitted.
max_variant_chunks = click.option(
    "-V",
    "--max-variant-chunks",
    type=int,
    default=None,
    help=(
        "Truncate the output in the variants dimension to have this number "
        "of chunks. Mainly intended to help with schema tuning."
    ),
)
116
+
117
# Optional memory bound. No click type is declared, so the raw string given
# on the command line (e.g. "10G") is passed through unchanged; None when
# the flag is omitted.
max_memory = click.option(
    "-M",
    "--max-memory",
    default=None,
    help="An approximate bound on overall memory usage (e.g. 10G).",
)
123
+
95
124
96
125
def setup_logging (verbosity ):
97
126
level = "WARNING"
@@ -158,7 +187,7 @@ def explode(
158
187
@click .command
159
188
@vcfs
160
189
@new_icf_path
161
- @click . argument ( " num_partitions" , type = click . IntRange ( min = 1 ))
190
+ @num_partitions
162
191
@force
163
192
@column_chunk_size
164
193
@compressor
@@ -194,7 +223,7 @@ def dexplode_init(
194
223
195
224
@click .command
196
225
@icf_path
197
- @click . argument ( " partition" , type = click . IntRange ( min = 0 ))
226
+ @partition
198
227
@verbose
199
228
def dexplode_partition (icf_path , partition , verbose ):
200
229
"""
@@ -207,14 +236,14 @@ def dexplode_partition(icf_path, partition, verbose):
207
236
208
237
209
238
@click.command
@icf_path
@verbose
def dexplode_finalise(icf_path, verbose):
    """
    Final step for distributed conversion of VCF(s) to intermediate columnar format.
    """
    # Logging verbosity comes from the repeated -v/--verbose flag count.
    setup_logging(verbose)
    # All of the actual work is delegated to the library call.
    vcf.explode_finalise(icf_path)
218
247
219
248
220
249
@click .command
@@ -244,26 +273,11 @@ def mkschema(icf_path):
244
273
@new_zarr_path
245
274
@force
246
275
@verbose
247
- @click . option ( "-s" , "-- schema" , default = None , type = click . Path ( exists = True ))
276
+ @schema
248
277
@variants_chunk_size
249
278
@samples_chunk_size
250
- @click .option (
251
- "-V" ,
252
- "--max-variant-chunks" ,
253
- type = int ,
254
- default = None ,
255
- help = (
256
- "Truncate the output in the variants dimension to have "
257
- "this number of chunks. Mainly intended to help with "
258
- "schema tuning."
259
- ),
260
- )
261
- @click .option (
262
- "-M" ,
263
- "--max-memory" ,
264
- default = None ,
265
- help = "An approximate bound on overall memory usage (e.g. 10G)," ,
266
- )
279
+ @max_variant_chunks
280
+ @max_memory
267
281
@worker_processes
268
282
def encode (
269
283
icf_path ,
@@ -295,6 +309,68 @@ def encode(
295
309
)
296
310
297
311
312
@click.command
@icf_path
@new_zarr_path
@num_partitions
@force
@schema
@variants_chunk_size
@samples_chunk_size
@max_variant_chunks
@verbose
def dencode_init(
    icf_path,
    zarr_path,
    num_partitions,
    force,
    schema,
    variants_chunk_size,
    samples_chunk_size,
    max_variant_chunks,
    verbose,
):
    """
    Initial step for distributed conversion of intermediate columnar format
    to Zarr.

    NUM_PARTITIONS is a target only; the number of partitions reported by
    the initialisation is printed to standard output.
    """
    setup_logging(verbose)
    # Refuse to clobber an existing output directory unless --force was given.
    check_overwrite_dir(zarr_path, force)
    # The requested count is passed as a target; the value that comes back is
    # kept under its own name so the CLI parameter is not rebound.
    reported_partitions = vcf.encode_init(
        icf_path,
        zarr_path,
        target_num_partitions=num_partitions,
        schema_path=schema,
        variants_chunk_size=variants_chunk_size,
        samples_chunk_size=samples_chunk_size,
        max_v_chunks=max_variant_chunks,
        show_progress=True,
    )
    # Echo the reported count so calling scripts can capture it from stdout.
    click.echo(reported_partitions)
349
+
350
+
351
@click.command
@zarr_path
@partition
@verbose
def dencode_partition(zarr_path, partition, verbose):
    """
    Encode a single partition of a distributed conversion to Zarr.

    PARTITION is the number of the partition to encode into the existing
    directory at ZARR_PATH.
    """
    setup_logging(verbose)
    # Progress display is switched off for per-partition runs.
    vcf.encode_partition(zarr_path, partition, show_progress=False)
361
+
362
+
363
@click.command
@zarr_path
@verbose
def dencode_finalise(zarr_path, verbose):
    """
    Final step for distributed conversion of intermediate columnar format to Zarr.
    """
    setup_logging(verbose)
    # All of the actual work is delegated to the library call.
    vcf.encode_finalise(zarr_path)
372
+
373
+
298
374
@click .command (name = "convert" )
299
375
@vcfs
300
376
@new_zarr_path
@@ -382,6 +458,9 @@ def vcf2zarr():
382
458
# Distributed explode sub-commands.
vcf2zarr.add_command(dexplode_init)
vcf2zarr.add_command(dexplode_partition)
vcf2zarr.add_command(dexplode_finalise)
# Distributed encode sub-commands.
vcf2zarr.add_command(dencode_init)
vcf2zarr.add_command(dencode_partition)
vcf2zarr.add_command(dencode_finalise)
385
464
386
465
387
466
@click .command (name = "convert" )
0 commit comments