Skip to content

Commit eed60f0

Browse files
committed
Create common schema and writer for ICF and plink encoding
1 parent 3dca18e commit eed60f0

16 files changed

+1138
-1087
lines changed

bio2zarr/cli.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import numcodecs
99
import tabulate
1010

11-
from . import plink, provenance, vcf2zarr, vcf_utils
12-
from .vcf2zarr import icf as icf_mod
11+
from . import icf as icf_mod
12+
from . import plink, provenance, vcf_utils
1313

1414
logger = logging.getLogger(__name__)
1515

@@ -236,7 +236,7 @@ def explode(
236236
"""
237237
setup_logging(verbose)
238238
check_overwrite_dir(icf_path, force)
239-
vcf2zarr.explode(
239+
icf_mod.explode(
240240
icf_path,
241241
vcfs,
242242
worker_processes=worker_processes,
@@ -276,7 +276,7 @@ def dexplode_init(
276276
setup_logging(verbose)
277277
check_overwrite_dir(icf_path, force)
278278
check_partitions(num_partitions)
279-
work_summary = vcf2zarr.explode_init(
279+
work_summary = icf_mod.explode_init(
280280
icf_path,
281281
vcfs,
282282
target_num_partitions=num_partitions,
@@ -304,7 +304,7 @@ def dexplode_partition(icf_path, partition, verbose, one_based):
304304
setup_logging(verbose)
305305
if one_based:
306306
partition -= 1
307-
vcf2zarr.explode_partition(icf_path, partition)
307+
icf_mod.explode_partition(icf_path, partition)
308308

309309

310310
@click.command
@@ -315,7 +315,7 @@ def dexplode_finalise(icf_path, verbose):
315315
Final step for distributed conversion of VCF(s) to intermediate columnar format.
316316
"""
317317
setup_logging(verbose)
318-
vcf2zarr.explode_finalise(icf_path)
318+
icf_mod.explode_finalise(icf_path)
319319

320320

321321
@click.command
@@ -326,7 +326,7 @@ def inspect(path, verbose):
326326
Inspect an intermediate columnar format or Zarr path.
327327
"""
328328
setup_logging(verbose)
329-
data = vcf2zarr.inspect(path)
329+
data = icf_mod.inspect(path)
330330
click.echo(tabulate.tabulate(data, headers="keys"))
331331

332332

@@ -345,7 +345,7 @@ def mkschema(icf_path, variants_chunk_size, samples_chunk_size, local_alleles):
345345
err=True,
346346
)
347347
stream = click.get_text_stream("stdout")
348-
vcf2zarr.mkschema(
348+
icf_mod.mkschema(
349349
icf_path,
350350
stream,
351351
variants_chunk_size=variants_chunk_size,
@@ -384,7 +384,7 @@ def encode(
384384
"""
385385
setup_logging(verbose)
386386
check_overwrite_dir(zarr_path, force)
387-
vcf2zarr.encode(
387+
icf_mod.encode(
388388
icf_path,
389389
zarr_path,
390390
schema_path=schema,
@@ -438,7 +438,7 @@ def dencode_init(
438438
setup_logging(verbose)
439439
check_overwrite_dir(zarr_path, force)
440440
check_partitions(num_partitions)
441-
work_summary = vcf2zarr.encode_init(
441+
work_summary = icf_mod.encode_init(
442442
icf_path,
443443
zarr_path,
444444
target_num_partitions=num_partitions,
@@ -466,7 +466,7 @@ def dencode_partition(zarr_path, partition, verbose, one_based):
466466
setup_logging(verbose)
467467
if one_based:
468468
partition -= 1
469-
vcf2zarr.encode_partition(zarr_path, partition)
469+
icf_mod.encode_partition(zarr_path, partition)
470470

471471

472472
@click.command
@@ -478,7 +478,7 @@ def dencode_finalise(zarr_path, verbose, progress):
478478
Final step for distributed conversion of ICF to VCF Zarr.
479479
"""
480480
setup_logging(verbose)
481-
vcf2zarr.encode_finalise(zarr_path, show_progress=progress)
481+
icf_mod.encode_finalise(zarr_path, show_progress=progress)
482482

483483

484484
@click.command(name="convert")
@@ -507,7 +507,7 @@ def convert_vcf(
507507
"""
508508
setup_logging(verbose)
509509
check_overwrite_dir(zarr_path, force)
510-
vcf2zarr.convert(
510+
icf_mod.convert(
511511
vcfs,
512512
zarr_path,
513513
variants_chunk_size=variants_chunk_size,

bio2zarr/core.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ def display_size(n):
3434
return humanfriendly.format_size(n, binary=True)
3535

3636

37+
def parse_max_memory(max_memory):
    """
    Normalise a memory budget specification.

    ``None`` yields ``2**63``, which acts as an effectively unbounded
    budget. A string is converted with ``humanfriendly.parse_size``; any
    other value is passed through unchanged. For non-``None`` input the
    resulting budget is logged at INFO level (formatted by
    ``display_size``) before being returned.
    """
    if max_memory is not None:
        # Only string specifications need parsing; other values are
        # used exactly as supplied by the caller.
        if isinstance(max_memory, str):
            max_memory = humanfriendly.parse_size(max_memory)
        logger.info(f"Set memory budget to {display_size(max_memory)}")
        return max_memory
    # No limit requested: effectively unbounded
    return 2**63
45+
46+
3747
def min_int_dtype(min_value, max_value):
3848
if min_value > max_value:
3949
raise ValueError("min_value must be <= max_value")

0 commit comments

Comments
 (0)