Skip to content

Commit 4dd5b38

Browse files
committed
Make format dependencies optional
1 parent 587a29e commit 4dd5b38

File tree

12 files changed

+155
-12
lines changed

12 files changed

+155
-12
lines changed

.github/workflows/ci.yml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,48 @@ jobs:
8181
# https://github.com/coverallsapp/github-action
8282
fail-on-error: false
8383

84+
optional_dependencies:
85+
name: Optional dependencies
86+
runs-on: ubuntu-latest
87+
steps:
88+
- uses: actions/checkout@v4
89+
- uses: actions/setup-python@v5
90+
with:
91+
python-version: '3.11'
92+
- name: Check optional dependencies
93+
run: |
94+
python -m pip install --upgrade pip
95+
96+
python -m venv env-tskit
97+
source env-tskit/bin/activate
98+
python -m pip install .
99+
python -m bio2zarr tskit2zarr convert tests/data/ts/example.trees ts.vcz > ts.txt 2>&1 || echo $? > ts_exit.txt
100+
test "$(cat ts_exit.txt)" = "1"
101+
grep -q "This process requires the optional tskit module. Install it with: pip install bio2zarr\[tskit\]" ts.txt
102+
python -m pip install '.[tskit]'
103+
python -m bio2zarr tskit2zarr convert tests/data/ts/example.trees ts.vcz
104+
deactivate
105+
106+
python -m venv env-plink
107+
source env-plink/bin/activate
108+
python -m pip install .
109+
python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz > plink.txt 2>&1 || echo $? > plink_exit.txt
110+
test "$(cat plink_exit.txt)" = "1"
111+
grep -q "This process requires the optional bed_reader module. Install it with: pip install bio2zarr\[plink\]" plink.txt
112+
python -m pip install '.[plink]'
113+
python -m bio2zarr plink2zarr convert tests/data/plink/example.bed plink.vcz
114+
deactivate
115+
116+
python -m venv env-vcf
117+
source env-vcf/bin/activate
118+
python -m pip install .
119+
python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz > vcf.txt 2>&1 || echo $? > vcf_exit.txt
120+
test "$(cat vcf_exit.txt)" = "1"
121+
grep -q "This process requires the optional cyvcf2 module. Install it with: pip install bio2zarr\[vcf\]" vcf.txt
122+
python -m pip install '.[vcf]'
123+
python -m bio2zarr vcf2zarr convert tests/data/vcf/sample.vcf.gz sample.vcz
124+
deactivate
125+
84126
packaging:
85127
name: Packaging
86128
runs-on: ubuntu-latest

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# 0.1.6 2025-0X-XX
22

3+
- Make format-specific dependencies optional (#XXX)
4+
35
- Add contigs to plink output (#344)
46

57
Breaking changes

bio2zarr/core.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import os
99
import os.path
1010
import threading
11+
import importlib
12+
import functools
1113
import time
1214

1315
import humanfriendly
@@ -21,6 +23,23 @@
2123
numcodecs.blosc.use_threads = False
2224

2325

26+
def requires_optional_dependency(module_name, extras_name):
    """Return a decorator that checks for an optional dependency at call time.

    Each time the decorated callable is invoked, ``module_name`` is imported
    with ``importlib.import_module`` before the callable runs. If that import
    raises ``ImportError``, a new ``ImportError`` is raised whose message names
    the missing module and the ``bio2zarr[extras_name]`` extra to install.

    Parameters
    ----------
    module_name : str
        Importable name of the optional module (e.g. ``"tskit"``).
    extras_name : str
        Name of the packaging extra quoted in the install hint.

    Returns
    -------
    callable
        A decorator; the wrapper it produces forwards all positional and
        keyword arguments and returns the wrapped callable's result.

    Raises
    ------
    ImportError
        Raised by the wrapper (not at decoration time) when ``module_name``
        cannot be imported. The original error is attached as ``__cause__``
        and the ``name`` attribute is set to ``module_name``.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                importlib.import_module(module_name)
            except ImportError as err:
                # Chain explicitly so the underlying failure (for example a
                # broken transitive import inside an installed module) stays
                # visible as the direct cause in the traceback.
                raise ImportError(
                    f"This process requires the optional {module_name} module. "
                    f"Install it with: pip install bio2zarr[{extras_name}]",
                    name=module_name,
                ) from err
            return func(*args, **kwargs)

        return wrapper

    return decorator
41+
42+
2443
def display_number(x):
2544
ret = "n/a"
2645
if math.isfinite(x):

bio2zarr/plink.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import logging
22
import pathlib
33

4-
import bed_reader
54
import numpy as np
65
import zarr
76

@@ -11,7 +10,10 @@
1110

1211

1312
class PlinkFormat(vcz.Source):
13+
@core.requires_optional_dependency("bed_reader", "plink")
1414
def __init__(self, path):
15+
import bed_reader
16+
1517
self._path = pathlib.Path(path)
1618
self.bed = bed_reader.open_bed(path, num_threads=1, count_A1=False)
1719

@@ -175,7 +177,10 @@ def convert(
175177

176178
# FIXME do this more efficiently - currently reading the whole thing
177179
# in for convenience, and also comparing call-by-call
180+
@core.requires_optional_dependency("bed_reader", "plink")
178181
def validate(bed_path, zarr_path):
182+
import bed_reader
183+
179184
root = zarr.open(store=zarr_path, mode="r")
180185
call_genotype = root["call_genotype"][:]
181186

bio2zarr/tskit.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
import pathlib
33

44
import numpy as np
5-
import tskit
65

76
from bio2zarr import constants, core, vcz
87

98
logger = logging.getLogger(__name__)
109

1110

1211
class TskitFormat(vcz.Source):
12+
@core.requires_optional_dependency("tskit", "tskit")
1313
def __init__(
1414
self,
1515
ts_path,
@@ -18,6 +18,8 @@ def __init__(
1818
contig_id=None,
1919
isolated_as_missing=False,
2020
):
21+
import tskit
22+
2123
self._path = ts_path
2224
self.ts = tskit.load(ts_path)
2325
self.contig_id = contig_id if contig_id is not None else "1"

bio2zarr/vcf_utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99
from enum import Enum
1010
from typing import IO, Any
1111

12-
import cyvcf2
1312
import humanfriendly
1413
import numpy as np
1514

1615
from bio2zarr.typing import PathType
16+
from bio2zarr import core
1717

1818
logger = logging.getLogger(__name__)
1919

@@ -395,7 +395,10 @@ class VcfIndexType(Enum):
395395

396396

397397
class VcfFile(contextlib.AbstractContextManager):
398+
@core.requires_optional_dependency("cyvcf2", "vcf")
398399
def __init__(self, vcf_path, index_path=None):
400+
import cyvcf2
401+
399402
self.vcf = None
400403
self.file_type = None
401404
self.index_type = None

bio2zarr/vcz_verification.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import cyvcf2
21
import numpy as np
32
import numpy.testing as nt
43
import tqdm
54
import zarr
65

6+
from bio2zarr import core
77
from bio2zarr.zarr_utils import first_dim_iter
88

99
from . import constants
@@ -145,8 +145,10 @@ def assert_format_val_equal(vcf_val, zarr_val, vcf_type, vcf_number):
145145

146146
nt.assert_equal(vcf_val, zarr_val)
147147

148-
148+
@core.requires_optional_dependency("cyvcf2", "vcf")
149149
def verify(vcf_path, zarr_path, show_progress=False):
150+
import cyvcf2
151+
150152
root = zarr.open(store=zarr_path, mode="r")
151153
pos = root["variant_position"][:]
152154
allele = root["variant_allele"][:]

pyproject.toml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,7 @@ dependencies = [
2121
"tabulate",
2222
"tqdm",
2323
"humanfriendly",
24-
# cyvcf2 also pulls in coloredlogs and click",
25-
# colouredlogs pulls in humanfriendly",
26-
"cyvcf2",
27-
"bed_reader",
28-
# TODO Using dev version of tskit for CI, FIXME before release
29-
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
24+
"coloredlogs"
3025
]
3126
requires-python = ">=3.10"
3227
classifiers = [
@@ -65,8 +60,21 @@ dev = [
6560
"pytest-coverage",
6661
"pytest-xdist",
6762
"sgkit>=0.8.0",
68-
"tqdm"
63+
"tqdm",
64+
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
65+
"bed_reader",
66+
"cyvcf2"
6967
]
68+
# TODO Using dev version of tskit for CI, FIXME before release
69+
tskit = ["tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python"]
70+
plink = ["bed_reader"]
71+
vcf = ["cyvcf2"]
72+
all = [
73+
"tskit @ git+https://github.com/tskit-dev/tskit.git@main#subdirectory=python",
74+
"bed_reader",
75+
"cyvcf2"
76+
]
77+
7078

7179
[tool.setuptools]
7280
packages = ["bio2zarr"]

tests/test_core.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,13 @@ def test_examples(self, chunk_size, size, start, stop):
244244
)
245245
def test_du(path, expected):
246246
assert core.du(path) == expected
247+
248+
def test_decorator_missing_dependency():
    """A guarded callable raises ImportError carrying the install hint."""

    # The module name is deliberately not importable, so the guard must fire
    # before the body below ever runs.
    @core.requires_optional_dependency("non_existent_module", "extras")
    def guarded():
        return "success"

    with pytest.raises(ImportError) as exc_info:
        guarded()

    message = str(exc_info.value)
    assert "pip install bio2zarr[extras]" in message

tests/test_plink.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from unittest import mock
2+
13
import bed_reader
24
import numpy as np
35
import numpy.testing as nt
@@ -55,6 +57,21 @@ def test_genotypes(self, ds):
5557
],
5658
)
5759

60+
def test_missing_dependency(self):
61+
with mock.patch(
62+
"importlib.import_module",
63+
side_effect=ImportError("No module named 'bed_reader'"),
64+
):
65+
with pytest.raises(ImportError) as exc_info:
66+
plink.convert(
67+
"UNUSED_PATH",
68+
"UNUSED_PATH",
69+
)
70+
assert (
71+
"This process requires the optional bed_reader module. "
72+
"Install it with: pip install bio2zarr[plink]" in str(exc_info.value)
73+
)
74+
5875

5976
class TestEqualSgkit:
6077
def test_simulated_example(self, tmp_path):

tests/test_ts.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import tempfile
3+
from unittest import mock
34

45
import numpy as np
56
import pytest
@@ -93,6 +94,21 @@ def test_simple_tree_sequence(self, tmp_path):
9394
"sample_id",
9495
}
9596

97+
def test_missing_dependency(self):
98+
with mock.patch(
99+
"importlib.import_module",
100+
side_effect=ImportError("No module named 'tskit'"),
101+
):
102+
with pytest.raises(ImportError) as exc_info:
103+
ts.convert(
104+
"UNUSED_PATH",
105+
"UNUSED_PATH",
106+
)
107+
assert (
108+
"This process requires the optional tskit module. Install "
109+
"it with: pip install bio2zarr[tskit]" in str(exc_info.value)
110+
)
111+
96112

97113
class TestTskitFormat:
98114
"""Unit tests for TskitFormat without using full conversion."""

tests/test_vcf_examples.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22
import pathlib
33
import re
4+
from unittest import mock
45

56
import cyvcf2
67
import numpy as np
@@ -481,6 +482,22 @@ def test_small_example_all_missing_gts(self, ds, tmp_path_factory):
481482
p1[1] = True
482483
nt.assert_array_equal(p1, ds2["call_genotype_phased"].values)
483484

485+
def test_missing_dependency(self, tmp_path):
486+
with mock.patch(
487+
"importlib.import_module",
488+
side_effect=ImportError("No module named 'cyvcf2'"),
489+
):
490+
with pytest.raises(ImportError) as exc_info:
491+
vcf_mod.convert(
492+
["tests/data/vcf/sample.vcf.gz"],
493+
tmp_path / "example.vcf.zarr",
494+
worker_processes=0, # Synchronous mode so the mock works
495+
)
496+
assert (
497+
"This process requires the optional cyvcf2 module. Install "
498+
"it with: pip install bio2zarr[vcf]" in str(exc_info.value)
499+
)
500+
484501

485502
class TestSmallExampleLocalAlleles:
486503
data_path = "tests/data/vcf/sample.vcf.gz"

0 commit comments

Comments
 (0)