Skip to content

Commit 2abf626

Browse files
Report correct file sizes in inspect
Same as du -sb on Linux. Closes #142
1 parent e6b9a19 commit 2abf626

File tree

3 files changed

+37
-4
lines changed

3 files changed

+37
-4
lines changed

bio2zarr/core.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import dataclasses
44
import logging
55
import multiprocessing
6+
import os
7+
import os.path
68
import threading
79
import time
810

@@ -45,6 +47,22 @@ def chunk_aligned_slices(z, n, max_chunks=None):
4547
return slices
4648

4749

50+
def du(path):
    """
    Return the total bytes stored at this path.

    The total is the size of ``path`` itself plus the size of every
    directory and file found beneath it by ``os.walk``. Directory entries
    are counted as well as files, which is what makes the figure comparable
    to ``du -sb`` on Linux.

    Entries found during the walk are measured with ``os.lstat``, so a
    symbolic link contributes the size of the link itself rather than the
    size of whatever it points at. This also means a dangling link inside
    the tree no longer raises ``FileNotFoundError``.

    :param path: File or directory to measure (str or path-like).
    :return: Total size in bytes.
    :raises OSError: If ``path`` itself cannot be stat'ed.
    """
    total = os.path.getsize(path)
    # pathlib walk method doesn't exist until 3.12 :(
    for root, dirs, files in os.walk(path):
        # Directories occupy bytes too, so count both kinds of entry.
        for name in dirs + files:
            fullname = os.path.join(root, name)
            # lstat, not stat: os.walk does not descend into symlinked
            # directories by default, so following the link here would
            # count a target that is never walked (or crash if it is gone).
            total += os.lstat(fullname).st_size
    # Lazy %-formatting: the message is only built if DEBUG is enabled.
    logger.debug("du(%s) = %s", path, total)
    return total
64+
65+
4866
class SynchronousExecutor(cf.Executor):
4967
def submit(self, fn, /, *args, **kwargs):
5068
future = cf.Future()

bio2zarr/vcf.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import logging
77
import math
88
import os
9+
import os.path
910
import pathlib
1011
import pickle
1112
import shutil
@@ -1509,14 +1510,12 @@ class VcfZarr:
15091510
def __init__(self, path):
15101511
if not (path / ".zmetadata").exists():
15111512
raise ValueError("Not in VcfZarr format") # NEEDS TEST
1513+
self.path = path
15121514
self.root = zarr.open(path, mode="r")
15131515

1514-
def __repr__(self):
1515-
return repr(self.root) # NEEDS TEST
1516-
15171516
def summary_table(self):
15181517
data = []
1519-
arrays = [(a.nbytes_stored, a) for _, a in self.root.arrays()]
1518+
arrays = [(core.du(self.path / a.basename), a) for _, a in self.root.arrays()]
15201519
arrays.sort(key=lambda x: x[0])
15211520
for stored, array in reversed(arrays):
15221521
d = {

tests/test_core.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,3 +179,19 @@ def test_5_chunk_1(self, n, expected):
179179
z = zarr.array(np.arange(5), chunks=1, dtype=int)
180180
result = core.chunk_aligned_slices(z, n)
181181
assert result == expected
182+
183+
184+
@pytest.mark.parametrize(
    ("path", "expected"),
    [
        # NOTE: the expected sizes below were recorded by running
        # `du -sb` on a Linux system. They *might* hold in CI, but they
        # probably depend on a whole bunch of environmental details, so
        # expect this test to start failing at some point.
        ("tests/data", 4630726),
        ("tests/data/vcf", 4618589),
        ("tests/data/vcf/sample.vcf.gz", 1089),
    ],
)
def test_du(path, expected):
    """Check core.du against sizes previously recorded with ``du -sb``."""
    measured = core.du(path)
    assert measured == expected

0 commit comments

Comments
 (0)