Skip to content

Commit cc19a77

Browse files
committed
Add progress bar and match formal schema
1 parent 66a9f85 commit cc19a77

File tree

2 files changed: 23 additions and 5 deletions.

omero2pandas/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,8 @@ def upload_table(source, table_name, parent_id=None, parent_type='Image',
242242
if local_path or remote_path:
243243
if not register_table:
244244
raise ValueError("Remote table support is not installed")
245-
ann_id = register_table(source, table_name, links,
246-
connector.server, chunk_size,
247-
local_path, remote_path)
245+
ann_id = register_table(
246+
source, chunk_size, local_path, remote_path)
248247
else:
249248
ann_id = create_table(source, table_name, links, conn, chunk_size)
250249
if ann_id is None:

omero2pandas/remote.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
from pathlib import Path, PurePosixPath
1111
import time
1212

13+
import pandas as pd
1314
import tiledb
15+
from tqdm.auto import tqdm
1416

1517
LOGGER = logging.getLogger(__name__)
1618

@@ -32,9 +34,26 @@ def register_table(source, chunk_size, local_path, remote_path):
3234
LOGGER.info("Writing data to TileDB")
3335
# Export table
3436
if isinstance(source, (str, Path)):
35-
tiledb.from_csv(write_path, source, chunksize=chunk_size)
37+
data_iterator = pd.read_csv(source, chunksize=chunk_size)
38+
total_rows = None
3639
else:
37-
tiledb.from_pandas(write_path, source, chunksize=chunk_size)
40+
data_iterator = (source.iloc[i:i + chunk_size]
41+
for i in range(0, len(source), chunk_size))
42+
total_rows = len(source)
43+
progress_monitor = tqdm(
44+
desc="Generating TileDB file...", initial=1, dynamic_ncols=True,
45+
total=total_rows,
46+
bar_format='{desc}: {percentage:3.0f}%|{bar}| '
47+
'{n_fmt}/{total_fmt} rows, {elapsed} {postfix}')
48+
row_idx = 0
49+
for chunk in data_iterator:
50+
tiledb.from_pandas(write_path, chunk, sparse=True, full_domain=True,
51+
tile=10000, attr_filters=None,
52+
row_start_idx=row_idx, allows_duplicates=False,
53+
mode="append" if row_idx else "ingest")
54+
progress_monitor.update(len(chunk))
55+
row_idx += len(chunk)
56+
progress_monitor.close()
3857
LOGGER.debug("Appending metadata to TileDB")
3958
# Append omero metadata
4059
with tiledb.open(write_path, mode="w") as array:

0 commit comments

Comments
 (0)