10
10
from pathlib import Path , PurePosixPath
11
11
import time
12
12
13
+ import pandas as pd
13
14
import tiledb
15
+ from tqdm .auto import tqdm
14
16
15
17
LOGGER = logging .getLogger (__name__ )
16
18
@@ -32,9 +34,26 @@ def register_table(source, chunk_size, local_path, remote_path):
32
34
LOGGER .info ("Writing data to TileDB" )
33
35
# Export table
34
36
if isinstance (source , (str , Path )):
35
- tiledb .from_csv (write_path , source , chunksize = chunk_size )
37
+ data_iterator = pd .read_csv (source , chunksize = chunk_size )
38
+ total_rows = None
36
39
else :
37
- tiledb .from_pandas (write_path , source , chunksize = chunk_size )
40
+ data_iterator = (source .iloc [i :i + chunk_size ]
41
+ for i in range (0 , len (source ), chunk_size ))
42
+ total_rows = len (source )
43
+ progress_monitor = tqdm (
44
+ desc = "Generating TileDB file..." , initial = 1 , dynamic_ncols = True ,
45
+ total = total_rows ,
46
+ bar_format = '{desc}: {percentage:3.0f}%|{bar}| '
47
+ '{n_fmt}/{total_fmt} rows, {elapsed} {postfix}' )
48
+ row_idx = 0
49
+ for chunk in data_iterator :
50
+ tiledb .from_pandas (write_path , chunk , sparse = True , full_domain = True ,
51
+ tile = 10000 , attr_filters = None ,
52
+ row_start_idx = row_idx , allows_duplicates = False ,
53
+ mode = "append" if row_idx else "ingest" )
54
+ progress_monitor .update (len (chunk ))
55
+ row_idx += len (chunk )
56
+ progress_monitor .close ()
38
57
LOGGER .debug ("Appending metadata to TileDB" )
39
58
# Append omero metadata
40
59
with tiledb .open (write_path , mode = "w" ) as array :
0 commit comments