6
6
"""
7
7
8
8
from pathlib import Path
9
- from typing import Any , Dict , List
9
+ from typing import Any , Callable , Dict , List
10
10
11
11
import pandas as pd
12
12
@@ -26,17 +26,24 @@ class CsvDirStore(BaseDataStore[TabularData]):
26
26
27
27
def __init__ (self , dir_path : Path , ** csv_kwargs ):
28
28
super ().__init__ ()
29
- self ._dir_path = dir_path
29
+ self ._dir_path = Path ( dir_path )
30
30
self ._csv_kwargs : Dict [str , Any ] = csv_kwargs
31
31
self ._header_rows : List [int ] = [0 ]
32
32
33
33
def load (self ) -> TabularData :
34
34
"""
35
- Load all CSV files in a directory as tabular data .
35
+ Create a lazy loader for all CSV files in a directory and store them in a TabularData instance .
36
36
"""
37
- data : Dict [str , pd .DataFrame ] = {}
37
+
38
+ def lazy_csv_loader (csv_path : Path ) -> Callable [[], pd .DataFrame ]:
39
+ def csv_loader ():
40
+ return pd .read_csv (filepath_or_buffer = csv_path , header = self ._header_rows , ** self ._csv_kwargs )
41
+
42
+ return csv_loader
43
+
44
+ data : Dict [str , Callable [[], pd .DataFrame ]] = {}
38
45
for path in self ._dir_path .glob ("*.csv" ):
39
- data [path .stem ] = pd . read_csv ( filepath_or_buffer = path , header = self . _header_rows , ** self . _csv_kwargs )
46
+ data [path .stem ] = lazy_csv_loader ( path )
40
47
41
48
return TabularData (** data )
42
49
0 commit comments