@@ -35,7 +35,6 @@ def __init__(self, file_path: Optional[Path] = None, **extra_paths: Path):
35
35
# Create a dictionary of all supplied file paths:
36
36
# {"": file_path, extra_name[0]: extra_path[0], extra_name[1]: extra_path[1], ...}
37
37
self ._file_paths : Dict [str , Path ] = {}
38
- self ._excel_files : Dict [str , pd .ExcelFile ] = {}
39
38
if file_path is not None :
40
39
self ._file_paths ["" ] = file_path
41
40
for name , path in extra_paths .items ():
@@ -64,37 +63,28 @@ def load(self) -> TabularData:
64
63
have no prefix, while the tables of all the extra files will be prefixed with the name of the keyword argument
65
64
as supplied in the constructor.
66
65
"""
66
+
67
+ def lazy_sheet_loader (xls_file : pd .ExcelFile , xls_sheet_name : str ):
68
+ def sheet_loader ():
69
+ sheet_data = xls_file .parse (xls_sheet_name , header = self ._header_rows )
70
+ sheet_data = self ._remove_unnamed_column_placeholders (data = sheet_data )
71
+ sheet_data = self ._handle_duplicate_columns (data = sheet_data , sheet_name = xls_sheet_name )
72
+ return sheet_data
73
+
74
+ return sheet_loader
75
+
67
76
data : Dict [str , LazyDataFrame ] = {}
68
77
for name , path in self ._file_paths .items ():
69
- self . _excel_files [ name ] = pd .ExcelFile (path )
70
- for sheet_name in self . _excel_files [ name ] .sheet_names :
71
- loader = self . _load_sheet_wrapper ( name , sheet_name )
72
- if name :
78
+ excel_file = pd .ExcelFile (path )
79
+ for sheet_name in excel_file .sheet_names :
80
+ loader = lazy_sheet_loader ( excel_file , sheet_name )
81
+ if name != "" : # If the Excel file is not the main file, prefix the sheet name with the file name
73
82
sheet_name = f"{ name } .{ sheet_name } "
74
83
if sheet_name in data :
75
84
raise ValueError (f"Duplicate sheet name '{ sheet_name } '" )
76
85
data [sheet_name ] = loader
77
86
return TabularData (** data )
78
87
79
- def _load_sheet_wrapper (self , name : str , sheet_name : str ):
80
- """
81
- Load a single Excel sheet as a Pandas DataFrame.
82
-
83
- Args:
84
- name: the name of the file (empty string for the main sheet)
85
- sheet_name: the name of the sheet
86
-
87
- Returns: The contents the specified Excel sheet.
88
- """
89
-
90
- def wrapper ():
91
- sheet_data = self ._excel_files [name ].parse (sheet_name , header = self ._header_rows )
92
- sheet_data = self ._remove_unnamed_column_placeholders (data = sheet_data )
93
- sheet_data = self ._handle_duplicate_columns (data = sheet_data , sheet_name = sheet_name )
94
- return sheet_data
95
-
96
- return wrapper
97
-
98
88
def save (self , data : TabularData ) -> None :
99
89
"""
100
90
Store tabular data as one or more Excel files.
0 commit comments