Skip to content

Commit d758f0e

Browse files
authored
Merge pull request #168 from alliander-opensource/feature/lazy-excel-loading
Simplify lazy excel loading
2 parents 43c2949 + dbce5e0 commit d758f0e

File tree

1 file changed

+14
-24
lines changed

1 file changed

+14
-24
lines changed

src/power_grid_model_io/data_stores/excel_file_store.py

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def __init__(self, file_path: Optional[Path] = None, **extra_paths: Path):
3535
# Create a dictionary of all supplied file paths:
3636
# {"": file_path, extra_name[0]: extra_path[0], extra_name[1]: extra_path[1], ...}
3737
self._file_paths: Dict[str, Path] = {}
38-
self._excel_files: Dict[str, pd.ExcelFile] = {}
3938
if file_path is not None:
4039
self._file_paths[""] = file_path
4140
for name, path in extra_paths.items():
@@ -64,37 +63,28 @@ def load(self) -> TabularData:
6463
have no prefix, while the tables of all the extra files will be prefixed with the name of the keyword argument
6564
as supplied in the constructor.
6665
"""
66+
67+
def lazy_sheet_loader(xls_file: pd.ExcelFile, xls_sheet_name: str):
68+
def sheet_loader():
69+
sheet_data = xls_file.parse(xls_sheet_name, header=self._header_rows)
70+
sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data)
71+
sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=xls_sheet_name)
72+
return sheet_data
73+
74+
return sheet_loader
75+
6776
data: Dict[str, LazyDataFrame] = {}
6877
for name, path in self._file_paths.items():
69-
self._excel_files[name] = pd.ExcelFile(path)
70-
for sheet_name in self._excel_files[name].sheet_names:
71-
loader = self._load_sheet_wrapper(name, sheet_name)
72-
if name:
78+
excel_file = pd.ExcelFile(path)
79+
for sheet_name in excel_file.sheet_names:
80+
loader = lazy_sheet_loader(excel_file, sheet_name)
81+
if name != "": # If the Excel file is not the main file, prefix the sheet name with the file name
7382
sheet_name = f"{name}.{sheet_name}"
7483
if sheet_name in data:
7584
raise ValueError(f"Duplicate sheet name '{sheet_name}'")
7685
data[sheet_name] = loader
7786
return TabularData(**data)
7887

79-
def _load_sheet_wrapper(self, name: str, sheet_name: str):
80-
"""
81-
Load a single Excel sheet as a Pandas DataFrame.
82-
83-
Args:
84-
name: the name of the file (empty string for the main sheet)
85-
sheet_name: the name of the sheet
86-
87-
Returns: The contents of the specified Excel sheet.
88-
"""
89-
90-
def wrapper():
91-
sheet_data = self._excel_files[name].parse(sheet_name, header=self._header_rows)
92-
sheet_data = self._remove_unnamed_column_placeholders(data=sheet_data)
93-
sheet_data = self._handle_duplicate_columns(data=sheet_data, sheet_name=sheet_name)
94-
return sheet_data
95-
96-
return wrapper
97-
9888
def save(self, data: TabularData) -> None:
9989
"""
10090
Store tabular data as one or more Excel files.

0 commit comments

Comments
 (0)