Skip to content

Commit 2cd61a0

Browse files
Add tabdata encoding (#466)
* Allow passing encoding for TabData files
1 parent 5022cfa commit 2cd61a0

File tree

5 files changed

+109
-30
lines changed

5 files changed

+109
-30
lines changed

docs/whats_new.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
What's New
22
==========
3-
v3.14.3 (2025/XX/XX)
3+
v3.14.3 (2025/03/XX)
44
--------------------
55
New Features
66
~~~~~~~~~~~~
@@ -23,6 +23,7 @@ Performance
2323

2424
Internal Changes
2525
~~~~~~~~~~~~~~~~
26+
- Allow passing encoding for TabData files (:issue:`455`). (`@enekomartinmartinez <https://github.com/enekomartinmartinez>`_)
2627

2728
v3.14.2 (2024/11/12)
2829
--------------------

pysd/py_backend/data.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,9 @@ def get_columns(cls, file_name, vars=None, encoding=None):
125125
file_name: str
126126
Output file to read. Must be csv or tab.
127127
128-
vars: list
129-
List of var names to find in the file.
128+
vars: list or None (optional)
129+
List of var names to find in the file. If None all variables
130+
will be returned. Default is None.
130131
131132
encoding: str or None (optional)
132133
Encoding type to read output file. Needed if the file has special
@@ -268,14 +269,20 @@ def __init__(self, real_name, py_name, coords, interp="interpolate"):
268269
+ "'raw', 'interpolate', "
269270
+ "'look_forward' or 'hold_backward'")
270271

271-
def load_data(self, file_names):
272+
def load_data(self, file_names, encoding=None):
272273
"""
273274
Load data values from files.
274275
275276
Parameters
276277
----------
277278
file_names: list or str or pathlib.Path
278279
Name of the files to search the variable in.
280+
encoding: list or str or None (optional)
281+
Encoding to be used by the data readers. If a list is given,
282+
then file_names should be a list of the same length. If
283+
None or a string is given, this value will be used for all
284+
of them. See documentation from pandas.read_table for
285+
further information. Default is None.
279286
280287
Returns
281288
-------
@@ -285,9 +292,11 @@ def load_data(self, file_names):
285292
"""
286293
if isinstance(file_names, (str, Path)):
287294
file_names = [file_names]
295+
if isinstance(encoding, str) or encoding is None:
296+
encoding = [encoding]*len(file_names)
288297

289-
for file_name in file_names:
290-
self.data = self._load_data(Path(file_name))
298+
for file_name, encoding_df in zip(file_names, encoding):
299+
self.data = self._load_data(Path(file_name), encoding_df)
291300
if self.data is not None:
292301
break
293302

@@ -297,7 +306,7 @@ def load_data(self, file_names):
297306
f"Data for {self.real_name} not found in "
298307
f"{', '.join([str(file_name) for file_name in file_names])}")
299308

300-
def _load_data(self, file_name):
309+
def _load_data(self, file_name, encoding):
301310
"""
302311
Load data values from output
303312
@@ -317,7 +326,10 @@ def _load_data(self, file_name):
317326
if file_name.suffix in [".csv", ".tab"]:
318327

319328
columns, transpose = Columns.get_columns(
320-
file_name, vars=[self.real_name, self.py_name])
329+
file_name,
330+
vars=[self.real_name, self.py_name],
331+
encoding=encoding
332+
)
321333

322334
if not columns:
323335
# the variable is not in the passed file

pysd/py_backend/model.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class Macro(DynamicStateful):
7373
"""
7474
def __init__(self, py_model_file, params=None, return_func=None,
7575
time=None, time_initialization=None, data_files=None,
76-
py_name=None):
76+
data_files_encoding=None, py_name=None):
7777
super().__init__()
7878
self.time = time
7979
self.time_initialization = time_initialization
@@ -158,7 +158,7 @@ def __init__(self, py_model_file, params=None, return_func=None,
158158

159159
# Load data files
160160
if data_files:
161-
self._get_data(data_files)
161+
self._get_data(data_files, data_files_encoding)
162162

163163
# Assign the cache type to each variable
164164
self._assign_cache_type()
@@ -221,14 +221,19 @@ def clean_caches(self):
221221
# if nested macros
222222
[macro.clean_caches() for macro in self._macro_elements]
223223

224-
def _get_data(self, data_files):
224+
def _get_data(self, data_files, encoding):
225+
"""Load Data for TabData objects"""
225226
if isinstance(data_files, dict):
226227
for data_file, vars in data_files.items():
228+
if isinstance(encoding, dict):
229+
encoding_df = encoding.get(data_file, None)
230+
else:
231+
encoding_df = encoding
227232
for var in vars:
228233
found = False
229234
for element in self._data_elements:
230235
if var in [element.py_name, element.real_name]:
231-
element.load_data(data_file)
236+
element.load_data(data_file, encoding_df)
232237
found = True
233238
break
234239
if not found:
@@ -237,7 +242,7 @@ def _get_data(self, data_files):
237242

238243
else:
239244
for element in self._data_elements:
240-
element.load_data(data_files)
245+
element.load_data(data_files, encoding)
241246

242247
def _get_initialize_order(self):
243248
"""
@@ -1396,11 +1401,13 @@ class Model(Macro):
13961401
:class:`pysd.py_backend.model.Macro`
13971402
13981403
"""
1399-
def __init__(self, py_model_file, data_files, initialize, missing_values):
1404+
def __init__(self, py_model_file, data_files, data_files_encoding,
1405+
initialize, missing_values):
14001406
""" Sets up the Python objects """
14011407
super().__init__(py_model_file, None, None, Time(),
14021408
data_files=data_files)
14031409
self.data_files = data_files
1410+
self.data_files_encoding = data_files_encoding
14041411
self.missing_values = missing_values
14051412
# set time component
14061413
self.time.stage = 'Load'
@@ -2159,6 +2166,7 @@ def copy(self, reload=False):
21592166
new_model = type(self)(
21602167
py_model_file=deepcopy(self.py_model_file),
21612168
data_files=deepcopy(self.data_files),
2169+
data_files_encoding=deepcopy(self.data_files_encoding),
21622170
initialize=initialize,
21632171
missing_values=deepcopy(self.missing_values)
21642172
)
@@ -2194,6 +2202,7 @@ def reload(self):
21942202
21952203
"""
21962204
self.__init__(self.py_model_file, data_files=self.data_files,
2205+
data_files_encoding=self.data_files_encoding,
21972206
initialize=True,
21982207
missing_values=self.missing_values)
21992208

pysd/pysd.py

Lines changed: 71 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
)
2525

2626

27-
def read_xmile(xmile_file, data_files=None, initialize=True,
28-
missing_values="warning"):
27+
def read_xmile(xmile_file, data_files=None, data_files_encoding=None,
28+
initialize=True, missing_values="warning"):
2929
"""
3030
Construct a model from a Xmile file.
3131
@@ -38,9 +38,20 @@ def read_xmile(xmile_file, data_files=None, initialize=True,
3838
If False, the model will not be initialize when it is loaded.
3939
Default is True.
4040
41-
data_files: list or str or None (optional)
42-
If given the list of files where the necessary data to run the model
43-
is given. Default is None.
41+
data_files: dict or list or str or None
42+
The dictionary with keys the name of file and variables to
43+
load the data from there. Or the list of names or name of the
44+
file to search the data in. Only works for TabData type object
45+
and it is necessary to provide it. Default is None.
46+
47+
data_files_encoding: list or str or dict or None (optional)
48+
Encoding for data_files. If a string or None is passed this
49+
value will be used for all the files. If data_files is a list,
50+
a list of the same length could be used to specify different
51+
encodings. If data_files is a dictionary, a dictionary with the
52+
same keys could be used, being the values the encodings. See
53+
documentation from pandas.read_table for further information.
54+
Default is None.
4455
4556
missing_values: str ("warning", "error", "ignore", "keep") (optional)
4657
What to do with missing values. If "warning" (default)
@@ -75,15 +86,20 @@ def read_xmile(xmile_file, data_files=None, initialize=True,
7586
py_model_file = ModelBuilder(abs_model).build_model()
7687

7788
# load Python file
78-
model = load(py_model_file, data_files, initialize, missing_values)
89+
model = load(
90+
py_model_file,
91+
data_files, data_files_encoding,
92+
initialize,
93+
missing_values
94+
)
7995
model.xmile_file = str(xmile_file)
8096

8197
return model
8298

8399

84-
def read_vensim(mdl_file, data_files=None, initialize=True,
85-
missing_values="warning", split_views=False,
86-
encoding=None, **kwargs):
100+
def read_vensim(mdl_file, data_files=None, data_files_encoding=None,
101+
initialize=True, missing_values="warning",
102+
split_views=False, encoding=None, **kwargs):
87103
"""
88104
Construct a model from Vensim `.mdl` file.
89105
@@ -96,9 +112,29 @@ def read_vensim(mdl_file, data_files=None, initialize=True,
96112
If False, the model will not be initialize when it is loaded.
97113
Default is True.
98114
99-
data_files: list or str or None (optional)
100-
If given the list of files where the necessary data to run the model
101-
is given. Default is None.
115+
data_files: dict or list or str or None
116+
The dictionary with keys the name of file and variables to
117+
load the data from there. Or the list of names or name of the
118+
file to search the data in. Only works for TabData type object
119+
and it is necessary to provide it. Default is None.
120+
121+
data_files_encoding: list or str or dict or None (optional)
122+
Encoding for data_files. If a string or None is passed this
123+
value will be used for all the files. If data_files is a list,
124+
a list of the same length could be used to specify different
125+
encodings. If data_files is a dictionary, a dictionary with the
126+
same keys could be used, being the values the encodings. See
127+
documentation from pandas.read_table for further information.
128+
Default is None.
129+
130+
data_files_encoding: list or str or dict or None (optional)
131+
Encoding for data_files. If a string or None is passed this
132+
value will be used for all the files. If data_files is a list,
133+
a list of the same length could be used to specify different
134+
encodings. If data_files is a dictionary, a dictionary with the
135+
same keys could be used, being the values the encodings. See
136+
documentation from pandas.read_table for further information.
137+
Default is None.
102138
103139
missing_values: str ("warning", "error", "ignore", "keep") (optional)
104140
What to do with missing values. If "warning" (default)
@@ -155,14 +191,19 @@ def read_vensim(mdl_file, data_files=None, initialize=True,
155191
py_model_file = ModelBuilder(abs_model).build_model()
156192

157193
# load Python file
158-
model = load(py_model_file, data_files, initialize, missing_values)
194+
model = load(
195+
py_model_file,
196+
data_files, data_files_encoding,
197+
initialize,
198+
missing_values
199+
)
159200
model.mdl_file = str(mdl_file)
160201

161202
return model
162203

163204

164-
def load(py_model_file, data_files=None, initialize=True,
165-
missing_values="warning"):
205+
def load(py_model_file, data_files=None, data_files_encoding=None,
206+
initialize=True, missing_values="warning"):
166207
"""
167208
Load a Python-converted model file.
168209
@@ -182,6 +223,15 @@ def load(py_model_file, data_files=None, initialize=True,
182223
file to search the data in. Only works for TabData type object
183224
and it is necessary to provide it. Default is None.
184225
226+
data_files_encoding: list or str or dict or None (optional)
227+
Encoding for data_files. If a string or None is passed this
228+
value will be used for all the files. If data_files is a list,
229+
a list of the same length could be used to specify different
230+
encodings. If data_files is a dictionary, a dictionary with the
231+
same keys could be used, being the values the encodings. See
232+
documentation from pandas.read_table for further information.
233+
Default is None.
234+
185235
missing_values : str ("warning", "error", "ignore", "keep") (optional)
186236
What to do with missing values. If "warning" (default)
187237
shows a warning message and interpolates the values.
@@ -195,4 +245,9 @@ def load(py_model_file, data_files=None, initialize=True,
195245
>>> model = load('../tests/test-models/samples/teacup/teacup.py')
196246
197247
"""
198-
return Model(py_model_file, data_files, initialize, missing_values)
248+
return Model(
249+
py_model_file,
250+
data_files, data_files_encoding,
251+
initialize,
252+
missing_values
253+
)

tests/pytest_types/external/pytest_external.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,9 @@ def test_constant_0d(self, _root):
13821382
data2.initialize()
13831383

13841384
assert data() == -1
1385+
assert type(data()) == float
13851386
assert data2() == 0
1387+
assert type(data2()) == float
13861388

13871389
def test_constant_n0d(self, _root):
13881390
"""

0 commit comments

Comments
 (0)