Skip to content

Commit 0e42c44

Browse files
authored
Merge pull request #161 from sassoftware/1.9.1
v1.9.1
2 parents f401981 + 0d9abb6 commit 0e42c44

19 files changed

+295
-241
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.

examples/data/hmeqModels/H2OMOJOGLM/fileMetadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313
},
1414
{
1515
"role": "scoreResource",
16-
"name": "glmfit.pickle"
16+
"name": "glmfit.mojo"
1717
}
1818
]
374 Bytes
Binary file not shown.
-94.1 KB
Binary file not shown.
Binary file not shown.

examples/data/hmeqModels/H2OBinaryGLM/score_glmfit_mojo.py renamed to examples/data/hmeqModels/H2OMOJOGLM/score_glmfit_mojo.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,15 @@
1313

1414
h2o.init()
1515

16-
with gzip.open(Path(settings.pickle_path) / "{model_file_name}", "r") as fileIn, open(Path(settings.pickle_path) / "glmfit.zip", "wb") as fileOut:
17-
shutil.copyfileobj(fileIn, fileOut)
18-
os.chmod(Path(settings.pickle_path) / "glmfit.zip", 0o777)
19-
model = h2o.import_mojo(Path(settings.pickle_path) / "glmfit.zip")
16+
model = h2o.import_mojo(Path(settings.pickle_path))
2017

2118
def score(LOAN, MORTDUE, VALUE, REASON, JOB, YOJ, DEROG, DELINQ, CLAGE, NINQ, CLNO, DEBTINC):
2219
"Output: EM_CLASSIFICATION, EM_EVENTPROBABILITY"
2320

2421
try:
2522
global model
2623
except NameError:
27-
model = h2o.import_mojo(Path(settings.pickle_path) / "glmfit.zip")
24+
model = h2o.import_mojo(Path(settings.pickle_path))
2825

2926
try:
3027
if math.isnan(LOAN):

examples/pzmm_binary_classification_model_import.ipynb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,11 @@
648648
"\n",
649649
"# Serialize the models to a pickle format\n",
650650
"for (mod, prefix, path) in zip(model, model_prefix, zip_folder):\n",
651-
" pzmm.PickleModel.pickle_trained_model(trained_model=mod, model_prefix=prefix, pickle_path=path)"
651+
" pzmm.PickleModel.pickle_trained_model(\n",
652+
" model_prefix=prefix,\n",
653+
" trained_model=mod,\n",
654+
" pickle_path=path\n",
655+
" )"
652656
]
653657
},
654658
{

examples/pzmm_h2o_model_import.ipynb

Lines changed: 106 additions & 132 deletions
Large diffs are not rendered by default.

examples/pzmm_mlflow_model_import.ipynb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,11 @@
204204
"source": [
205205
"model_prefix = \"MLFlowModel\"\n",
206206
"zip_folder = Path.cwd() / \"data/MLFlowModels/Model1/\"\n",
207-
"pzmm.PickleModel.pickle_trained_model(trained_model=None, model_prefix=model_prefix, pickle_path=zip_folder, mlflow_details=metadata_dict)"
207+
"pzmm.PickleModel.pickle_trained_model(\n",
208+
" model_prefix=model_prefix, \n",
209+
" pickle_path=zip_folder, \n",
210+
" mlflow_details=metadata_dict\n",
211+
")"
208212
]
209213
},
210214
{

examples/pzmm_regression_model_import.ipynb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,11 @@
378378
"# Output variables expected in SAS Model Manager\n",
379379
"score_metrics = [\"EM_PREDICTION\"]\n",
380380
"\n",
381-
"pzmm.PickleModel.pickle_trained_model(lrm, model_prefix, zip_folder)"
381+
"pzmm.PickleModel.pickle_trained_model(\n",
382+
" model_prefix=model_prefix,\n",
383+
" trained_model=lrm,\n",
384+
" pickle_path=zip_folder\n",
385+
")"
382386
]
383387
},
384388
{

src/sasctl/pzmm/import_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def import_model(
136136
model_prefix: str,
137137
project: Union[str, dict, RestObj],
138138
input_data: Optional[DataFrame] = None,
139-
predict_method: [Callable[..., List], List[Any]] = None,
139+
predict_method: Union[Callable[..., List], List[Any]] = None,
140140
score_metrics: Optional[List[str]] = None,
141141
pickle_type: str = "pickle",
142142
project_version: str = "latest",

src/sasctl/pzmm/pickle_model.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
import pickle
77
import shutil
88
from pathlib import Path
9-
from typing import Optional, Union
9+
from typing import Any, Optional, Union
10+
11+
try:
12+
import h2o
13+
except ImportError:
14+
h2o = None
1015

1116
from ..utils.misc import check_if_jupyter
1217

@@ -20,8 +25,8 @@ class PickleModel:
2025
@classmethod
2126
def pickle_trained_model(
2227
cls,
23-
trained_model,
24-
model_prefix,
28+
model_prefix: str,
29+
trained_model: Optional[Any] = None,
2530
pickle_path: Union[str, Path, None] = None,
2631
is_h2o_model: bool = False,
2732
is_binary_model: bool = False,
@@ -40,12 +45,11 @@ def pickle_trained_model(
4045
4146
Parameters
4247
---------------
43-
trained_model : model object, str, or Path
44-
For non-H2O models, this argument contains the model variable. Otherwise,
45-
this should be the file path of the MOJO file.
4648
model_prefix : str or Path
4749
Variable name for the model to be displayed in SAS Open Model Manager
4850
(i.e. hmeqClassTree + [Score.py || .pickle]).
51+
trained_model : model object
52+
The trained model to be exported.
4953
pickle_path : str, optional
5054
File location for the output pickle file. The default value is None.
5155
is_h2o_model : bool, optional
@@ -60,7 +64,7 @@ def pickle_trained_model(
6064
file. The default value is False.
6165
mlflow_details : dict, optional
6266
Model details from an MLFlow model. This dictionary is created by the
63-
readMLModelFile function. The default value is None
67+
readMLModelFile function. The default value is None.
6468
6569
Returns
6670
-------
@@ -106,22 +110,29 @@ def pickle_trained_model(
106110
)
107111
else:
108112
return {model_prefix + PICKLE: pickle.dumps(trained_model)}
109-
# For binary H2O models, rename the binary file as a pickle file
110-
elif is_binary_model and pickle_path:
111-
binary_file = Path(pickle_path) / model_prefix
112-
binary_file.rename(binary_file.with_suffix(PICKLE))
113-
# For MOJO H2O models, gzip the model file and adjust the file extension
113+
# For binary H2O models, save the binary file as a "pickle" file
114+
elif is_h2o_model and is_binary_model and pickle_path:
115+
if not h2o:
116+
raise RuntimeError(
117+
"The h2o package is required to save the model as a binary h2o"
118+
"model."
119+
)
120+
h2o.save_model(
121+
model=trained_model,
122+
force=True,
123+
path=str(pickle_path),
124+
filename=f"{model_prefix}.pickle",
125+
)
126+
# For MOJO H2O models, save as a mojo file and adjust the extension to .mojo
114127
elif is_h2o_model and pickle_path:
115-
with open(Path(trained_model), "rb") as fileIn, gzip.open(
116-
Path(pickle_path) / (model_prefix + ".mojo"), "wb"
117-
) as fileOut:
118-
fileOut.writelines(fileIn)
119-
if cls.notebook_output:
120-
print(
121-
f"MOJO model {model_prefix} was successfully gzipped and saved "
122-
f"to {Path(pickle_path) / (model_prefix + '.mojo')}."
128+
if not h2o:
129+
raise RuntimeError(
130+
"The h2o package is required to save the model as a mojo model."
123131
)
124-
else:
132+
trained_model.save_mojo(
133+
force=True, path=str(pickle_path), filename=f"{model_prefix}.mojo"
134+
)
135+
elif is_binary_model or is_h2o_model:
125136
raise ValueError(
126137
"There is currently no support for file-less H2O.ai model handling."
127138
" Please include a value for the pickle_path argument."

src/sasctl/pzmm/write_json_files.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,27 @@
2323
from ..utils.decorators import deprecated
2424
from ..utils.misc import check_if_jupyter
2525

26+
try:
    # noinspection PyPackageRequirements
    import numpy as np

    class NpEncoder(json.JSONEncoder):
        """JSON encoder that converts numpy scalars and arrays to native types.

        The standard ``json`` encoder rejects numpy integer, floating, boolean
        and ndarray objects. Passing ``cls=NpEncoder`` to ``json.dumps`` lets
        those values be written as plain JSON numbers, booleans and lists.
        """

        def default(self, obj):
            """Return a JSON-serializable equivalent of ``obj``.

            Parameters
            ----------
            obj : Any
                Object that the base encoder could not serialize on its own.

            Returns
            -------
            bool, int, float, or list
                Native Python value for numpy booleans, integers, floats and
                arrays respectively.

            Raises
            ------
            TypeError
                Raised by the base class for any other unsupported type.
            """
            # np.bool_ is neither a Python bool nor an np.integer subclass, so
            # it never matches the integer branch and needs its own check.
            if isinstance(obj, np.bool_):
                return bool(obj)
            if isinstance(obj, np.integer):
                return int(obj)
            if isinstance(obj, np.floating):
                return float(obj)
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            return super().default(obj)

except ImportError:
    np = None

    class NpEncoder(json.JSONEncoder):
        """Fallback used when numpy is not installed; behaves as JSONEncoder."""
45+
46+
2647
# TODO: add converter for any type of dataset (list, dataframe, numpy array)
2748

2849
# Constants
@@ -125,17 +146,17 @@ def write_var_json(
125146
file_name = OUTPUT
126147

127148
with open(Path(json_path) / file_name, "w") as json_file:
128-
json_file.write(json.dumps(dict_list, indent=4))
149+
json_file.write(json.dumps(dict_list, indent=4, cls=NpEncoder))
129150
if cls.notebook_output:
130151
print(
131152
f"{file_name} was successfully written and saved to "
132153
f"{Path(json_path) / file_name}"
133154
)
134155
else:
135156
if is_input:
136-
return {INPUT: json.dumps(dict_list)}
157+
return {INPUT: json.dumps(dict_list, indent=4, cls=NpEncoder)}
137158
else:
138-
return {OUTPUT: json.dumps(dict_list)}
159+
return {OUTPUT: json.dumps(dict_list, indent=4, cls=NpEncoder)}
139160

140161
@staticmethod
141162
def generate_variable_properties(
@@ -329,17 +350,17 @@ def write_model_properties_json(
329350
)
330351

331352
if not target_values:
332-
model_function = "Prediction"
353+
model_function = model_function if model_function else "Prediction"
333354
target_level = "INTERVAL"
334355
target_event = ""
335356
event_prob_var = ""
336357
elif isinstance(target_values, list) and len(target_values) == 2:
337-
model_function = "Classification"
358+
model_function = model_function if model_function else "Classification"
338359
target_level = "BINARY"
339360
target_event = str(target_values[0])
340361
event_prob_var = f"P_{target_values[0]}"
341362
elif isinstance(target_values, list) and len(target_values) > 2:
342-
model_function = "Classification"
363+
model_function = model_function if model_function else "Classification"
343364
target_level = "NOMINAL"
344365
target_event = ""
345366
event_prob_var = ""
@@ -577,14 +598,14 @@ def input_fit_statistics(
577598

578599
if json_path:
579600
with open(Path(json_path) / FITSTAT, "w") as json_file:
580-
json_file.write(json.dumps(json_dict, indent=4))
601+
json_file.write(json.dumps(json_dict, indent=4, cls=NpEncoder))
581602
if cls.notebook_output:
582603
print(
583604
f"{FITSTAT} was successfully written and saved to "
584605
f"{Path(json_path) / FITSTAT}"
585606
)
586607
else:
587-
return {FITSTAT: json.dumps(json_dict, indent=4)}
608+
return {FITSTAT: json.dumps(json_dict, indent=4, cls=NpEncoder)}
588609

589610
@classmethod
590611
def add_tuple_to_fitstat(
@@ -861,17 +882,17 @@ def calculate_model_statistics(
861882
if json_path:
862883
for name in [FITSTAT, ROC, LIFT]:
863884
with open(Path(json_path) / name, "w") as json_file:
864-
json_file.write(json.dumps(json_dict, indent=4))
885+
json_file.write(json.dumps(json_dict, indent=4, cls=NpEncoder))
865886
if cls.notebook_output:
866887
print(
867888
f"{name} was successfully written and saved to "
868889
f"{Path(json_path) / name}"
869890
)
870891
else:
871892
return {
872-
FITSTAT: json.dumps(json_dict[0], indent=4),
873-
ROC: json.dumps(json_dict[1], indent=4),
874-
LIFT: json.dumps(json_dict[2], indent=4),
893+
FITSTAT: json.dumps(json_dict[0], indent=4, cls=NpEncoder),
894+
ROC: json.dumps(json_dict[1], indent=4, cls=NpEncoder),
895+
LIFT: json.dumps(json_dict[2], indent=4, cls=NpEncoder),
875896
}
876897

877898
@staticmethod
@@ -1020,11 +1041,11 @@ def apply_dataframe_to_json(
10201041
values from the SAS CAS percentile action set added in.
10211042
"""
10221043
for row_num in range(len(stat_df)):
1023-
row_dict = stat_df.iloc[row_num].to_dict()
1044+
row_dict = stat_df.iloc[row_num].replace(float("nan"), None).to_dict()
10241045
json_dict[row_num + partition * len(stat_df)]["dataMap"].update(row_dict)
10251046
return json_dict
10261047

1027-
# noinspection PyCallingNonCallable,PyNestedDecorators
1048+
# noinspection PyCallingNonCallable, PyNestedDecorators
10281049
@deprecated(
10291050
"Please use the calculate_model_statistics method instead.",
10301051
version="1.9",

src/sasctl/pzmm/write_score_code.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def write_score_code(
2424
cls,
2525
model_prefix: str,
2626
input_data: Union[DataFrame, List[dict]],
27-
predict_method: [Callable[..., List], List[Any]],
27+
predict_method: Union[Callable[..., List], List[Any]],
2828
target_variable: Optional[str] = None,
2929
target_values: Optional[List] = None,
3030
score_metrics: Optional[List[str]] = None,
@@ -515,23 +515,8 @@ def _viya4_model_load(
515515
pickle_type = pickle_type if pickle_type else "pickle"
516516

517517
if mojo_model:
518-
cls.score_code += (
519-
f"with gzip.open(Path(settings.pickle_path) / "
520-
'"{model_file_name}", "r") as fileIn, '
521-
"open(Path(settings.pickle_path) / "
522-
f"\"{str(Path(model_file_name).with_suffix('.zip'))}\","
523-
f" \"wb\") as fileOut:\n{'':4}shutil.copyfileobj(fileIn,"
524-
" fileOut)\nos.chmod(Path(settings.pickle_path) / "
525-
f"\"{str(Path(model_file_name).with_suffix('.zip'))}\""
526-
", 0o777)\nmodel = h2o.import_mojo("
527-
"Path(settings.pickle_path) / "
528-
f"\"{str(Path(model_file_name).with_suffix('.zip'))}\")"
529-
"\n\n"
530-
)
531-
return (
532-
f"{'':8}model = h2o.import_mojo(Path(settings.pickle_path) / "
533-
f"\"{str(Path(model_file_name).with_suffix('.zip'))}\")\n\n"
534-
)
518+
cls.score_code += "model = h2o.import_mojo(Path(settings.pickle_path))\n\n"
519+
return f"{'':8}model = h2o.import_mojo(Path(settings.pickle_path))\n\n"
535520
elif binary_h2o_model:
536521
cls.score_code += "model = h2o.load(Path(settings.pickle_path))\n\n"
537522
return f"{'':8}model = h2o.load(Path(settings.pickle_path))\n\n"

tests/unit/test_misc_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
# SPDX-License-Identifier: Apache-2.0
66

77
import re
8-
from unittest.mock import patch, PropertyMock
8+
from unittest.mock import PropertyMock, patch
9+
910

1011
def test_list_packages():
1112
from sasctl.utils.misc import installed_packages

0 commit comments

Comments
 (0)