Skip to content

Model info #197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
303b371
feat: re-added model info
jlwalke2 Aug 5, 2024
c7c143f
style: replace 'is' with '==' to eliminate warnings
jlwalke2 Aug 8, 2024
41c46ea
bug: allow binary file to be passed
jlwalke2 Aug 14, 2024
d5d6e39
feat: allow passing inline json
jlwalke2 Aug 14, 2024
a24b903
feat: simple parsing of pytorch models
jlwalke2 Aug 14, 2024
72097a3
chore: directly expose get_model_info()
jlwalke2 Aug 14, 2024
8228d02
fix: handle case where scikit model output not provided
jlwalke2 Aug 14, 2024
3953dd2
fix: close zip file before reading bytes
jlwalke2 Aug 14, 2024
4b0016b
feat: handle file-like or raw bytes
jlwalke2 Aug 14, 2024
b5c0e06
chore: use pzmm for open source models
jlwalke2 Aug 14, 2024
acc128b
fix: do not print unless running in a notebook
jlwalke2 Aug 14, 2024
816c0c2
fix: remove hooks
jlwalke2 Aug 14, 2024
58d32f1
feat: pass additional info when registering model
jlwalke2 Aug 14, 2024
eaba956
chore: misc cleanup
jlwalke2 Aug 15, 2024
5226d24
feat: generate score code
jlwalke2 Aug 15, 2024
8a86c86
chore: misc cleanup
jlwalke2 Aug 16, 2024
3abd251
fix: skip for Viya 4+
jlwalke2 Aug 16, 2024
0770141
fix: ignore spaces in env var
jlwalke2 Aug 16, 2024
5f79aeb
fix: use sanitized model names in file names.
jlwalke2 Aug 16, 2024
e74510a
fix: use instance methods & variables when manipulating model data
jlwalke2 Aug 16, 2024
7ced3f6
fix: score code generation creates score() not predict()
jlwalke2 Aug 16, 2024
7f7c900
chore: remove obsolete code
jlwalke2 Aug 16, 2024
b312107
fix: updated for instance methods
jlwalke2 Aug 19, 2024
5c0c903
fix: new pandas behavior
jlwalke2 Aug 19, 2024
aa6482a
feat: require dill
jlwalke2 Aug 19, 2024
8c87f9e
test: remove obsolete tests
jlwalke2 Aug 20, 2024
ce74c53
feat: reshape 3+d tensors
jlwalke2 Aug 22, 2024
793a1fe
fix: update for tree-based models
jlwalke2 Aug 22, 2024
bedaadb
fix: update for tree-based models
jlwalke2 Aug 22, 2024
48ec416
feat: use model_info. rename input to X.
jlwalke2 Aug 22, 2024
12f8b5b
test: update for changes to viya & pandas
jlwalke2 Aug 22, 2024
6fbaa2a
fix: convert non str/bytes to str.
jlwalke2 Aug 22, 2024
72630a9
feat: allow passing files using pathlib
jlwalke2 Aug 23, 2024
bc59d77
test: test case update for pzmm and cassette refresh
jlwalke2 Aug 23, 2024
f421917
Merge branch 'master' of github.com:sassoftware/python-sasctl into mo…
jlwalke2 Aug 23, 2024
3d1b0c5
chore: black formatting
jlwalke2 Aug 23, 2024
c2aa05e
chore: black formatting
jlwalke2 Aug 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions examples/register_scikit_classification_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# Register the model in Model Manager
register_model(model,
model_name,
input=X, # Use X to determine model inputs
X=X, # Use X to determine model inputs
project='Iris', # Register in "Iris" project
force=True) # Create project if it doesn't exist

Expand All @@ -36,5 +36,5 @@
x = X.iloc[0, :]

# Call the published module and score the record
result = module.predict(x)
result = module.score(x)
print(result)
4 changes: 2 additions & 2 deletions examples/register_scikit_regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
project_name = 'Boston Housing'

# Register the model in SAS Model Manager
register_model(model, model_name, project_name, input=X, force=True)
register_model(model, model_name, project_name, X=X, force=True)

# Publish the model to the real-time scoring engine
module = publish_model(model_name, 'maslocal', replace=True)
Expand All @@ -37,5 +37,5 @@
x = X.iloc[0, :]

# Call the published module and score the record
result = module.predict(x)
result = module.score(x)
print(result)
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ def get_file(filename):
packages=find_packages(where="src"),
package_dir={"": "src"},
python_requires=">=3.6",
install_requires=["pandas>=0.24.0", "requests", "pyyaml", "packaging"],
install_requires=["dill", "pandas>=0.24.0", "requests", "pyyaml", "packaging"],
extras_require={
"swat": ["swat"],
"GitPython": ["GitPython"],
"numpy": ["numpy"],
"scikit-learn": ["scikit-learn"],
"kerberos": [
'kerberos ; platform_system != "Windows"',
Expand Down
7 changes: 4 additions & 3 deletions src/sasctl/_services/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# SPDX-License-Identifier: Apache-2.0

import os
from pathlib import Path

from sasctl.utils.cli import sasctl_command

Expand Down Expand Up @@ -40,7 +41,7 @@ def create_file(cls, file, folder=None, filename=None, expiration=None):

Parameters
----------
file : str or file_like
file : str, pathlib.Path, or file_like
Path to the file to upload or a file-like object.
folder : str or dict, optional
Name of the folder, or folder information as returned by :func:`.get_folder`.
Expand All @@ -55,8 +56,8 @@ def create_file(cls, file, folder=None, filename=None, expiration=None):
A dictionary containing the file attributes.

"""
if isinstance(file, str):
filename = filename or os.path.splitext(os.path.split(file)[1])[0]
if isinstance(file, (str, Path)):
filename = filename or Path(file).name

with open(file, "rb") as f:
file = f.read()
Expand Down
9 changes: 7 additions & 2 deletions src/sasctl/_services/model_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def import_model_from_zip(
project : str or dict
The name or id of the model project, or a dictionary
representation of the project.
file : bytes
file : bytes or file-like object
The ZIP file containing the model and contents.
description : str
The description of the model.
Expand All @@ -551,9 +551,14 @@ def import_model_from_zip(
}
params = "&".join("{}={}".format(k, v) for k, v in params.items())

if not isinstance(file, bytes):
if file.seekable():
file.seek(0)
file = file.read()

r = cls.post(
"/models#octetStream",
data=file.read(),
data=file,
params=params,
headers={"Content-Type": "application/octet-stream"},
)
Expand Down
26 changes: 20 additions & 6 deletions src/sasctl/pzmm/import_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,21 @@ def get_model_properties(
model_files: Union[str, Path, None] = None,
):
if type(model_files) is dict:
model = model_files["ModelProperties.json"]
input_var = model_files["inputVar.json"]
output_var = model_files["outputVar.json"]
try:
model = json.loads(model_files["ModelProperties.json"])
except (json.JSONDecodeError, TypeError):
model = model_files["ModelProperties.json"]

try:
input_var = json.loads(model_files["inputVar.json"])
except (json.JSONDecodeError, TypeError):
input_var = model_files["inputVar.json"]

try:
output_var = json.loads(model_files["outputVar.json"])
except (json.JSONDecodeError, TypeError):
output_var = model_files["outputVar.json"]

else:
with open(Path(model_files) / "ModelProperties.json") as f:
model = json.load(f)
Expand Down Expand Up @@ -99,7 +111,9 @@ def project_exists(
response = _create_project(project, model, repo, input_var, output_var)
else:
response = mr.create_project(project, repo)
print(f"A new project named {response.name} was created.")

if check_if_jupyter():
print(f"A new project named {response.name} was created.")
return response
else:
model, input_var, output_var = get_model_properties(target_values, model_files)
Expand Down Expand Up @@ -348,7 +362,7 @@ def import_model(
# For SAS Viya 4, the score code can be written beforehand and imported with
# all the model files
elif current_session().version_info() == 4:
score_code_dict = sc.write_score_code(
score_code_dict = sc().write_score_code(
model_prefix,
input_data,
predict_method,
Expand Down Expand Up @@ -447,7 +461,7 @@ def import_model(
except AttributeError:
print("Model failed to import to SAS Model Manager.")

score_code_dict = sc.write_score_code(
score_code_dict = sc().write_score_code(
model_prefix,
input_data,
predict_method,
Expand Down
21 changes: 13 additions & 8 deletions src/sasctl/pzmm/pickle_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# SPDX-License-Identifier: Apache-2.0
# %%
import codecs
import gzip
import pickle
import shutil
from pathlib import Path
Expand Down Expand Up @@ -77,6 +76,10 @@ def pickle_trained_model(
models.

"""
from .write_score_code import ScoreCode

sanitized_prefix = ScoreCode.sanitize_model_prefix(model_prefix)

if is_binary_string:
# For models that use a binary string representation
binary_string = codecs.encode(
Expand All @@ -91,25 +94,25 @@ def pickle_trained_model(
# For models imported from MLFlow
shutil.copy(ml_pickle_path, pickle_path)
pzmm_pickle_path = Path(pickle_path) / mlflow_details["model_path"]
pzmm_pickle_path.rename(Path(pickle_path) / (model_prefix + PICKLE))
pzmm_pickle_path.rename(Path(pickle_path) / (sanitized_prefix + PICKLE))
else:
with open(ml_pickle_path, "rb") as pickle_file:
return {model_prefix + PICKLE: pickle.load(pickle_file)}
return {sanitized_prefix + PICKLE: pickle.load(pickle_file)}
else:
# For all other model types
if not is_h2o_model:
if pickle_path:
with open(
Path(pickle_path) / (model_prefix + PICKLE), "wb"
Path(pickle_path) / (sanitized_prefix + PICKLE), "wb"
) as pickle_file:
pickle.dump(trained_model, pickle_file)
if cls.notebook_output:
print(
f"Model {model_prefix} was successfully pickled and saved "
f"to {Path(pickle_path) / (model_prefix + PICKLE)}."
f"to {Path(pickle_path) / (sanitized_prefix + PICKLE)}."
)
else:
return {model_prefix + PICKLE: pickle.dumps(trained_model)}
return {sanitized_prefix + PICKLE: pickle.dumps(trained_model)}
# For binary H2O models, save the binary file as a "pickle" file
elif is_h2o_model and is_binary_model and pickle_path:
if not h2o:
Expand All @@ -121,7 +124,7 @@ def pickle_trained_model(
model=trained_model,
force=True,
path=str(pickle_path),
filename=f"{model_prefix}.pickle",
filename=f"{sanitized_prefix}.pickle",
)
# For MOJO H2O models, save as a mojo file and adjust the extension to .mojo
elif is_h2o_model and pickle_path:
Expand All @@ -130,7 +133,9 @@ def pickle_trained_model(
"The h2o package is required to save the model as a mojo model."
)
trained_model.save_mojo(
force=True, path=str(pickle_path), filename=f"{model_prefix}.mojo"
force=True,
path=str(pickle_path),
filename=f"{sanitized_prefix}.mojo",
)
elif is_binary_model or is_h2o_model:
raise ValueError(
Expand Down
21 changes: 15 additions & 6 deletions src/sasctl/pzmm/write_json_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,18 +498,27 @@ def write_file_metadata_json(
Dictionary containing a key-value pair representing the file name and json
dump respectively.
"""

from .write_score_code import ScoreCode

sanitized_prefix = ScoreCode.sanitize_model_prefix(model_prefix)

dict_list = [
{"role": "inputVariables", "name": INPUT},
{"role": "outputVariables", "name": OUTPUT},
{"role": "score", "name": f"score_{model_prefix}.py"},
{"role": "score", "name": f"score_{sanitized_prefix}.py"},
]
if is_h2o_model:
dict_list.append({"role": "scoreResource", "name": model_prefix + ".mojo"})
dict_list.append(
{"role": "scoreResource", "name": sanitized_prefix + ".mojo"}
)
elif is_tf_keras_model:
dict_list.append({"role": "scoreResource", "name": model_prefix + ".h5"})
dict_list.append(
{"role": "scoreResource", "name": sanitized_prefix + ".h5"}
)
else:
dict_list.append(
{"role": "scoreResource", "name": model_prefix + ".pickle"}
{"role": "scoreResource", "name": sanitized_prefix + ".pickle"}
)

if json_path:
Expand Down Expand Up @@ -2314,9 +2323,9 @@ def generate_model_card(
"Only classification and prediction target types are currently accepted."
)
if selection_statistic is None:
if target_type is "classification":
if target_type == "classification":
selection_statistic = "_KS_"
elif target_type is "prediction":
elif target_type == "prediction":
selection_statistic = "_ASE_"
if selection_statistic not in cls.valid_params:
raise RuntimeError(
Expand Down
Loading
Loading