Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 27 additions & 6 deletions daal4py/mb/gbt_convertors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from collections import deque
from copy import deepcopy
from tempfile import NamedTemporaryFile
from typing import Any, Deque, Dict, List, Optional, Tuple
from typing import Any, Deque, Dict, List, Optional, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -393,7 +393,7 @@ def get_gbt_model_from_tree_list(
is_regression: bool,
n_features: int,
n_classes: int,
base_score: Optional[float] = None,
base_score: Optional[Union[float, List[float]]] = None,
):
"""Return a GBT Model from TreeList"""

Expand All @@ -412,11 +412,21 @@ def get_gbt_model_from_tree_list(
else:
tree_id = mb.create_tree(n_nodes=tree.n_nodes, class_label=class_label)

# Note: starting from xgboost>=3.1.0, multi-class classification models have
# vector-valued intercepts. Since oneDAL doesn't support these, it instead
# adds the scores to all of the terminal leafs in the first tree.
if isinstance(base_score, list) and counter <= n_classes:
intercept_add = base_score[counter - 1]
else:
intercept_add = 0.0

if counter % n_iterations == 0:
class_label += 1

if tree.is_leaf:
mb.add_leaf(tree_id=tree_id, response=tree.value, cover=tree.cover)
mb.add_leaf(
tree_id=tree_id, response=tree.value + intercept_add, cover=tree.cover
)
continue

root_node = tree.root_node
Expand Down Expand Up @@ -445,7 +455,7 @@ def get_gbt_model_from_tree_list(
if node.is_leaf:
mb.add_leaf(
tree_id=tree_id,
response=node.value,
response=node.value + intercept_add,
cover=node.cover,
parent_id=node.parent_id,
position=node.position,
Expand All @@ -468,7 +478,7 @@ def get_gbt_model_from_tree_list(
child.position = position
node_queue.append(child)

return mb.model(base_score=base_score)
return mb.model(base_score=base_score if isinstance(base_score, float) else None)


def get_gbt_model_from_lightgbm(model: Any, booster=None) -> Any:
Expand Down Expand Up @@ -543,7 +553,18 @@ def get_gbt_model_from_xgboost(booster: Any, xgb_config=None) -> Any:

n_features = int(xgb_config["learner"]["learner_model_param"]["num_feature"])
n_classes = int(xgb_config["learner"]["learner_model_param"]["num_class"])
base_score = float(xgb_config["learner"]["learner_model_param"]["base_score"])
# Note: base scores in XGBoost might be vector-valued starting from version 3.1.0.
# When this is the case, the 'base_score' attribute will be a JSON list, otherwise
# it will be a scalar. Note that in either case, it will be in the response scale.
base_score_str: str = xgb_config["learner"]["learner_model_param"]["base_score"]
if base_score_str.startswith("["):
base_score = json.loads(base_score_str)
if len(base_score) == 1:
base_score = base_score[0]
elif len(base_score) == 0:
base_score = 0.5
else:
base_score = float(base_score_str)

is_regression = False
objective_fun = xgb_config["learner"]["learner_train_param"]["objective"]
Expand Down
2 changes: 1 addition & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ scikit-learn==1.7.2 ; python_version >= '3.10'
pandas==2.1.3 ; python_version < '3.11'
pandas==2.3.3 ; python_version >= '3.11'
xgboost==2.1.4 ; python_version < '3.10'
xgboost==3.0.5 ; python_version >= '3.10'
xgboost==3.1.1 ; python_version >= '3.10'
lightgbm==4.6.0
catboost==1.2.8
shap==0.49.1
Expand Down
32 changes: 26 additions & 6 deletions tests/test_model_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
"See https://github.com/catboost/catboost/issues/2556."
)

# TODO: remove checks using these once treelite becomes compatible with xgboost>=3.1.0

Check notice on line 93 in tests/test_model_builders.py

View check run for this annotation

codefactor.io / CodeFactor

tests/test_model_builders.py#L93

Unresolved comment '# TODO: remove checks using these once treelite becomes compatible with xgboost>=3.1.0'. (C100)
xgb_is_31_version = xgb.__version__.startswith("3.1")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checking for version >= 3.1 would be more robust.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but this is just a temporary workaround that should hopefully get removed before version 3.2 arrives.

tl_xgb_incompat_msg = "Incompatibilities between treelite and xgboost."


# Note: models have an attribute telling whether SHAP calculations
# are supported for it or not. When that attribute is 'False', attempts
Expand Down Expand Up @@ -233,6 +237,8 @@

xgb_model = make_xgb_model(objective, base_score, sklearn_class, empty_trees)
if from_treelite:
if xgb_is_31_version:
pytest.skip(tl_xgb_incompat_msg)
xgb_model = treelite.frontend.from_xgboost(xgb_model)
d4p_model = d4p.mb.convert_model(xgb_model)

Expand Down Expand Up @@ -293,9 +299,13 @@
pytest.skip()

xgb_model = make_xgb_model(objective, base_score, sklearn_class, empty_trees)
d4p_model = d4p.mb.convert_model(
xgb_model if not from_treelite else treelite.frontend.from_xgboost(xgb_model)
)
if from_treelite:
if xgb_is_31_version:
pytest.skip(tl_xgb_incompat_msg)
tl_model = treelite.frontend.from_xgboost(xgb_model)
d4p_model = d4p.mb.convert_model(tl_model)
else:
d4p_model = d4p.mb.convert_model(xgb_model)

if sklearn_class:
xgb_model = xgb_model.get_booster()
Expand Down Expand Up @@ -345,6 +355,8 @@
pytest.skip()
xgb_model = make_xgb_model(objective, base_score, sklearn_class, empty_trees)
if from_treelite:
if xgb_is_31_version:
pytest.skip(tl_xgb_incompat_msg)
xgb_model = treelite.frontend.from_xgboost(xgb_model)
d4p_model = d4p.mb.convert_model(xgb_model)

Expand Down Expand Up @@ -430,9 +442,13 @@
if sklearn_class and from_treelite:
pytest.skip()
xgb_model = make_xgb_model(objective, base_score, sklearn_class, empty_trees)
d4p_model = d4p.mb.convert_model(
xgb_model if not from_treelite else treelite.frontend.from_xgboost(xgb_model)
)
if from_treelite:
if xgb_is_31_version:
pytest.skip(tl_xgb_incompat_msg)
tl_model = treelite.frontend.from_xgboost(xgb_model)
d4p_model = d4p.mb.convert_model(tl_model)
else:
d4p_model = d4p.mb.convert_model(xgb_model)

if sklearn_class:
xgb_model = xgb_model.get_booster()
Expand Down Expand Up @@ -482,6 +498,8 @@
pytest.skip()
xgb_model = make_xgb_model(objective, base_score, sklearn_class, empty_trees)
if from_treelite:
if xgb_is_31_version:
pytest.skip(tl_xgb_incompat_msg)
xgb_model = treelite.frontend.from_xgboost(xgb_model)
d4p_model = d4p.mb.convert_model(xgb_model)

Expand Down Expand Up @@ -605,6 +623,8 @@
else:
# In this case, TreeLite handles the drop logic on their end in a
# format that is consumable by daal4py.
if xgb_is_31_version:
pytest.skip(tl_xgb_incompat_msg)
tl_model = treelite.frontend.from_xgboost(xgb_model)
d4p_model = d4p.mb.convert_model(tl_model)
np.testing.assert_allclose(
Expand Down
Loading