
Commit 4cb94e1

refactor and add _DenseAdaBoostClassifier base class to inherit from, removing sparse data support

1 parent a635126 · commit 4cb94e1
src/linearboost/linear_boost.py (237 additions, 7 deletions)
@@ -15,6 +15,7 @@
 
 import sys
 import warnings
+from abc import abstractmethod
 from numbers import Integral, Real
 
 if sys.version_info >= (3, 11):
@@ -40,7 +41,7 @@
 from sklearn.utils.multiclass import check_classification_targets, type_of_target
 from sklearn.utils.validation import check_is_fitted
 
-from ._utils import SKLEARN_V1_6_OR_LATER, check_X_y
+from ._utils import SKLEARN_V1_6_OR_LATER, check_X_y, validate_data
 from .sefr import SEFR
 
 __all__ = ["LinearBoostClassifier"]
@@ -63,7 +64,201 @@
 }
 
 
-class LinearBoostClassifier(AdaBoostClassifier):
+class _DenseAdaBoostClassifier(AdaBoostClassifier):
+    if SKLEARN_V1_6_OR_LATER:
+
+        def __sklearn_tags__(self):
+            tags = super().__sklearn_tags__()
+            tags.input_tags.sparse = False
+            return tags
+
+    def _check_X(self, X):
+        # Only called to validate X in non-fit methods, therefore reset=False
+        return validate_data(
+            self,
+            X,
+            accept_sparse=False,
+            ensure_2d=True,
+            allow_nd=True,
+            dtype=None,
+            reset=False,
+        )
+
+    @abstractmethod
+    def _boost(self, iboost, X, y, sample_weight, random_state):
+        """Implement a single boost.
+
+        Warning: This method needs to be overridden by subclasses.
+
+        Parameters
+        ----------
+        iboost : int
+            The index of the current boost iteration.
+
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        y : array-like of shape (n_samples,)
+            The target values (class labels).
+
+        sample_weight : array-like of shape (n_samples,)
+            The current sample weights.
+
+        random_state : RandomState
+            The current random number generator.
+
+        Returns
+        -------
+        sample_weight : array-like of shape (n_samples,) or None
+            The reweighted sample weights.
+            If None then boosting has terminated early.
+
+        estimator_weight : float
+            The weight for the current boost.
+            If None then boosting has terminated early.
+
+        error : float
+            The classification error for the current boost.
+            If None then boosting has terminated early.
+        """
+        pass
+
+    def staged_score(self, X, y, sample_weight=None):
+        """Return staged scores for X, y.
+
+        This generator method yields the ensemble score after each iteration of
+        boosting and therefore allows monitoring, such as to determine the
+        score on a test set after each boost.
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        y : array-like of shape (n_samples,)
+            Labels for X.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+        Yields
+        ------
+        z : float
+        """
+        return super().staged_score(X, y, sample_weight)
+
+    def staged_predict(self, X):
+        """Return staged predictions for X.
+
+        The predicted class of an input sample is computed as the weighted mean
+        prediction of the classifiers in the ensemble.
+
+        This generator method yields the ensemble prediction after each
+        iteration of boosting and therefore allows monitoring, such as to
+        determine the prediction on a test set after each boost.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples.
+
+        Yields
+        ------
+        y : generator of ndarray of shape (n_samples,)
+            The predicted classes.
+        """
+        return super().staged_predict(X)
+
+    def staged_decision_function(self, X):
+        """Compute decision function of ``X`` for each boosting iteration.
+
+        This method allows monitoring (i.e. determine error on testing set)
+        after each boosting iteration.
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        Yields
+        ------
+        score : generator of ndarray of shape (n_samples, k)
+            The decision function of the input samples. The order of
+            outputs is the same as that of the :term:`classes_` attribute.
+            Binary classification is a special case with ``k == 1``,
+            otherwise ``k==n_classes``. For binary classification,
+            values closer to -1 or 1 mean more like the first or second
+            class in ``classes_``, respectively.
+        """
+        return super().staged_decision_function(X)
+
+    def predict_proba(self, X):
+        """Predict class probabilities for X.
+
+        The predicted class probabilities of an input sample is computed as
+        the weighted mean predicted class probabilities of the classifiers
+        in the ensemble.
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        Returns
+        -------
+        p : ndarray of shape (n_samples, n_classes)
+            The class probabilities of the input samples. The order of
+            outputs is the same as that of the :term:`classes_` attribute.
+        """
+        return super().predict_proba(X)
+
+    def staged_predict_proba(self, X):
+        """Predict class probabilities for X.
+
+        The predicted class probabilities of an input sample is computed as
+        the weighted mean predicted class probabilities of the classifiers
+        in the ensemble.
+
+        This generator method yields the ensemble predicted class probabilities
+        after each iteration of boosting and therefore allows monitoring, such
+        as to determine the predicted class probabilities on a test set after
+        each boost.
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        Yields
+        ------
+        p : generator of ndarray of shape (n_samples,)
+            The class probabilities of the input samples. The order of
+            outputs is the same as that of the :term:`classes_` attribute.
+        """
+        return super().staged_predict_proba(X)
+
+    def predict_log_proba(self, X):
+        """Predict class log-probabilities for X.
+
+        The predicted class log-probabilities of an input sample is computed as
+        the weighted mean predicted class log-probabilities of the classifiers
+        in the ensemble.
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        Returns
+        -------
+        p : ndarray of shape (n_samples, n_classes)
+            The class probabilities of the input samples. The order of
+            outputs is the same as that of the :term:`classes_` attribute.
+        """
+        return super().predict_log_proba(X)
+
+
+class LinearBoostClassifier(_DenseAdaBoostClassifier):
     """A LinearBoost classifier.
 
     A LinearBoost classifier is a meta-estimator based on AdaBoost and SEFR.
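
The new intermediate class keeps AdaBoostClassifier's public surface but advertises, through the scikit-learn >= 1.6 tag API, that sparse input is unsupported, and its `_check_X` enforces this with `validate_data(..., accept_sparse=False)`. A minimal sanity check of the resulting hierarchy could look like the following sketch (hypothetical usage, not part of the commit; the import path is assumed from the file location and scikit-learn >= 1.6 is assumed for the tag API):

    from sklearn.ensemble import AdaBoostClassifier

    from linearboost.linear_boost import LinearBoostClassifier

    clf = LinearBoostClassifier()
    print(isinstance(clf, AdaBoostClassifier))       # True: still an AdaBoost variant
    print(clf.__sklearn_tags__().input_tags.sparse)  # False: dense-only, inherited from the new base class
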
@@ -221,7 +416,6 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = False
         tags.target_tags.required = True
         tags.classifier_tags.multi_class = False
         tags.classifier_tags.poor_score = True
@@ -268,6 +462,25 @@ def _check_X_y(self, X, y) -> tuple[np.ndarray, np.ndarray]:
         return X, y
 
     def fit(self, X, y, sample_weight=None) -> Self:
+        """Build a LinearBoost classifier from the training set (X, y).
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        y : array-like of shape (n_samples,)
+            The target values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights. If None, the sample weights are initialized to
+            1 / n_samples.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+        """
         if self.algorithm not in {"SAMME", "SAMME.R"}:
             raise ValueError("algorithm must be 'SAMME' or 'SAMME.R'")
 
@@ -322,7 +535,8 @@ def fit(self, X, y, sample_weight=None) -> Self:
         )
         return super().fit(X_transformed, y, sample_weight)
 
-    def _samme_proba(self, estimator, n_classes, X):
+    @staticmethod
+    def _samme_proba(estimator, n_classes, X):
         """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].
 
         References
@@ -401,6 +615,23 @@ def _boost(self, iboost, X, y, sample_weight, random_state):
         return sample_weight, estimator_weight, estimator_error
 
     def decision_function(self, X):
+        """Compute the decision function of ``X``.
+
+        Parameters
+        ----------
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
+
+        Returns
+        -------
+        score : ndarray of shape (n_samples, k)
+            The decision function of the input samples. The order of
+            outputs is the same as that of the :term:`classes_` attribute.
+            Binary classification is a special case with ``k == 1``,
+            otherwise ``k==n_classes``. For binary classification,
+            values closer to -1 or 1 mean more like the first or second
+            class in ``classes_``, respectively.
+        """
         check_is_fitted(self)
         X_transformed = self.scaler_.transform(X)
 
@@ -431,9 +662,8 @@ def predict(self, X):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix} of shape (n_samples, n_features)
-            The training input samples. Sparse matrix can be CSC, CSR, COO,
-            DOK, or LIL. COO, DOK, and LIL are converted to CSR.
+        X : {array-like} of shape (n_samples, n_features)
+            The training input samples.
 
         Returns
         -------
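
Taken together, the dense-only `predict` and the `decision_function` documented above can be illustrated with one more hypothetical snippet. The sign convention used here (negative scores map to `classes_[0]`, positive to `classes_[1]`) is the AdaBoost binary convention described in the docstring and is assumed to carry over unchanged; the import path is assumed as before:

    import numpy as np

    from linearboost.linear_boost import LinearBoostClassifier

    rng = np.random.default_rng(1)
    X = rng.normal(size=(60, 2))
    y = (X[:, 0] > 0).astype(int)

    clf = LinearBoostClassifier().fit(X, y)

    scores = clf.decision_function(X)             # shape (n_samples,) in the binary case
    manual = clf.classes_[(scores > 0).astype(int)]
    print(np.mean(manual == clf.predict(X)))      # expected to be 1.0 under the stated assumption
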