15 | 15 |
16 | 16 | import sys
17 | 17 | import warnings
| 18 | +from abc import abstractmethod
18 | 19 | from numbers import Integral, Real
19 | 20 |
20 | 21 | if sys.version_info >= (3, 11):

40 | 41 | from sklearn.utils.multiclass import check_classification_targets, type_of_target
41 | 42 | from sklearn.utils.validation import check_is_fitted
42 | 43 |
43 | | -from ._utils import SKLEARN_V1_6_OR_LATER, check_X_y
| 44 | +from ._utils import SKLEARN_V1_6_OR_LATER, check_X_y, validate_data
44 | 45 | from .sefr import SEFR
45 | 46 |
46 | 47 | __all__ = ["LinearBoostClassifier"]

63 | 64 | }
64 | 65 |
65 | 66 |
66 | | -class LinearBoostClassifier(AdaBoostClassifier):
| 67 | +class _DenseAdaBoostClassifier(AdaBoostClassifier):
| 68 | +    if SKLEARN_V1_6_OR_LATER:
| 69 | +
| 70 | +        def __sklearn_tags__(self):
| 71 | +            tags = super().__sklearn_tags__()
| 72 | +            tags.input_tags.sparse = False
| 73 | +            return tags
| 74 | +
| 75 | +    def _check_X(self, X):
| 76 | +        # Only called to validate X in non-fit methods, therefore reset=False
| 77 | +        return validate_data(
| 78 | +            self,
| 79 | +            X,
| 80 | +            accept_sparse=False,
| 81 | +            ensure_2d=True,
| 82 | +            allow_nd=True,
| 83 | +            dtype=None,
| 84 | +            reset=False,
| 85 | +        )
| 86 | +
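
Note: the new `_check_X` hard-rejects sparse input. A minimal sketch of the check it delegates to (assuming scikit-learn >= 1.6, where `validate_data` is importable from `sklearn.utils.validation`; the `._utils` shim presumably re-exports it for older versions):

    import numpy as np
    from scipy import sparse
    from sklearn.linear_model import LogisticRegression
    from sklearn.utils.validation import validate_data

    est = LogisticRegression().fit(np.eye(3), [0, 1, 0])
    try:
        # accept_sparse=False makes the underlying check_array raise on sparse input
        validate_data(est, sparse.csr_matrix(np.eye(3)), accept_sparse=False, reset=False)
    except TypeError as exc:
        print(exc)  # sparse data is rejected, mirroring _check_X above
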
| 87 | +    @abstractmethod
| 88 | +    def _boost(self, iboost, X, y, sample_weight, random_state):
| 89 | +        """Implement a single boost.
| 90 | +
| 91 | +        Warning: This method needs to be overridden by subclasses.
| 92 | +
| 93 | +        Parameters
| 94 | +        ----------
| 95 | +        iboost : int
| 96 | +            The index of the current boost iteration.
| 97 | +
| 98 | +        X : array-like of shape (n_samples, n_features)
| 99 | +            The training input samples.
| 100 | +
| 101 | +        y : array-like of shape (n_samples,)
| 102 | +            The target values (class labels).
| 103 | +
| 104 | +        sample_weight : array-like of shape (n_samples,)
| 105 | +            The current sample weights.
| 106 | +
| 107 | +        random_state : RandomState
| 108 | +            The current random number generator.
| 109 | +
| 110 | +        Returns
| 111 | +        -------
| 112 | +        sample_weight : array-like of shape (n_samples,) or None
| 113 | +            The reweighted sample weights.
| 114 | +            If None then boosting has terminated early.
| 115 | +
| 116 | +        estimator_weight : float
| 117 | +            The weight for the current boost.
| 118 | +            If None then boosting has terminated early.
| 119 | +
| 120 | +        error : float
| 121 | +            The classification error for the current boost.
| 122 | +            If None then boosting has terminated early.
| 123 | +        """
| 124 | +        pass
| 125 | +
| 126 | +    def staged_score(self, X, y, sample_weight=None):
| 127 | +        """Return staged scores for X, y.
| 128 | +
| 129 | +        This generator method yields the ensemble score after each iteration of
| 130 | +        boosting and therefore allows monitoring, such as to determine the
| 131 | +        score on a test set after each boost.
| 132 | +
| 133 | +        Parameters
| 134 | +        ----------
| 135 | +        X : array-like of shape (n_samples, n_features)
| 136 | +            The training input samples.
| 137 | +
| 138 | +        y : array-like of shape (n_samples,)
| 139 | +            Labels for X.
| 140 | +
| 141 | +        sample_weight : array-like of shape (n_samples,), default=None
| 142 | +            Sample weights.
| 143 | +
| 144 | +        Yields
| 145 | +        ------
| 146 | +        z : float
| 147 | +        """
| 148 | +        return super().staged_score(X, y, sample_weight)
| 149 | +
| 150 | +    def staged_predict(self, X):
| 151 | +        """Return staged predictions for X.
| 152 | +
| 153 | +        The predicted class of an input sample is computed as the weighted mean
| 154 | +        prediction of the classifiers in the ensemble.
| 155 | +
| 156 | +        This generator method yields the ensemble prediction after each
| 157 | +        iteration of boosting and therefore allows monitoring, such as to
| 158 | +        determine the prediction on a test set after each boost.
| 159 | +
| 160 | +        Parameters
| 161 | +        ----------
| 162 | +        X : array-like of shape (n_samples, n_features)
| 163 | +            The input samples.
| 164 | +
| 165 | +        Yields
| 166 | +        ------
| 167 | +        y : generator of ndarray of shape (n_samples,)
| 168 | +            The predicted classes.
| 169 | +        """
| 170 | +        return super().staged_predict(X)
| 171 | +
| 172 | +    def staged_decision_function(self, X):
| 173 | +        """Compute decision function of ``X`` for each boosting iteration.
| 174 | +
| 175 | +        This method allows monitoring (i.e., determining the error on a test
| 176 | +        set) after each boosting iteration.
| 177 | +
| 178 | +        Parameters
| 179 | +        ----------
| 180 | +        X : array-like of shape (n_samples, n_features)
| 181 | +            The training input samples.
| 182 | +
| 183 | +        Yields
| 184 | +        ------
| 185 | +        score : generator of ndarray of shape (n_samples, k)
| 186 | +            The decision function of the input samples. The order of
| 187 | +            outputs is the same as that of the :term:`classes_` attribute.
| 188 | +            Binary classification is a special case with ``k == 1``,
| 189 | +            otherwise ``k == n_classes``. For binary classification,
| 190 | +            values closer to -1 or 1 mean more like the first or second
| 191 | +            class in ``classes_``, respectively.
| 192 | +        """
| 193 | +        return super().staged_decision_function(X)
| 194 | +
| 195 | +    def predict_proba(self, X):
| 196 | +        """Predict class probabilities for X.
| 197 | +
| 198 | +        The predicted class probabilities of an input sample are computed as
| 199 | +        the weighted mean predicted class probabilities of the classifiers
| 200 | +        in the ensemble.
| 201 | +
| 202 | +        Parameters
| 203 | +        ----------
| 204 | +        X : array-like of shape (n_samples, n_features)
| 205 | +            The training input samples.
| 206 | +
| 207 | +        Returns
| 208 | +        -------
| 209 | +        p : ndarray of shape (n_samples, n_classes)
| 210 | +            The class probabilities of the input samples. The order of
| 211 | +            outputs is the same as that of the :term:`classes_` attribute.
| 212 | +        """
| 213 | +        return super().predict_proba(X)
| 214 | +
| 215 | +    def staged_predict_proba(self, X):
| 216 | +        """Predict class probabilities for X.
| 217 | +
| 218 | +        The predicted class probabilities of an input sample are computed as
| 219 | +        the weighted mean predicted class probabilities of the classifiers
| 220 | +        in the ensemble.
| 221 | +
| 222 | +        This generator method yields the ensemble predicted class probabilities
| 223 | +        after each iteration of boosting and therefore allows monitoring, such
| 224 | +        as to determine the predicted class probabilities on a test set after
| 225 | +        each boost.
| 226 | +
| 227 | +        Parameters
| 228 | +        ----------
| 229 | +        X : array-like of shape (n_samples, n_features)
| 230 | +            The training input samples.
| 231 | +
| 232 | +        Yields
| 233 | +        ------
| 234 | +        p : generator of ndarray of shape (n_samples, n_classes)
| 235 | +            The class probabilities of the input samples. The order of
| 236 | +            outputs is the same as that of the :term:`classes_` attribute.
| 237 | +        """
| 238 | +        return super().staged_predict_proba(X)
| 239 | +
| 240 | +    def predict_log_proba(self, X):
| 241 | +        """Predict class log-probabilities for X.
| 242 | +
| 243 | +        The predicted class log-probabilities of an input sample are computed
| 244 | +        as the weighted mean predicted class log-probabilities of the
| 245 | +        classifiers in the ensemble.
| 246 | +
| 247 | +        Parameters
| 248 | +        ----------
| 249 | +        X : array-like of shape (n_samples, n_features)
| 250 | +            The training input samples.
| 251 | +
| 252 | +        Returns
| 253 | +        -------
| 254 | +        p : ndarray of shape (n_samples, n_classes)
| 255 | +            The class log-probabilities of the input samples. The order of
| 256 | +            outputs is the same as that of the :term:`classes_` attribute.
| 257 | +        """
| 258 | +        return super().predict_log_proba(X)
| 259 | +
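
Note: the staged_* overrides above only restore the inherited AdaBoost docstrings; runtime behavior is unchanged. A typical monitoring loop over one of these generators (a sketch; the `linearboost` import path is an assumption):

    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from linearboost import LinearBoostClassifier

    X, y = make_classification(n_samples=200, random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    clf = LinearBoostClassifier(n_estimators=10).fit(X_train, y_train)
    for i, score in enumerate(clf.staged_score(X_test, y_test), start=1):
        print(f"after boost {i}: test accuracy = {score:.3f}")
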
| 260 | +
| 261 | +class LinearBoostClassifier(_DenseAdaBoostClassifier):
67 | 262 |     """A LinearBoost classifier.
68 | 263 |
69 | 264 |     A LinearBoost classifier is a meta-estimator based on AdaBoost and SEFR.

@@ -221,7 +416,6 @@ def __init__(
221 | 416 |
222 | 417 |         def __sklearn_tags__(self):
223 | 418 |             tags = super().__sklearn_tags__()
224 | | -            tags.input_tags.sparse = False
225 | 419 |             tags.target_tags.required = True
226 | 420 |             tags.classifier_tags.multi_class = False
227 | 421 |             tags.classifier_tags.poor_score = True

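Note: with the dense-only `sparse` flag now set once in `_DenseAdaBoostClassifier`, the subclass no longer repeats it. One way to confirm the combined tags (sketch, assuming scikit-learn >= 1.6, whose public `get_tags` helper resolves the `__sklearn_tags__` chain):

    from sklearn.utils import get_tags

    tags = get_tags(LinearBoostClassifier())
    assert tags.input_tags.sparse is False    # inherited from _DenseAdaBoostClassifier
    assert tags.target_tags.required is True  # still set by the subclass
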
@@ -268,6 +462,25 @@ def _check_X_y(self, X, y) -> tuple[np.ndarray, np.ndarray]:
268 | 462 |         return X, y
269 | 463 |
270 | 464 |     def fit(self, X, y, sample_weight=None) -> Self:
| 465 | +        """Build a LinearBoost classifier from the training set (X, y).
| 466 | +
| 467 | +        Parameters
| 468 | +        ----------
| 469 | +        X : array-like of shape (n_samples, n_features)
| 470 | +            The training input samples.
| 471 | +
| 472 | +        y : array-like of shape (n_samples,)
| 473 | +            The target values.
| 474 | +
| 475 | +        sample_weight : array-like of shape (n_samples,), default=None
| 476 | +            Sample weights. If None, the sample weights are initialized to
| 477 | +            1 / n_samples.
| 478 | +
| 479 | +        Returns
| 480 | +        -------
| 481 | +        self : object
| 482 | +            Fitted estimator.
| 483 | +        """
271 | 484 |         if self.algorithm not in {"SAMME", "SAMME.R"}:
272 | 485 |             raise ValueError("algorithm must be 'SAMME' or 'SAMME.R'")
273 | 486 |

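Note: the new `fit` docstring documents the existing contract; behavior is unchanged. Continuing the sketch above, `sample_weight` can bias training toward one class (the weighting here is purely illustrative):

    import numpy as np

    weights = np.where(y_train == 1, 2.0, 1.0)  # hypothetical: upweight class 1
    clf = LinearBoostClassifier().fit(X_train, y_train, sample_weight=weights)
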
@@ -322,7 +535,8 @@ def fit(self, X, y, sample_weight=None) -> Self:
322 | 535 |         )
323 | 536 |         return super().fit(X_transformed, y, sample_weight)
324 | 537 |
325 | | -    def _samme_proba(self, estimator, n_classes, X):
| 538 | +    @staticmethod
| 539 | +    def _samme_proba(estimator, n_classes, X):
326 | 540 |         """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].
327 | 541 |
328 | 542 |         References

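Note: turning `_samme_proba` into a `@staticmethod` is safe because it never reads `self`. The equation it implements (Zhu et al., algorithm 4, step 2, equation c) is h_k(x) = (K - 1) * (log p_k(x) - (1/K) * sum_j log p_j(x)); a standalone numpy sketch of that formula:

    import numpy as np

    def samme_r_contribution(proba, n_classes):
        # clip to avoid log(0), then center the per-sample log-probabilities
        log_proba = np.log(np.clip(proba, np.finfo(proba.dtype).eps, None))
        return (n_classes - 1) * (log_proba - log_proba.mean(axis=1, keepdims=True))
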
@@ -401,6 +615,23 @@ def _boost(self, iboost, X, y, sample_weight, random_state):
401 | 615 |         return sample_weight, estimator_weight, estimator_error
402 | 616 |
403 | 617 |     def decision_function(self, X):
| 618 | +        """Compute the decision function of ``X``.
| 619 | +
| 620 | +        Parameters
| 621 | +        ----------
| 622 | +        X : array-like of shape (n_samples, n_features)
| 623 | +            The training input samples.
| 624 | +
| 625 | +        Returns
| 626 | +        -------
| 627 | +        score : ndarray of shape (n_samples, k)
| 628 | +            The decision function of the input samples. The order of
| 629 | +            outputs is the same as that of the :term:`classes_` attribute.
| 630 | +            Binary classification is a special case with ``k == 1``,
| 631 | +            otherwise ``k == n_classes``. For binary classification,
| 632 | +            values closer to -1 or 1 mean more like the first or second
| 633 | +            class in ``classes_``, respectively.
| 634 | +        """
404 | 635 |         check_is_fitted(self)
405 | 636 |         X_transformed = self.scaler_.transform(X)
406 | 637 |

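Note: for the binary case documented above (``k == 1``), the sign of the score selects the class. Continuing the earlier sketch:

    scores = clf.decision_function(X_test)          # shape (n_samples,) when binary
    preds = clf.classes_[(scores > 0).astype(int)]  # matches clf.predict(X_test)
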
@@ -431,9 +662,8 @@ def predict(self, X):
431 | 662 |
432 | 663 |         Parameters
433 | 664 |         ----------
434 | | -        X : {array-like, sparse matrix} of shape (n_samples, n_features)
435 | | -            The training input samples. Sparse matrix can be CSC, CSR, COO,
436 | | -            DOK, or LIL. COO, DOK, and LIL are converted to CSR.
| 665 | +        X : array-like of shape (n_samples, n_features)
| 666 | +            The training input samples.
437 | 667 |
438 | 668 |         Returns
439 | 669 |         -------