+ # This file is part of the LinearBoost project.
+ #
+ # Portions of this file are derived from scikit-learn
+ # Copyright (c) 2007–2024, scikit-learn developers (version 1.5)
+ # Licensed under the BSD 3-Clause License
+ # See https://github.com/scikit-learn/scikit-learn/blob/main/COPYING for details.
+ #
+ # Additional code and modifications:
+ # - Hamidreza Keshavarz (hamid9@outlook.com) — machine learning logic, design, and new algorithms
+ # - Mehdi Samsami (mehdisamsami@live.com) — software refactoring, compatibility with scikit-learn framework, and packaging
+ #
+ # The combined work is licensed under the MIT License.
+
from __future__ import annotations

import sys
      StandardScaler,
  )
  from sklearn.utils import compute_sample_weight
- from sklearn.utils._param_validation import Hidden, Interval, StrOptions
+ from sklearn.utils._param_validation import Interval, StrOptions
  from sklearn.utils.multiclass import check_classification_targets, type_of_target
  from sklearn.utils.validation import check_is_fitted

@@ -73,18 +86,10 @@ class LinearBoostClassifier(AdaBoostClassifier):
      algorithm : {'SAMME', 'SAMME.R'}, default='SAMME'
          If 'SAMME' then use the SAMME discrete boosting algorithm.
          If 'SAMME.R' then use the SAMME.R real boosting algorithm
-         (only available in scikit-learn < 1.6).
+         (implementation ported from scikit-learn 1.5).
          The SAMME.R algorithm typically converges faster than SAMME,
          achieving a lower test error with fewer boosting iterations.

-         .. deprecated:: scikit-learn 1.4
-             `"SAMME.R"` is deprecated and will be removed in scikit-learn 1.6.
-             `"SAMME"` will become the default.
-
-         .. deprecated:: scikit-learn 1.6
-             `algorithm` is deprecated and will be removed in scikit-learn 1.8.
-             This estimator only implements the 'SAMME' algorithm in scikit-learn >= 1.6.
-
      scaler : str, default='minmax'
          Specifies the scaler to apply to the data. Options include:
@@ -188,9 +193,7 @@ class LinearBoostClassifier(AdaBoostClassifier):
      _parameter_constraints: dict = {
          "n_estimators": [Interval(Integral, 1, None, closed="left")],
          "learning_rate": [Interval(Real, 0, None, closed="neither")],
-         "algorithm": [StrOptions({"SAMME"}), Hidden(StrOptions({"deprecated"}))]
-         if SKLEARN_V1_6_OR_LATER
-         else [StrOptions({"SAMME", "SAMME.R"})],
+         "algorithm": [StrOptions({"SAMME", "SAMME.R"})],
          "scaler": [StrOptions({s for s in _scalers})],
          "class_weight": [
              StrOptions({"balanced_subsample", "balanced"}),
@@ -206,18 +209,15 @@ def __init__(
          n_estimators=200,
          *,
          learning_rate=1.0,
-         algorithm="SAMME",
+         algorithm="SAMME.R",
          scaler="minmax",
          class_weight=None,
          loss_function=None,
      ):
          super().__init__(
-             estimator=SEFR(),
-             n_estimators=n_estimators,
-             learning_rate=learning_rate,
-             algorithm=algorithm,
+             estimator=SEFR(), n_estimators=n_estimators, learning_rate=learning_rate
          )
-
+         self.algorithm = algorithm
          self.scaler = scaler
          self.class_weight = class_weight
          self.loss_function = loss_function
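Note: the `algorithm` argument is no longer forwarded to `AdaBoostClassifier.__init__`, since scikit-learn deprecates that parameter in 1.6 and removes it in 1.8 (see the docstring block deleted above); it is instead kept as a plain instance attribute. A minimal sketch of the same pattern, separate from this class (`MyBoost` is illustrative only):

```python
from sklearn.ensemble import AdaBoostClassifier

class MyBoost(AdaBoostClassifier):
    def __init__(self, n_estimators=50, algorithm="SAMME.R"):
        # Do not forward `algorithm`: scikit-learn 1.6 deprecates it, 1.8 removes it.
        super().__init__(n_estimators=n_estimators)
        # Store it under the same name so get_params()/set_params() still see it.
        self.algorithm = algorithm
```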
@@ -241,7 +241,11 @@ def _more_tags(self) -> dict[str, bool]:
              "check_sample_weight_equivalence_on_dense_data": (
                  "In LinearBoostClassifier, setting a sample's weight to 0 can produce a different "
                  "result than omitting the sample. Such samples intentionally still affect the data scaling process."
-             )
+             ),
+             "check_sample_weights_invariance": (
+                 "In LinearBoostClassifier, a zero sample_weight is not equivalent to removing the sample, "
+                 "as samples with zero weight intentionally still affect the data scaling process."
+             ),
          },
      }
@@ -269,9 +273,8 @@ def _check_X_y(self, X, y) -> tuple[np.ndarray, np.ndarray]:
          return X, y

      def fit(self, X, y, sample_weight=None) -> Self:
-         X, y = self._check_X_y(X, y)
-         self.classes_ = np.unique(y)
-         self.n_classes_ = self.classes_.shape[0]
+         if self.algorithm not in {"SAMME", "SAMME.R"}:
+             raise ValueError("algorithm must be 'SAMME' or 'SAMME.R'")

          if self.scaler not in _scalers:
              raise ValueError('Invalid scaler provided; got "%s".' % self.scaler)
@@ -283,6 +286,25 @@ def fit(self, X, y, sample_weight=None) -> Self:
              clone(_scalers[self.scaler]), clone(_scalers["minmax"])
          )
          X_transformed = self.scaler_.fit_transform(X)
+         y = np.asarray(y)
+
+         if sample_weight is not None:
+             sample_weight = np.asarray(sample_weight)
+             if sample_weight.shape[0] != X_transformed.shape[0]:
+                 raise ValueError(
+                     f"sample_weight.shape == {sample_weight.shape} is incompatible with X.shape == {X_transformed.shape}"
+                 )
+             nonzero_mask = (
+                 sample_weight.sum(axis=1) != 0
+                 if sample_weight.ndim > 1
+                 else sample_weight != 0
+             )
+             X_transformed = X_transformed[nonzero_mask]
+             y = y[nonzero_mask]
+             sample_weight = sample_weight[nonzero_mask]
+         X_transformed, y = self._check_X_y(X_transformed, y)
+         self.classes_ = np.unique(y)
+         self.n_classes_ = self.classes_.shape[0]

          if self.class_weight is not None:
              valid_presets = ("balanced", "balanced_subsample")
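Note: the added block above scales X first and only afterwards drops zero-weight rows, so those samples still shape the fitted scaler (this is exactly what the xfail tags earlier in this diff document). A standalone sketch of the masking behavior with toy arrays (values are illustrative):

```python
import numpy as np

X = np.array([[0.0], [5.0], [10.0]])
y = np.array([0, 1, 1])
sample_weight = np.array([1.0, 0.0, 2.0])

# Zero-weight rows are removed from the boosting data...
mask = sample_weight != 0
X_kept, y_kept, w_kept = X[mask], y[mask], sample_weight[mask]

# ...but the dropped row already influenced scaling: a min-max scaler
# fit on the full X maps 5.0 to 0.5, not to 0.0.
print(X_kept.ravel(), y_kept, w_kept)
```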
@@ -307,50 +329,131 @@ def fit(self, X, y, sample_weight=None) -> Self:
              warnings.filterwarnings(
                  "ignore",
                  category=FutureWarning,
-                 message=".*parameter 'algorithm' is deprecated.*",
+                 message=".*parameter 'algorithm' may change in the future.*",
              )
          return super().fit(X_transformed, y, sample_weight)

+     def _samme_proba(self, estimator, n_classes, X):
+         """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].
+
+         References
+         ----------
+         .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
+
+         """
+         proba = estimator.predict_proba(X)
+
+         # Displace zero probabilities so the log is defined.
+         # Also fix negative elements which may occur with
+         # negative sample weights.
+         np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba)
+         log_proba = np.log(proba)
+
+         return (n_classes - 1) * (
+             log_proba - (1.0 / n_classes) * log_proba.sum(axis=1)[:, np.newaxis]
+         )
+
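For reference, the quantity returned by `_samme_proba` is the symmetric log-probability vote from Zhu et al.'s Algorithm 4, with K the number of classes and p_k(x) the estimator's predicted probability for class k:

```latex
h_k(x) = (K - 1)\left( \log p_k(x) - \frac{1}{K} \sum_{j=1}^{K} \log p_j(x) \right)
```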
      def _boost(self, iboost, X, y, sample_weight, random_state):
          estimator = self._make_estimator(random_state=random_state)
          estimator.fit(X, y, sample_weight=sample_weight)

-         y_pred = estimator.predict(X)
-         missclassified = y_pred != y
+         if self.algorithm == "SAMME.R":
+             y_pred = estimator.predict(X)

-         if self.loss_function:
-             estimator_error = self.loss_function(y, y_pred, sample_weight)
-         else:
+             incorrect = y_pred != y
              estimator_error = np.mean(
-                 np.average(missclassified, weights=sample_weight, axis=0)
+                 np.average(incorrect, weights=sample_weight, axis=0)
              )

-         if estimator_error <= 0:
-             return sample_weight, 1.0, 0.0
+             if estimator_error <= 0:
+                 return sample_weight, 1.0, 0.0
+             elif estimator_error >= 0.5:
+                 if len(self.estimators_) > 1:
+                     self.estimators_.pop(-1)
+                 return None, None, None

-         if estimator_error >= 0.5:
-             self.estimators_.pop(-1)
-             if len(self.estimators_) == 0:
-                 raise ValueError(
-                     "BaseClassifier in AdaBoostClassifier ensemble is worse than random, ensemble can not be fit."
+             # Compute SEFR-specific weight update
+             estimator_weight = self.learning_rate * np.log(
+                 (1 - estimator_error) / estimator_error
+             )
+
+             if iboost < self.n_estimators - 1:
+                 sample_weight = np.exp(
+                     np.log(sample_weight)
+                     + estimator_weight * incorrect * (sample_weight > 0)
                  )
-             return None, None, None

-         estimator_weight = (
-             self.learning_rate
-             * 0.5
-             * np.log((1.0 - estimator_error) / max(estimator_error, 1e-10))
-         )
+             return sample_weight, estimator_weight, estimator_error
+
+         else:  # standard SAMME
+             y_pred = estimator.predict(X)
+             incorrect = y_pred != y
+             estimator_error = np.mean(np.average(incorrect, weights=sample_weight))
+
+             if estimator_error <= 0:
+                 return sample_weight, 1.0, 0.0
+             if estimator_error >= 0.5:
+                 self.estimators_.pop(-1)
+                 if len(self.estimators_) == 0:
+                     raise ValueError(
+                         "BaseClassifier in AdaBoostClassifier ensemble is worse than random, ensemble cannot be fit."
+                     )
+                 return None, None, None
+
+             estimator_weight = self.learning_rate * np.log(
+                 (1.0 - estimator_error) / max(estimator_error, 1e-10)
+             )

-         sample_weight *= np.exp(
-             estimator_weight
-             * missclassified
-             * ((sample_weight > 0) | (estimator_weight < 0))
-         )
+             sample_weight *= np.exp(estimator_weight * incorrect)
+
+             # Normalize sample weights
+             sample_weight /= np.sum(sample_weight)

-         return sample_weight, estimator_weight, estimator_error
+             return sample_weight, estimator_weight, estimator_error
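As a quick numeric check of the `log((1 - err) / err)` update used in both branches above (the error value and learning rate here are illustrative, not from this diff):

```python
import numpy as np

learning_rate = 1.0
estimator_error = 0.2  # hypothetical weighted error for one boosting round

estimator_weight = learning_rate * np.log(
    (1.0 - estimator_error) / max(estimator_error, 1e-10)
)
# Prints ~1.3863: misclassified samples are reweighted by e**1.3863 ~ 4x.
print(round(estimator_weight, 4))
```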

      def decision_function(self, X):
          check_is_fitted(self)
          X_transformed = self.scaler_.transform(X)
-         return super().decision_function(X_transformed)
+
+         if self.algorithm == "SAMME.R":
+             # Proper SAMME.R decision function
+             classes = self.classes_
+             n_classes = len(classes)
+
+             pred = sum(
+                 self._samme_proba(estimator, n_classes, X_transformed)
+                 for estimator in self.estimators_
+             )
+             pred /= self.estimator_weights_.sum()
+             if n_classes == 2:
+                 pred[:, 0] *= -1
+                 return pred.sum(axis=1)
+             return pred
+
+         else:
+             # Standard SAMME decision function from AdaBoostClassifier (discrete)
+             return super().decision_function(X_transformed)
+
+     def predict(self, X):
+         """Predict classes for X.
+
+         The predicted class of an input sample is computed as the weighted mean
+         prediction of the classifiers in the ensemble.
+
+         Parameters
+         ----------
+         X : {array-like, sparse matrix} of shape (n_samples, n_features)
+             The input samples. Sparse matrix can be CSC, CSR, COO,
+             DOK, or LIL. COO, DOK, and LIL are converted to CSR.
+
+         Returns
+         -------
+         y : ndarray of shape (n_samples,)
+             The predicted classes.
+         """
+         pred = self.decision_function(X)
+
+         if self.n_classes_ == 2:
+             return self.classes_.take(pred > 0, axis=0)
+
+         return self.classes_.take(np.argmax(pred, axis=1), axis=0)
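Taken together, a minimal smoke test of the patched estimator might look like the sketch below. The import path and parameter values are assumptions for illustration, not part of this diff:

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

from linearboost import LinearBoostClassifier  # hypothetical import path

X, y = make_classification(n_samples=300, n_features=8, random_state=0)

# Exercise both boosting variants handled by this patch.
for algorithm in ("SAMME", "SAMME.R"):
    clf = LinearBoostClassifier(n_estimators=100, algorithm=algorithm)
    scores = cross_val_score(clf, X, y, cv=5)
    print(algorithm, scores.mean())
```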