23
23
from sklearn .preprocessing import Normalizer , StandardScaler
24
24
from collections .abc import Sequence
25
25
from sklearn .linear_model import LogisticRegression
26
+ from typing import Union , Optional , List , Tuple , Any
26
27
27
- def disp_mesa (txt ) :
28
+ def disp_mesa (txt : str ) -> None :
28
29
"""
29
30
Display a timestamped message to stderr for MESA logging.
30
31
@@ -36,7 +37,7 @@ def disp_mesa(txt):
36
37
print ("@%s \t %s" % (time .asctime (), txt ), file = sys .stderr )
37
38
38
39
39
- def wilcoxon (X , y ) :
40
+ def wilcoxon (X : np . ndarray , y : np . ndarray ) -> np . ndarray :
40
41
"""
41
42
Score function for feature selection using Wilcoxon rank-sum test.
42
43
@@ -98,7 +99,7 @@ def __init__(self, n=10, **kwargs):
98
99
super ().__init__ (** kwargs )
99
100
self .n = n
100
101
101
- def fit (self , X , y ) :
102
+ def fit (self , X : np . ndarray , y : np . ndarray ) -> "BorutaSelector" :
102
103
"""
103
104
Fit the Boruta feature selection algorithm and select top n features.
104
105
@@ -118,7 +119,7 @@ def fit(self, X, y):
118
119
self .indices = np .argsort (self .ranking_ )[: self .n ]
119
120
return self
120
121
121
- def transform (self , X ) :
122
+ def transform (self , X : Union [ np . ndarray , pd . DataFrame ]) -> Union [ np . ndarray , pd . DataFrame ] :
122
123
"""
123
124
Transform data to contain only the selected top n features.
124
125
@@ -146,7 +147,7 @@ def transform(self, X):
146
147
except :
147
148
return X [:, self .indices ]
148
149
149
- def get_support (self ):
150
+ def get_support (self ) -> np . ndarray :
150
151
"""
151
152
Get indices of the selected features.
152
153
@@ -196,7 +197,7 @@ def __init__(self, ratio=0.9, imputer=SimpleImputer(strategy="mean")):
196
197
self .ratio = ratio
197
198
self .imputer = imputer
198
199
199
- def fit (self , X , y = None ):
200
+ def fit (self , X : np . ndarray , y : Optional [ np . ndarray ] = None ) -> "missing_value_processing" :
200
201
"""
201
202
Fit the missing value processor by identifying valid features and fitting imputer.
202
203
@@ -228,7 +229,7 @@ def fit(self, X, y=None):
228
229
else :
229
230
raise ValueError ("The ratio of valid values should be greater than 0." )
230
231
231
- def transform (self , X ) :
232
+ def transform (self , X : pd . DataFrame ) -> pd . DataFrame :
232
233
"""
233
234
Transform data by removing high-missing features and imputing remaining values.
234
235
@@ -256,7 +257,7 @@ def transform(self, X):
256
257
else :
257
258
raise ValueError ("The ratio of valid values should be greater than 0." )
258
259
259
- def get_support (self ):
260
+ def get_support (self ) -> np . ndarray :
260
261
"""
261
262
Get indices of features that passed the missing value filter.
262
263
@@ -362,7 +363,7 @@ def __init__(
362
363
for key , value in kwargs .items ():
363
364
setattr (self , key , value )
364
365
365
- def fit (self , X , y ) :
366
+ def fit (self , X : Union [ pd . DataFrame , np . ndarray ], y : Union [ pd . Series , np . ndarray ]) -> "MESA_modality" :
366
367
"""
367
368
Fit the complete preprocessing pipeline and classifier.
368
369
@@ -396,7 +397,7 @@ def fit(self, X, y):
396
397
self .classifier = self .classifier .fit (self .pipeline .transform (X ), y )
397
398
return self
398
399
399
- def transform (self , X ) :
400
+ def transform (self , X : Union [ pd . DataFrame , np . ndarray ]) -> np . ndarray :
400
401
"""
401
402
Apply the preprocessing pipeline to data.
402
403
@@ -412,7 +413,7 @@ def transform(self, X):
412
413
"""
413
414
return self .pipeline .transform (X )
414
415
415
- def predict (self , X ) :
416
+ def predict (self , X : np . ndarray ) -> np . ndarray :
416
417
"""
417
418
Predict class labels for preprocessed data.
418
419
@@ -428,7 +429,7 @@ def predict(self, X):
428
429
"""
429
430
return self .classifier .predict (X )
430
431
431
- def predict_proba (self , X ) :
432
+ def predict_proba (self , X : np . ndarray ) -> np . ndarray :
432
433
"""
433
434
Predict class probabilities for preprocessed data.
434
435
@@ -444,7 +445,7 @@ def predict_proba(self, X):
444
445
"""
445
446
return self .classifier .predict_proba (X )
446
447
447
- def transform_predict (self , X ) :
448
+ def transform_predict (self , X : Union [ pd . DataFrame , np . ndarray ]) -> np . ndarray :
448
449
"""
449
450
Apply preprocessing pipeline and predict class labels.
450
451
@@ -460,7 +461,7 @@ def transform_predict(self, X):
460
461
"""
461
462
return self .classifier .predict (self .pipeline .transform (X ))
462
463
463
- def transform_predict_proba (self , X ) :
464
+ def transform_predict_proba (self , X : Union [ pd . DataFrame , np . ndarray ]) -> np . ndarray :
464
465
"""
465
466
Apply preprocessing pipeline and predict class probabilities.
466
467
@@ -476,7 +477,7 @@ def transform_predict_proba(self, X):
476
477
"""
477
478
return self .classifier .predict_proba (self .pipeline .transform (X ))
478
479
479
- def get_support (self , step = None ):
480
+ def get_support (self , step : Optional [ int ] = None ) -> np . ndarray :
480
481
"""
481
482
Get indices of features selected by pipeline components.
482
483
@@ -498,7 +499,7 @@ def get_support(self, step=None):
498
499
else :
499
500
return self .pipeline [step ].get_support (indices = True )
500
501
501
- def get_params (self , deep = True ):
502
+ def get_params (self , deep : bool = True ) -> dict :
502
503
"""
503
504
Get parameters of the MESA_modality instance.
504
505
@@ -590,7 +591,7 @@ def __init__(
590
591
for key , value in kwargs .items ():
591
592
setattr (self , key , value )
592
593
593
- def _base_fit (self , X , y , base_estimator ) :
594
+ def _base_fit (self , X : np . ndarray , y : Union [ pd . Series , np . ndarray ], base_estimator : Any ) -> np . ndarray :
594
595
"""
595
596
Generate meta-features using cross-validation for a single modality.
596
597
@@ -624,7 +625,7 @@ def _internal_cv(train_index, test_index):
624
625
)
625
626
return base_probability
626
627
627
- def fit (self , X_list , y ) :
628
+ def fit (self , X_list : List [ Union [ pd . DataFrame , np . ndarray ]], y : Union [ pd . Series , np . ndarray ]) -> "MESA" :
628
629
"""
629
630
Fit all modality estimators and the meta-estimator.
630
631
@@ -658,7 +659,7 @@ def fit(self, X_list, y):
658
659
self .meta_estimator .fit (base_probability , y_stacking )
659
660
return self
660
661
661
- def predict (self , X_list_test ) :
662
+ def predict (self , X_list_test : List [ Union [ pd . DataFrame , np . ndarray ]]) -> np . ndarray :
662
663
"""
663
664
Predict class labels using the fitted ensemble.
664
665
@@ -677,7 +678,7 @@ def predict(self, X_list_test):
677
678
)
678
679
return self .meta_estimator .predict (base_probability_test )
679
680
680
- def predict_proba (self , X_list_test ) :
681
+ def predict_proba (self , X_list_test : List [ Union [ pd . DataFrame , np . ndarray ]]) -> np . ndarray :
681
682
"""
682
683
Predict class probabilities using the fitted ensemble.
683
684
@@ -696,7 +697,7 @@ def predict_proba(self, X_list_test):
696
697
)
697
698
return self .meta_estimator .predict_proba (base_probability_test )
698
699
699
- def get_support (self , step = None ):
700
+ def get_support (self , step : Optional [ int ] = None ) -> List [ np . ndarray ] :
700
701
"""
701
702
Get feature support information from all modalities.
702
703
@@ -778,14 +779,17 @@ def __init__(
778
779
779
780
def _cv_iter (
780
781
self ,
781
- X ,
782
- y ,
783
- train_index ,
784
- test_index ,
785
- proba = True ,
786
- return_feature_in = False ,
787
- mesa = False
788
- ):
782
+ X : Union [pd .DataFrame , List [pd .DataFrame ]],
783
+ y : Union [pd .Series , np .ndarray ],
784
+ train_index : np .ndarray ,
785
+ test_index : np .ndarray ,
786
+ proba : bool = True ,
787
+ return_feature_in : bool = False ,
788
+ mesa : bool = False
789
+ ) -> Union [
790
+ Tuple [np .ndarray , np .ndarray ],
791
+ Tuple [np .ndarray , np .ndarray , Any ]
792
+ ]:
789
793
"""
790
794
Perform a single iteration of cross-validation.
791
795
@@ -837,7 +841,7 @@ def _cv_iter(
837
841
else :
838
842
return y_pred , y_test
839
843
840
- def fit (self , X , y ) :
844
+ def fit (self , X : Union [ pd . DataFrame , List [ pd . DataFrame ]], y : Union [ pd . Series , np . ndarray ]) -> "MESA_CV" :
841
845
"""
842
846
Perform cross-validation on the provided data.
843
847
@@ -894,7 +898,7 @@ def fit(self, X, y):
894
898
)
895
899
return self
896
900
897
- def get_performance (self ):
901
+ def get_performance (self ) -> float :
898
902
"""
899
903
Calculate the mean ROC AUC score across all cross-validation folds.
900
904
0 commit comments