1
+ from __future__ import annotations
2
+
1
3
import logging
2
4
import os
3
5
11
13
'train_cost_sensitive' ,
12
14
'train_cost_sensitive_micro' ,
13
15
'train_binary_and_multiclass' ,
14
- 'predict_values' ]
16
+ 'predict_values' ,
17
+ 'get_topk_labels' ]
15
18
16
19
17
20
class FlatModel :
18
21
def __init__ (self , weights : np .matrix ,
19
22
bias : float ,
20
- thresholds : ' float | np.ndarray' ,
23
+ thresholds : float | np .ndarray ,
21
24
):
22
25
self .weights = weights
23
26
self .bias = bias
@@ -68,7 +71,7 @@ def train_1vsrest(y: sparse.csr_matrix,
68
71
A model which can be used in predict_values.
69
72
"""
70
73
# Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
71
- x , options , bias = prepare_options (x , options )
74
+ x , options , bias = _prepare_options (x , options )
72
75
73
76
y = y .tocsc ()
74
77
num_class = y .shape [1 ]
@@ -79,14 +82,14 @@ def train_1vsrest(y: sparse.csr_matrix,
79
82
logging .info (f'Training one-vs-rest model on { num_class } labels' )
80
83
for i in tqdm (range (num_class ), disable = not verbose ):
81
84
yi = y [:, i ].toarray ().reshape (- 1 )
82
- weights [:, i ] = do_train (2 * yi - 1 , x , options ).ravel ()
85
+ weights [:, i ] = _do_train (2 * yi - 1 , x , options ).ravel ()
83
86
84
87
return FlatModel (weights = np .asmatrix (weights ),
85
88
bias = bias ,
86
89
thresholds = 0 )
87
90
88
91
89
- def prepare_options (x : sparse .csr_matrix , options : str ) -> ' tuple[sparse.csr_matrix, str, float]' :
92
+ def _prepare_options (x : sparse .csr_matrix , options : str ) -> tuple [sparse .csr_matrix , str , float ]:
90
93
"""Prepare options and x for multi-label training. Called in the first line of
91
94
any training function.
92
95
@@ -150,7 +153,7 @@ def train_thresholding(y: sparse.csr_matrix,
150
153
A model which can be used in predict_values.
151
154
"""
152
155
# Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
153
- x , options , bias = prepare_options (x , options )
156
+ x , options , bias = _prepare_options (x , options )
154
157
155
158
y = y .tocsc ()
156
159
num_class = y .shape [1 ]
@@ -162,7 +165,7 @@ def train_thresholding(y: sparse.csr_matrix,
162
165
logging .info (f'Training thresholding model on { num_class } labels' )
163
166
for i in tqdm (range (num_class ), disable = not verbose ):
164
167
yi = y [:, i ].toarray ().reshape (- 1 )
165
- w , t = thresholding_one_label (2 * yi - 1 , x , options )
168
+ w , t = _thresholding_one_label (2 * yi - 1 , x , options )
166
169
weights [:, i ] = w .ravel ()
167
170
thresholds [i ] = t
168
171
@@ -171,10 +174,10 @@ def train_thresholding(y: sparse.csr_matrix,
171
174
thresholds = thresholds )
172
175
173
176
174
- def thresholding_one_label (y : np .ndarray ,
177
+ def _thresholding_one_label (y : np .ndarray ,
175
178
x : sparse .csr_matrix ,
176
179
options : str
177
- ) -> ' tuple[np.ndarray, float]' :
180
+ ) -> tuple [np .ndarray , float ]:
178
181
"""Outer cross-validation for thresholding on a single label.
179
182
180
183
Args:
@@ -201,29 +204,29 @@ def thresholding_one_label(y: np.ndarray,
201
204
val_idx = perm [mask ]
202
205
train_idx = perm [mask != True ]
203
206
204
- scutfbr_w , scutfbr_b_list = scutfbr (
207
+ scutfbr_w , scutfbr_b_list = _scutfbr (
205
208
y [train_idx ], x [train_idx ], fbr_list , options )
206
209
wTx = (x [val_idx ] * scutfbr_w ).A1
207
210
208
211
for i in range (fbr_list .size ):
209
- F = fmeasure (y [val_idx ], 2 * (wTx > - scutfbr_b_list [i ]) - 1 )
212
+ F = _fmeasure (y [val_idx ], 2 * (wTx > - scutfbr_b_list [i ]) - 1 )
210
213
f_list [i ] += F
211
214
212
215
best_fbr = fbr_list [::- 1 ][np .argmax (f_list [::- 1 ])] # last largest
213
216
if np .max (f_list ) == 0 :
214
217
best_fbr = np .min (fbr_list )
215
218
216
219
# final model
217
- w , b_list = scutfbr (y , x , np .array ([best_fbr ]), options )
220
+ w , b_list = _scutfbr (y , x , np .array ([best_fbr ]), options )
218
221
219
222
return w , b_list [0 ]
220
223
221
224
222
- def scutfbr (y : np .ndarray ,
225
+ def _scutfbr (y : np .ndarray ,
223
226
x : sparse .csr_matrix ,
224
- fbr_list : ' list[float]' ,
227
+ fbr_list : list [float ],
225
228
options : str
226
- ) -> ' tuple[np.matrix, np.ndarray]' :
229
+ ) -> tuple [np .matrix , np .ndarray ]:
227
230
"""Inner cross-validation for SCutfbr heuristic.
228
231
229
232
Args:
@@ -250,10 +253,10 @@ def scutfbr(y: np.ndarray,
250
253
val_idx = perm [mask ]
251
254
train_idx = perm [mask != True ]
252
255
253
- w = do_train (y [train_idx ], x [train_idx ], options )
256
+ w = _do_train (y [train_idx ], x [train_idx ], options )
254
257
wTx = (x [val_idx ] * w ).A1
255
258
scut_b = 0.
256
- start_F = fmeasure (y [val_idx ], 2 * (wTx > - scut_b ) - 1 )
259
+ start_F = _fmeasure (y [val_idx ], 2 * (wTx > - scut_b ) - 1 )
257
260
258
261
# stableness to match the MATLAB implementation
259
262
sorted_wTx_index = np .argsort (wTx , kind = 'stable' )
@@ -291,7 +294,7 @@ def scutfbr(y: np.ndarray,
291
294
else :
292
295
scut_b = - (sorted_wTx [cut ] + sorted_wTx [cut + 1 ]) / 2
293
296
294
- F = fmeasure (y_val , 2 * (wTx > - scut_b ) - 1 )
297
+ F = _fmeasure (y_val , 2 * (wTx > - scut_b ) - 1 )
295
298
296
299
for i in range (fbr_list .size ):
297
300
if F > fbr_list [i ]:
@@ -300,10 +303,13 @@ def scutfbr(y: np.ndarray,
300
303
b_list [i ] -= np .max (wTx )
301
304
302
305
b_list = b_list / nr_fold
303
- return do_train (y , x , options ), b_list
306
+ return _do_train (y , x , options ), b_list
304
307
305
308
306
- def do_train (y : np .ndarray , x : sparse .csr_matrix , options : str ) -> np .matrix :
309
+ def _do_train (y : np .ndarray ,
310
+ x : sparse .csr_matrix ,
311
+ options : str
312
+ ) -> np .matrix :
307
313
"""Wrapper around liblinear.liblinearutil.train.
308
314
Forcibly suppresses all IO regardless of options.
309
315
@@ -351,7 +357,7 @@ def __exit__(self, type, value, traceback):
351
357
os .close (self .stderr )
352
358
353
359
354
- def fmeasure (y_true : np .ndarray , y_pred : np .ndarray ) -> float :
360
+ def _fmeasure (y_true : np .ndarray , y_pred : np .ndarray ) -> float :
355
361
"""Calculate F1 score.
356
362
357
363
Args:
@@ -393,7 +399,7 @@ def train_cost_sensitive(y: sparse.csr_matrix,
393
399
A model which can be used in predict_values.
394
400
"""
395
401
# Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
396
- x , options , bias = prepare_options (x , options )
402
+ x , options , bias = _prepare_options (x , options )
397
403
398
404
y = y .tocsc ()
399
405
num_class = y .shape [1 ]
@@ -405,15 +411,15 @@ def train_cost_sensitive(y: sparse.csr_matrix,
405
411
f'Training cost-sensitive model for Macro-F1 on { num_class } labels' )
406
412
for i in tqdm (range (num_class ), disable = not verbose ):
407
413
yi = y [:, i ].toarray ().reshape (- 1 )
408
- w = cost_sensitive_one_label (2 * yi - 1 , x , options )
414
+ w = _cost_sensitive_one_label (2 * yi - 1 , x , options )
409
415
weights [:, i ] = w .ravel ()
410
416
411
417
return FlatModel (weights = np .asmatrix (weights ),
412
418
bias = bias ,
413
419
thresholds = 0 )
414
420
415
421
416
- def cost_sensitive_one_label (y : np .ndarray ,
422
+ def _cost_sensitive_one_label (y : np .ndarray ,
417
423
x : sparse .csr_matrix ,
418
424
options : str
419
425
) -> np .ndarray :
@@ -436,17 +442,17 @@ def cost_sensitive_one_label(y: np.ndarray,
436
442
bestScore = - np .Inf
437
443
for a in param_space :
438
444
cv_options = f'{ options } -w1 { a } '
439
- pred = cross_validate (y , x , cv_options , perm )
440
- score = fmeasure (y , pred )
445
+ pred = _cross_validate (y , x , cv_options , perm )
446
+ score = _fmeasure (y , pred )
441
447
if bestScore < score :
442
448
bestScore = score
443
449
bestA = a
444
450
445
451
final_options = f'{ options } -w1 { bestA } '
446
- return do_train (y , x , final_options )
452
+ return _do_train (y , x , final_options )
447
453
448
454
449
- def cross_validate (y : np .ndarray ,
455
+ def _cross_validate (y : np .ndarray ,
450
456
x : sparse .csr_matrix ,
451
457
options : str ,
452
458
perm : np .ndarray
@@ -470,7 +476,7 @@ def cross_validate(y: np.ndarray,
470
476
val_idx = perm [mask ]
471
477
train_idx = perm [mask != True ]
472
478
473
- w = do_train (y [train_idx ], x [train_idx ], options )
479
+ w = _do_train (y [train_idx ], x [train_idx ], options )
474
480
pred [val_idx ] = (x [val_idx ] * w ).A1 > 0
475
481
476
482
return 2 * pred - 1
@@ -498,7 +504,7 @@ def train_cost_sensitive_micro(y: sparse.csr_matrix,
498
504
A model which can be used in predict_values.
499
505
"""
500
506
# Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
501
- x , options , bias = prepare_options (x , options )
507
+ x , options , bias = _prepare_options (x , options )
502
508
503
509
y = y .tocsc ()
504
510
num_class = y .shape [1 ]
@@ -520,7 +526,7 @@ def train_cost_sensitive_micro(y: sparse.csr_matrix,
520
526
yi = 2 * yi - 1
521
527
522
528
cv_options = f'{ options } -w1 { a } '
523
- pred = cross_validate (yi , x , cv_options , perm )
529
+ pred = _cross_validate (yi , x , cv_options , perm )
524
530
tp = tp + np .sum (np .logical_and (yi == 1 , pred == 1 ))
525
531
fn = fn + np .sum (np .logical_and (yi == 1 , pred == - 1 ))
526
532
fp = fp + np .sum (np .logical_and (yi == - 1 , pred == 1 ))
@@ -533,7 +539,7 @@ def train_cost_sensitive_micro(y: sparse.csr_matrix,
533
539
final_options = f'{ options } -w1 { bestA } '
534
540
for i in range (num_class ):
535
541
yi = y [:, i ].toarray ().reshape (- 1 )
536
- w = do_train (2 * yi - 1 , x , final_options )
542
+ w = _do_train (2 * yi - 1 , x , final_options )
537
543
weights [:, i ] = w .ravel ()
538
544
539
545
return FlatModel (weights = np .asmatrix (weights ),
@@ -557,7 +563,7 @@ def train_binary_and_multiclass(y: sparse.csr_matrix,
557
563
Returns:
558
564
A model which can be used in predict_values.
559
565
"""
560
- x , options , bias = prepare_options (x , options )
566
+ x , options , bias = _prepare_options (x , options )
561
567
num_instances , num_labels = y .shape
562
568
nonzero_instance_ids , nonzero_label_ids = y .nonzero ()
563
569
assert len (set (nonzero_instance_ids )) == num_instances , """
@@ -602,7 +608,10 @@ def predict_values(model, x: sparse.csr_matrix) -> np.ndarray:
602
608
return model .predict_values (x )
603
609
604
610
605
- def get_topk_labels (label_mapping : np .ndarray , preds : np .ndarray , top_k : int = 5 ) -> 'list[list[str]]' :
611
+ def get_topk_labels (label_mapping : np .ndarray ,
612
+ preds : np .ndarray ,
613
+ top_k : int = 5
614
+ ) -> list [list [str ]]:
606
615
"""Get top k predictions from decision values.
607
616
608
617
Args:
0 commit comments