Merge pull request #388 from ntumlgroup/change_EURLex-default_to_cddual

Eleven1Liu · web-flow · commit d2f315def67e · 2024-10-06T21:55:49.000+08:00
change the default optimizer to l2 cddual in EUR-Lex config
diff --git a/example_config/EUR-Lex/tree_l2svm.yml b/example_config/EUR-Lex/tree_l2svm.yml
@@ -6,7 +6,7 @@ data_name: EUR-Lex
 # train
 seed: 1337
 linear: true
-liblinear_options: "-s 2 -B 1 -e 0.0001 -q"
+liblinear_options: "-s 1 -B 1 -e 0.0001 -q"
 linear_technique: tree
 
 # eval
diff --git a/example_config/rcv1/l2svm.yml b/example_config/rcv1/l2svm.yml
@@ -6,7 +6,7 @@ data_name: rcv1
 # train
 seed: 1337
 linear: true
-liblinear_options: "-s 2 -B 1 -e 0.0001 -q"
+liblinear_options: "-s 1 -B 1 -e 0.0001 -q"
 linear_technique: 1vsrest
 
 # eval
diff --git a/example_config/rcv1/l2svm_svm_format.yml b/example_config/rcv1/l2svm_svm_format.yml
@@ -6,7 +6,7 @@ data_name: rcv1
 # train
 seed: 1337
 linear: true
-liblinear_options: "-s 2 -B 1 -e 0.0001 -q"
+liblinear_options: "-s 1 -B 1 -e 0.0001 -q"
 linear_technique: 1vsrest
 
 # eval
diff --git a/libmultilabel/linear/linear.py b/libmultilabel/linear/linear.py
@@ -139,7 +139,7 @@ def _prepare_options(x: sparse.csr_matrix, options: str) -> tuple[sparse.csr_mat
             raise ValueError("Invalid LIBLINEAR solver type. Only classification solvers are allowed.")
     else:
         # workaround for liblinear warning about unspecified solver
-        options_split.extend(["-s", "2"])
+        options_split.extend(["-s", "1"])
 
     bias = -1.0
     if "-B" in options_split:
diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py
@@ -225,7 +225,8 @@ def collect_stat(node: Node):
     root.dfs(collect_stat)
 
     # 16 is because when storing sparse matrices, indices (int64) require 8 bytes and floats require 8 bytes
-    return total_num_weights * 16
+    # Our study showed that, among the features used by each binary classification problem, on average no more than 2/3 of the weights obtained by the dual coordinate descent method are non-zero.
+    return total_num_weights * 16 * 2/3
 
 
 def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node: Node):