add: random tie break for expected error reduction

cosmic-cortex · cosmic-cortex · commit fad91e80324e · 2018-12-05T13:30:48.000+01:00
diff --git a/modAL/expected_error.py b/modAL/expected_error.py
@@ -11,12 +11,13 @@
 
 from modAL.models import ActiveLearner
 from modAL.utils.data import modALinput, data_vstack
-from modAL.utils.selection import multi_argmax
+from modAL.utils.selection import multi_argmax, shuffled_argmax
 from modAL.uncertainty import _proba_uncertainty, _proba_entropy
 
 
 def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str = 'binary',
-                             p_subsample: np.float = 1.0, n_instances: int = 1) -> Tuple[np.ndarray, modALinput]:
+                             p_subsample: np.float = 1.0, n_instances: int = 1,
+                             random_tie_break: bool = False) -> Tuple[np.ndarray, modALinput]:
     """
     Expected error reduction query strategy.
 
@@ -32,6 +33,8 @@ def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str =
             calculating expected error. Significantly improves runtime
             for large sample pools.
         n_instances: The number of instances to be sampled.
+        random_tie_break: If True, the utility scores are shuffled before selection, so
+            that ties are broken at random when the highest utility score is not unique.
 
 
     Returns:
@@ -73,6 +76,9 @@ def expected_error_reduction(learner: ActiveLearner, X: modALinput, loss: str =
         else:
             expected_error[x_idx] = np.inf
 
-    query_idx = multi_argmax(expected_error, n_instances)
+    if not random_tie_break:
+        query_idx = multi_argmax(expected_error, n_instances)
+    else:
+        query_idx = shuffled_argmax(expected_error, n_instances)
 
     return query_idx, X[query_idx]
diff --git a/tests/core_tests.py b/tests/core_tests.py
@@ -462,6 +462,7 @@ def test_eer(self):
                                                  X_training=X_training, y_training=y_training)
 
             modAL.expected_error.expected_error_reduction(learner, X_pool)
+            modAL.expected_error.expected_error_reduction(learner, X_pool, random_tie_break=True)
             modAL.expected_error.expected_error_reduction(learner, X_pool, p_subsample=0.1)
             modAL.expected_error.expected_error_reduction(learner, X_pool, loss='binary')
             modAL.expected_error.expected_error_reduction(learner, X_pool, p_subsample=0.1, loss='log')