Commit 37a0920

Fix for issue #21118: inconsistent behavior across callbacks (#21275)
* Replaced the mode="auto" logic in ModelCheckpoint with the generalized logic used in EarlyStopping.
* Added a unit test for the fixed use case #15
* Extracted _set_monitor_op() to MonitoredCallback class for reuse across all callbacks that need it
* Changed to MonitorCallback
* Changed to MonitorCallback
* Added MonitorCallback
* Added @pytest.mark.requires_trainable_backend
* Changed to MonitorCallback
* Removed exporting to public API
1 parent 22de2de commit 37a0920
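The gist of the change: with `mode="auto"`, EarlyStopping already inferred the metric direction from the metric object's `_direction` attribute, while ModelCheckpoint used an older name heuristic (`"acc"`/`"fmeasure"`), so the two callbacks could disagree about the same monitored quantity. After this commit both go through the shared MonitorCallback logic. A minimal sketch of the now-consistent usage (the model and data here are illustrative, not from the commit):

```python
import numpy as np
import keras

# Both callbacks monitor "val_auc" in auto mode; after this commit they
# agree that AUC is to be maximized (inferred from the metric itself).
model = keras.Sequential([keras.layers.Dense(1, activation="sigmoid")])
model.compile(
    loss="binary_crossentropy",
    optimizer="sgd",
    metrics=[keras.metrics.AUC()],
)

x = np.random.rand(64, 4).astype("float32")
y = np.random.randint(0, 2, size=(64, 1))

cbs = [
    keras.callbacks.ModelCheckpoint(
        "best.keras", monitor="val_auc", save_best_only=True, mode="auto"
    ),
    keras.callbacks.EarlyStopping(monitor="val_auc", patience=2, mode="auto"),
]
model.fit(x, y, validation_split=0.25, epochs=3, callbacks=cbs, verbose=0)
```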

7 files changed: +243 additions, −124 deletions

keras/src/callbacks/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -7,6 +7,7 @@
 from keras.src.callbacks.lambda_callback import LambdaCallback
 from keras.src.callbacks.learning_rate_scheduler import LearningRateScheduler
 from keras.src.callbacks.model_checkpoint import ModelCheckpoint
+from keras.src.callbacks.monitor_callback import MonitorCallback
 from keras.src.callbacks.progbar_logger import ProgbarLogger
 from keras.src.callbacks.reduce_lr_on_plateau import ReduceLROnPlateau
 from keras.src.callbacks.remote_monitor import RemoteMonitor

keras/src/callbacks/early_stopping.py

Lines changed: 3 additions & 62 deletions

@@ -1,14 +1,12 @@
 import warnings
 
-from keras.src import ops
 from keras.src.api_export import keras_export
-from keras.src.callbacks.callback import Callback
-from keras.src.trainers import compile_utils
+from keras.src.callbacks.monitor_callback import MonitorCallback
 from keras.src.utils import io_utils
 
 
 @keras_export("keras.callbacks.EarlyStopping")
-class EarlyStopping(Callback):
+class EarlyStopping(MonitorCallback):
     """Stop training when a monitored metric has stopped improving.
 
     Assuming the goal of a training is to minimize the loss. With this, the

@@ -76,72 +74,20 @@ def __init__(
         restore_best_weights=False,
         start_from_epoch=0,
     ):
-        super().__init__()
-
-        self.monitor = monitor
+        super().__init__(monitor, mode, min_delta=min_delta)
         self.patience = patience
         self.verbose = verbose
         self.baseline = baseline
-        self.min_delta = abs(min_delta)
         self.wait = 0
         self.stopped_epoch = 0
         self.restore_best_weights = restore_best_weights
         self.best_weights = None
         self.start_from_epoch = start_from_epoch
 
-        if mode not in ["auto", "min", "max"]:
-            warnings.warn(
-                f"EarlyStopping mode {mode} is unknown, fallback to auto mode.",
-                stacklevel=2,
-            )
-            mode = "auto"
-        self.mode = mode
-        self.monitor_op = None
-
-    def _set_monitor_op(self):
-        if self.mode == "min":
-            self.monitor_op = ops.less
-        elif self.mode == "max":
-            self.monitor_op = ops.greater
-        else:
-            metric_name = self.monitor.removeprefix("val_")
-            if metric_name == "loss":
-                self.monitor_op = ops.less
-            if hasattr(self.model, "metrics"):
-                all_metrics = []
-                for m in self.model.metrics:
-                    if isinstance(
-                        m,
-                        (
-                            compile_utils.CompileMetrics,
-                            compile_utils.MetricsList,
-                        ),
-                    ):
-                        all_metrics.extend(m.metrics)
-                for m in all_metrics:
-                    if m.name == metric_name:
-                        if hasattr(m, "_direction"):
-                            if m._direction == "up":
-                                self.monitor_op = ops.greater
-                            else:
-                                self.monitor_op = ops.less
-        if self.monitor_op is None:
-            raise ValueError(
-                f"EarlyStopping callback received monitor={self.monitor} "
-                "but Keras isn't able to automatically determine whether "
-                "that metric should be maximized or minimized. "
-                "Pass `mode='max'` in order to do early stopping based "
-                "on the highest metric value, or pass `mode='min'` "
-                "in order to use the lowest value."
-            )
-        if self.monitor_op == ops.less:
-            self.min_delta *= -1
-
     def on_train_begin(self, logs=None):
         # Allow instances to be re-used
         self.wait = 0
         self.stopped_epoch = 0
-        self.best = None
         self.best_weights = None
         self.best_epoch = 0

@@ -206,8 +152,3 @@ def get_monitor_value(self, logs):
                 stacklevel=2,
             )
         return monitor_value
-
-    def _is_improvement(self, monitor_value, reference_value):
-        if reference_value is None:
-            return True
-        return self.monitor_op(monitor_value - self.min_delta, reference_value)

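The sign flip on `min_delta` (now living in MonitorCallback, shown in the new file below) is what lets a single comparison in `_is_improvement` handle both directions. A small sketch of the arithmetic, under the same convention:

```python
from keras import ops

# _is_improvement computes monitor_op(current - min_delta, best).
# When minimizing, min_delta is negated, so the test becomes
# less(current + min_delta, best): current must undercut best by min_delta.
min_delta = -0.1  # abs(0.1), negated because monitor_op is ops.less
best = 1.0
print(ops.less(0.85 - min_delta, best))  # True: 0.95 < 1.0, improvement
print(ops.less(0.95 - min_delta, best))  # False: 1.05 < 1.0 fails
```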
keras/src/callbacks/model_checkpoint.py

Lines changed: 10 additions & 35 deletions

@@ -6,13 +6,13 @@
 
 from keras.src import backend
 from keras.src.api_export import keras_export
-from keras.src.callbacks.callback import Callback
+from keras.src.callbacks.monitor_callback import MonitorCallback
 from keras.src.utils import file_utils
 from keras.src.utils import io_utils
 
 
 @keras_export("keras.callbacks.ModelCheckpoint")
-class ModelCheckpoint(Callback):
+class ModelCheckpoint(MonitorCallback):
     """Callback to save the Keras model or model weights at some frequency.
 
     `ModelCheckpoint` callback is used in conjunction with training using

@@ -105,9 +105,8 @@ class ModelCheckpoint(Callback):
         decision to overwrite the current save file is made based on either
         the maximization or the minimization of the monitored quantity.
         For `val_acc`, this should be `"max"`, for `val_loss` this should be
-        `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the
-        quantities monitored are `"acc"` or start with `"fmeasure"` and are
-        set to `"min"` for the rest of the quantities.
+        `"min"`, etc. In `"auto"` mode, the direction is automatically
+        inferred from the name of the monitored quantity.
         save_weights_only: if `True`, then only the model's weights will be
             saved (`model.save_weights(filepath)`), else the full model is
             saved (`model.save(filepath)`).

@@ -136,42 +135,14 @@ def __init__(
         save_freq="epoch",
         initial_value_threshold=None,
     ):
-        super().__init__()
-        self.monitor = monitor
+        super().__init__(monitor, mode, initial_value_threshold)
         self.verbose = verbose
         self.filepath = file_utils.path_to_string(filepath)
         self.save_best_only = save_best_only
         self.save_weights_only = save_weights_only
         self.save_freq = save_freq
         self._batches_seen_since_last_saving = 0
         self._last_batch_seen = 0
-        self.best = initial_value_threshold
-
-        if mode not in ["auto", "min", "max"]:
-            warnings.warn(
-                f"ModelCheckpoint mode '{mode}' is unknown, "
-                "fallback to auto mode.",
-                stacklevel=2,
-            )
-            mode = "auto"
-
-        if mode == "min":
-            self.monitor_op = np.less
-            if self.best is None:
-                self.best = np.inf
-        elif mode == "max":
-            self.monitor_op = np.greater
-            if self.best is None:
-                self.best = -np.inf
-        else:
-            if "acc" in self.monitor or self.monitor.startswith("fmeasure"):
-                self.monitor_op = np.greater
-                if self.best is None:
-                    self.best = -np.inf
-            else:
-                self.monitor_op = np.less
-                if self.best is None:
-                    self.best = np.inf
 
         if self.save_freq != "epoch" and not isinstance(self.save_freq, int):
             raise ValueError(

@@ -205,6 +176,10 @@ def on_epoch_begin(self, epoch, logs=None):
         self._current_epoch = epoch
 
     def on_epoch_end(self, epoch, logs=None):
+        if self.monitor_op is None:
+            # Delay setup until the model's metrics are all built
+            self._set_monitor_op()
+
         if self.save_freq == "epoch":
             self._save_model(epoch=epoch, batch=None, logs=logs)
 

@@ -262,7 +237,7 @@ def _should_save_model(self, epoch, batch, logs, filepath):
             )
             return True
         else:
-            if self.monitor_op(current, self.best):
+            if self._is_improvement(current, self.best):
                 if self.verbose > 0:
                     io_utils.print_msg(
                         f"\nEpoch {epoch + 1}: {self.monitor} "

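The deferred `_set_monitor_op()` call is the key behavioral change: in auto mode the direction can only be read off the compiled metric objects, which don't exist until the model is built, so resolution waits until the first `on_epoch_end`. The attribute it inspects is the metric's private `_direction` ("up" means higher is better), as this quick check (assuming the metric exposes it) illustrates:

```python
import keras

# MonitorCallback only trusts metrics that expose `_direction`;
# anything else in auto mode raises a ValueError asking for an
# explicit mode="min"/"max".
m = keras.metrics.AUC()
print(getattr(m, "_direction", "not exposed"))  # expected: "up"
```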
keras/src/callbacks/model_checkpoint_test.py

Lines changed: 32 additions & 1 deletion

@@ -164,7 +164,7 @@ def get_model():
         # Case 5: metric not available.
         cbks = [
             callbacks.ModelCheckpoint(
-                filepath, monitor="unknown", save_best_only=True
+                filepath, monitor="unknown", save_best_only=True, mode="min"
             )
         ]
         with pytest.warns(UserWarning):

@@ -453,6 +453,37 @@ def get_model():
         )
         self.assertFalse(os.path.exists(filepath))
 
+        # Case 15: ModelCheckpoint doesn't save model if auc was max earlier in
+        # auto mode
+        mode = "auto"
+        monitor = "val_auc"
+        initial_value_threshold = 1
+        save_best_only = True
+        cbks = [
+            callbacks.ModelCheckpoint(
+                filepath,
+                monitor=monitor,
+                save_best_only=save_best_only,
+                initial_value_threshold=initial_value_threshold,
+                mode=mode,
+            )
+        ]
+        model.compile(
+            loss="categorical_crossentropy",
+            optimizer="sgd",
+            metrics=[metrics.AUC()],
+        )
+        model.fit(
+            x_train,
+            y_train,
+            batch_size=BATCH_SIZE,
+            validation_data=(x_test, y_test),
+            callbacks=cbks,
+            epochs=1,
+            verbose=0,
+        )
+        self.assertFalse(os.path.exists(filepath))
+
     @pytest.mark.skipif(
         h5py is None,
         reason="`h5py` is a required dependency for `ModelCheckpoint` tests.",
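Why this test catches the old behavior: AUC is bounded above by 1, so with `initial_value_threshold=1` and a correctly inferred `"max"` direction nothing can improve and no checkpoint file is written. The removed heuristic would have classified `val_auc` as a min-mode metric (no `"acc"` substring, no `"fmeasure"` prefix) and saved on the first epoch. A sketch of that removed logic:

```python
# Sketch of the heuristic this commit removes from ModelCheckpoint:
monitor = "val_auc"
old_mode = (
    "max" if "acc" in monitor or monitor.startswith("fmeasure") else "min"
)
print(old_mode)  # "min" -- val_auc would wrongly be minimized, so any
# finite AUC "improves" on the threshold of 1 and a checkpoint gets
# written; the new test asserts that no file exists.
```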
keras/src/callbacks/monitor_callback.py

Lines changed: 104 additions & 0 deletions

@@ -0,0 +1,104 @@
+import warnings
+
+from keras.src import ops
+from keras.src.callbacks.callback import Callback
+from keras.src.trainers import compile_utils
+
+
+class MonitorCallback(Callback):
+    """Base class for callbacks that monitor a quantity and evaluate
+    improvements.
+
+    This class provides common functionality for callbacks that monitor a
+    metric during training to determine whether a condition has been met,
+    such as improvement over time. It encapsulates logic for selecting
+    the comparison operation based on a `monitor` value and `mode`, and
+    computing whether a new value is an improvement.
+
+    It is intended to be subclassed by other callbacks like `ModelCheckpoint`,
+    `EarlyStopping`, or `ReduceLROnPlateau`, and is not meant to be used
+    directly.
+
+    Arguments:
+        monitor: Quantity to be monitored. Defaults to `"val_loss"`.
+        mode: One of `{"auto", "min", "max"}`. In `"min"` mode, training will
+            aim to minimize the monitored quantity; in `"max"` mode it will
+            aim to maximize it; in `"auto"` mode, the direction is
+            automatically inferred from the name of the monitored quantity.
+            Defaults to `"auto"`.
+        baseline: Floating point initial "best" value of the metric to be
+            monitored. If `None` (default), the first monitored value will be
+            used.
+        min_delta: Minimum change in the monitored quantity to qualify as an
+            improvement, i.e. an absolute change of less than `min_delta` will
+            count as no improvement. Defaults to `0`.
+
+    Raises:
+        ValueError: If `mode="auto"` is selected and the direction of the
+            metric cannot be inferred.
+    """
+
+    def __init__(
+        self,
+        monitor="val_loss",
+        mode="auto",
+        baseline=None,
+        min_delta=0,
+    ):
+        super().__init__()
+        if mode not in ["auto", "min", "max"]:
+            warnings.warn(
+                f"{self.__class__.__name__} mode '{mode}' is unknown, fallback "
+                "to auto mode.",
+                stacklevel=2,
+            )
+            mode = "auto"
+        self.monitor = monitor
+        self.mode = mode
+        self.best = baseline
+        self.min_delta = abs(min_delta)
+        self.monitor_op = None
+
+    def _set_monitor_op(self):
+        if self.mode == "min":
+            self.monitor_op = ops.less
+        elif self.mode == "max":
+            self.monitor_op = ops.greater
+        else:
+            metric_name = self.monitor.removeprefix("val_")
+            if metric_name == "loss":
+                self.monitor_op = ops.less
+            if hasattr(self.model, "metrics"):
+                all_metrics = []
+                for m in self.model.metrics:
+                    if isinstance(
+                        m,
+                        (
+                            compile_utils.CompileMetrics,
+                            compile_utils.MetricsList,
+                        ),
+                    ):
+                        all_metrics.extend(m.metrics)
+                for m in all_metrics:
+                    if m.name == metric_name:
+                        if hasattr(m, "_direction"):
+                            if m._direction == "up":
+                                self.monitor_op = ops.greater
+                            else:
+                                self.monitor_op = ops.less
+        if self.monitor_op is None:
+            raise ValueError(
+                f"{self.__class__.__name__} callback received "
+                f"monitor={self.monitor}, but Keras isn't able to "
+                "automatically determine whether that metric should be "
+                "maximized or minimized. Pass `mode='max'` in order to "
+                "monitor based on the highest metric value, or pass "
+                "`mode='min'` in order to use the lowest value."
+            )
+        if self.monitor_op == ops.less:
+            self.min_delta *= -1
+
+    def _is_improvement(self, monitor_value, reference_value):
+        if reference_value is None:
+            return True
+        return self.monitor_op(monitor_value - self.min_delta, reference_value)
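Since MonitorCallback is deliberately not exported to the public API (last bullet of the commit message), in-tree callbacks import it from `keras.src`. A hypothetical subclass, sketching the intended reuse pattern (the class name and behavior here are illustrative, not part of the commit):

```python
from keras.src.callbacks.monitor_callback import MonitorCallback


class ImprovementLogger(MonitorCallback):
    """Hypothetical callback: prints whenever the monitored metric improves."""

    def on_epoch_end(self, epoch, logs=None):
        if self.monitor_op is None:
            # Same deferred setup as ModelCheckpoint: metrics exist only
            # after the model is compiled and built.
            self._set_monitor_op()
        current = (logs or {}).get(self.monitor)
        if current is not None and self._is_improvement(current, self.best):
            print(f"Epoch {epoch + 1}: {self.monitor} improved to {current}")
            self.best = current
```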
