ADD: optimizer state can now be saved & reinstantiated (#279)

VincentAuriau · web-flow · commit 1ecfaeff6171 · 2025-10-16T00:16:46.000+02:00
diff --git a/choice_learn/models/base_model.py b/choice_learn/models/base_model.py
@@ -516,7 +516,7 @@ def batch_predict(
         }
         return batch_loss, probabilities
 
-    def save_model(self, path):
+    def save_model(self, path, save_opt=True):
         """Save the different models on disk.
 
         Parameters
@@ -538,16 +538,34 @@ def save_model(self, path):
             elif isinstance(v, (list, tuple)):
                 if all(isinstance(item, (int, float, str, dict)) for item in v):
                     params[k] = v
-                else:
+                elif k != "_trainable_weights":
                     logging.warning(
                         """Attribute '%s' is a list with non-serializable
                          types and will not be saved.""",
                         k,
                     )
-        with open(os.path.join(path, "params.json"), "w") as f:
+        with open(Path(path) / "params.json", "w") as f:
             json.dump(params, f)
 
         # Save optimizer state
+        if save_opt and not isinstance(self.optimizer, str):
+            (Path(path) / "optimizer").mkdir(parents=True, exist_ok=True)
+            config = self.optimizer.get_config()
+            weights_store = {}
+            self.optimizer.save_own_variables(weights_store)
+            for key, value in weights_store.items():
+                if isinstance(value, tf.Variable):
+                    value = value.numpy()
+                weights_store[key] = value.tolist()
+            if "learning_rate" in config.keys():
+                if isinstance(config["learning_rate"], tf.Variable):
+                    config["learning_rate"] = config["learning_rate"].numpy()
+                if isinstance(config["learning_rate"], np.float32):
+                    config["learning_rate"] = config["learning_rate"].tolist()
+            with open(Path(path) / "optimizer" / "config.json", "w") as f:
+                json.dump(config, f)
+            with open(Path(path) / "optimizer" / "weights_store.json", "w") as f:
+                json.dump(weights_store, f)
 
     @classmethod
     def load_model(cls, path):
@@ -563,7 +581,11 @@ def load_model(cls, path):
         ChoiceModel
             Loaded ChoiceModel
         """
-        obj = cls()
+        # To improve for non string attributes
+        with open(Path(path) / "params.json") as f:
+            params = json.load(f)
+
+        obj = cls(optimizer=params["optimizer_name"])
         obj._trainable_weights = []
 
         i = 0
@@ -576,11 +598,22 @@ def load_model(cls, path):
             i += 1
             weight_path = f"weight_{i}.npy"
 
-        # To improve for non string attributes
-        params = json.load(open(Path(path) / "params.json"))
         for k, v in params.items():
             setattr(obj, k, v)
 
+        if Path.is_dir(Path(path) / "optimizer"):
+            with open(Path(path) / "optimizer" / "config.json") as f:
+                config = json.load(f)
+            # obj.optimizer = tf.keras.optimizers.get(params["optimizer_name"]).from_config(config)
+            obj.optimizer = obj.optimizer.from_config(config)
+            obj.optimizer.build(var_list=obj.trainable_weights)
+
+            with open(Path(path) / "optimizer" / "weights_store.json") as f:
+                store = json.load(f)
+            for key, value in store.items():
+                store[key] = np.array(value, dtype=np.float32)
+            obj.optimizer.load_own_variables(store)
+
         # Load optimizer step
         return obj
 
diff --git a/notebooks/auxiliary_tools/assortment_example.ipynb b/notebooks/auxiliary_tools/assortment_example.ipynb
@@ -961,7 +961,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.18"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,
diff --git a/notebooks/models/generic_and_useful_tutorials/saving_loading_models.ipynb b/notebooks/models/generic_and_useful_tutorials/saving_loading_models.ipynb
@@ -0,0 +1,303 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install necessary requirements\n",
+    "\n",
+    "# If you run this notebook on Google Colab, or in standalone mode, you need to install the required packages.\n",
+    "# Uncomment the following lines:\n",
+    "\n",
+    "# !pip install choice-learn\n",
+    "\n",
+    "# If you run the notebook within the GitHub repository, you need to run the following lines, which can be skipped otherwise:\n",
+    "import os\n",
+    "import sys\n",
+    "\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n",
+    "sys.path.append(\"../../\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "\n",
+    "# Enabling eager execution sometimes decreases fitting time\n",
+    "tf.compat.v1.enable_eager_execution()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from choice_learn.models import ConditionalLogit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from choice_learn.datasets import load_swissmetro\n",
+    "\n",
+    "swiss_dataset = load_swissmetro(preprocessing=\"tutorial\")\n",
+    "print(swiss_dataset.summary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialization of the model\n",
+    "swiss_model = ConditionalLogit(optimizer=\"Adam\", epochs=25, lr=0.01)\n",
+    "\n",
+    "# Intercept for train & sm\n",
+    "swiss_model.add_coefficients(feature_name=\"intercept\", items_indexes=[0, 1])\n",
+    "# beta_he for train & sm\n",
+    "swiss_model.add_coefficients(feature_name=\"headway\",\n",
+    "                             items_indexes=[0, 1],\n",
+    "                             coefficient_name=\"beta_he\")\n",
+    "# beta_co for all items\n",
+    "swiss_model.add_coefficients(feature_name=\"cost\",\n",
+    "                             items_indexes=[0, 1, 2])\n",
+    "# beta regular_class for train\n",
+    "swiss_model.add_coefficients(feature_name=\"regular_class\",\n",
+    "                             items_indexes=[0])\n",
+    "# beta seats for sm\n",
+    "swiss_model.add_coefficients(feature_name=\"seats\", items_indexes=[1])\n",
+    "# betas luggage for car\n",
+    "swiss_model.add_coefficients(feature_name=\"single_luggage_piece\",\n",
+    "                             items_indexes=[2],\n",
+    "                             coefficient_name=\"beta_luggage=1\")\n",
+    "swiss_model.add_coefficients(feature_name=\"multiple_luggage_piece\",\n",
+    "                             items_indexes=[2],\n",
+    "                             coefficient_name=\"beta_luggage>1\")\n",
+    "# beta TT only for car\n",
+    "swiss_model.add_coefficients(feature_name=\"travel_time\",\n",
+    "                             items_indexes=[2],\n",
+    "                             coefficient_name=\"beta_tt_car\")\n",
+    "\n",
+    "# betas TT and survey shared by train and sm\n",
+    "swiss_model.add_shared_coefficient(feature_name=\"travel_time\",\n",
+    "                                   items_indexes=[0, 1])\n",
+    "swiss_model.add_shared_coefficient(feature_name=\"train_survey\",\n",
+    "                                   items_indexes=[0, 1],\n",
+    "                                   coefficient_name=\"beta_survey\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimation of the model\n",
+    "history = swiss_model.fit(swiss_dataset, get_report=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "isinstance(swiss_model.optimizer.get_config()[\"learning_rate\"], np.float32), isinstance(swiss_model.optimizer.get_config()[\"learning_rate\"], np.ndarray)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "swiss_model.save_model(\"test_save\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "swiss_model2 = ConditionalLogit.load_model(\"test_save\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hist = swiss_model2.fit(swiss_dataset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "\n",
+    "shutil.rmtree(\"test_save\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "11",
+   "metadata": {},
+   "source": [
+    "## Save every n epochs with a custom tf.Callback"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SaveCallback(tf.keras.callbacks.Callback):\n",
+    "    \"\"\"Callback to regularly save the model during training.\"\"\"\n",
+    "\n",
+    "    def __init__(self, base_dir, save_every_n, *args, **kwargs):\n",
+    "        \"\"\"Instantiate callback.\"\"\"\n",
+    "        self.base_dir = base_dir\n",
+    "        self.save_every_n = save_every_n\n",
+    "        super().__init__(*args, **kwargs)\n",
+    "\n",
+    "    def on_epoch_end(self, epoch, logs=None):\n",
+    "        \"\"\"Define saving at the end of each epoch.\"\"\"\n",
+    "        _ = logs\n",
+    "        if (epoch + 1) % self.save_every_n == 0:\n",
+    "            self._save_model(epoch=epoch)\n",
+    "\n",
+    "    def _save_model(self, epoch):\n",
+    "        \"\"\"Handle model saving internally.\"\"\"\n",
+    "        dirname = os.path.join(self.base_dir, f\"epoch_{epoch}\")\n",
+    "        self.model.save_model(dirname)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialization of the model\n",
+    "swiss_model = ConditionalLogit(optimizer=\"Adam\", epochs=25, lr=0.01, callbacks=[SaveCallback(base_dir=\"test_save_cb\", save_every_n=2)])\n",
+    "\n",
+    "# Intercept for train & sm\n",
+    "swiss_model.add_coefficients(feature_name=\"intercept\", items_indexes=[0, 1])\n",
+    "# beta_he for train & sm\n",
+    "swiss_model.add_coefficients(feature_name=\"headway\",\n",
+    "                             items_indexes=[0, 1],\n",
+    "                             coefficient_name=\"beta_he\")\n",
+    "# beta_co for all items\n",
+    "swiss_model.add_coefficients(feature_name=\"cost\",\n",
+    "                             items_indexes=[0, 1, 2])\n",
+    "# beta regular_class for train\n",
+    "swiss_model.add_coefficients(feature_name=\"regular_class\",\n",
+    "                             items_indexes=[0])\n",
+    "# beta seats for sm\n",
+    "swiss_model.add_coefficients(feature_name=\"seats\", items_indexes=[1])\n",
+    "# betas luggage for car\n",
+    "swiss_model.add_coefficients(feature_name=\"single_luggage_piece\",\n",
+    "                             items_indexes=[2],\n",
+    "                             coefficient_name=\"beta_luggage=1\")\n",
+    "swiss_model.add_coefficients(feature_name=\"multiple_luggage_piece\",\n",
+    "                             items_indexes=[2],\n",
+    "                             coefficient_name=\"beta_luggage>1\")\n",
+    "# beta TT only for car\n",
+    "swiss_model.add_coefficients(feature_name=\"travel_time\",\n",
+    "                             items_indexes=[2],\n",
+    "                             coefficient_name=\"beta_tt_car\")\n",
+    "\n",
+    "# betas TT and survey shared by train and sm\n",
+    "swiss_model.add_shared_coefficient(feature_name=\"travel_time\",\n",
+    "                                   items_indexes=[0, 1])\n",
+    "swiss_model.add_shared_coefficient(feature_name=\"train_survey\",\n",
+    "                                   items_indexes=[0, 1],\n",
+    "                                   coefficient_name=\"beta_survey\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Estimation of the model\n",
+    "history = swiss_model.fit(swiss_dataset, get_report=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# remove\n",
+    "shutil.rmtree(\"test_save_cb\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tf_env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Original file line number	Diff line number	Diff line change
`@@ -961,7 +961,7 @@`
`961`	`961`	`"name": "python",`
`962`	`962`	`"nbconvert_exporter": "python",`
`963`	`963`	`"pygments_lexer": "ipython3",`
`964`		`- "version": "3.8.18"`
	`964`	`+ "version": "3.11.4"`
`965`	`965`	`}`
`966`	`966`	`},`
`967`	`967`	`"nbformat": 4,`