From 2659b546ba47b5e174d8b77feff09b2a2b709b80 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 09:32:31 +0000 Subject: [PATCH 1/9] add the initial example. --- notebooks/en/_toctree.yml | 2 + .../en/optuna_hpo_with_transformers.ipynb | 108 ++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 notebooks/en/optuna_hpo_with_transformers.ipynb diff --git a/notebooks/en/_toctree.yml b/notebooks/en/_toctree.yml index e46efc44..1d50d46a 100644 --- a/notebooks/en/_toctree.yml +++ b/notebooks/en/_toctree.yml @@ -82,6 +82,8 @@ title: HuatuoGPT-o1 Medical RAG and Reasoning - local: fine_tune_chatbot_docs_synthetic title: Documentation Chatbot with Meta Synthetic Data Kit + - local: + title: diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb new file mode 100644 index 00000000..172ee800 --- /dev/null +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "b9fb3a7c", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "import evaluate\n", + "import optuna\n", + "import torch\n", + "\n", + "from transformers import AutoConfig\n", + "from transformers import AutoModelForSequenceClassification\n", + "from transformers import AutoTokenizer\n", + "from transformers import set_seed\n", + "from transformers import Trainer\n", + "from transformers import TrainingArguments\n", + "\n", + "\n", + "# Set seed for reproducibility\n", + "set_seed(42)\n", + "\n", + "# Load IMDb dataset\n", + "dataset = load_dataset(\"imdb\")\n", + "\n", + "metric = evaluate.load(\"accuracy\") # Replaces deprecated load_metric\n", + "\n", + "# Model name\n", + "model_name = \"lvwerra/distilbert-imdb\"\n", + "\n", + "# Tokenizer\n", + "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", + "\n", + "\n", + "def tokenize(batch):\n", + " return tokenizer(batch[\"text\"], padding=True, truncation=True)\n", + "\n", + "\n", + "dataset = dataset.map(tokenize, batched=True)\n", + "dataset = dataset.rename_column(\"label\", \"labels\")\n", + "dataset.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n", + "\n", + "# ➔ Define train and eval datasets here (before slicing)\n", + "train_dataset = dataset[\"train\"]\n", + "eval_dataset = dataset[\"test\"]\n", + "\n", + "# Slice datasets for faster experiments\n", + "train_dataset = train_dataset.select(range(1500))\n", + "eval_dataset = eval_dataset.select(range(500))\n", + "\n", + "# Model config\n", + "config = AutoConfig.from_pretrained(model_name, num_labels=2)\n", + "\n", + "\n", + "# Model initialization function\n", + "def model_init(trial):\n", + " return AutoModelForSequenceClassification.from_pretrained(\n", + " model_name,\n", + " config=config,\n", + " )\n", + "\n", + "\n", + "# Compute accuracy\n", + "def compute_metrics(eval_pred):\n", + " logits, labels = eval_pred\n", + " predictions = logits.argmax(axis=-1)\n", + " return metric.compute(predictions=predictions, references=labels)\n", + "\n", + "\n", + "# Define Optuna search space\n", + "def optuna_hp_space(trial):\n", + " return {\n", + " \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-6, 1e-4, log=True),\n", + " \"per_device_train_batch_size\": trial.suggest_categorical(\n", + " \"per_device_train_batch_size\", [8, 16]\n", + " ),\n", + " \"num_train_epochs\": trial.suggest_int(\"num_train_epochs\", 2, 3),\n", 
+ " }\n", + "\n", + "\n", + "# Training arguments\n", + "best_run = trainer.hyperparameter_search(\n", + " direction=\"maximize\",\n", + " backend=\"optuna\",\n", + " hp_space=optuna_hp_space,\n", + " n_trials=5,\n", + " compute_objective=compute_objective,\n", + ")\n", + "\n", + "print(best_run)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 7c6cf763ab7940405eccba0ff82971be72aa23c7 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 15:51:17 +0000 Subject: [PATCH 2/9] update the example. --- notebooks/en/_toctree.yml | 4 +- .../en/optuna_hpo_with_transformers.ipynb | 139 ++++++++++++------ 2 files changed, 97 insertions(+), 46 deletions(-) diff --git a/notebooks/en/_toctree.yml b/notebooks/en/_toctree.yml index 1d50d46a..4f84f2a1 100644 --- a/notebooks/en/_toctree.yml +++ b/notebooks/en/_toctree.yml @@ -82,8 +82,8 @@ title: HuatuoGPT-o1 Medical RAG and Reasoning - local: fine_tune_chatbot_docs_synthetic title: Documentation Chatbot with Meta Synthetic Data Kit - - local: - title: + - local: optuna_hpo_with_transformers + title: Hyperparameter Optimization with Optuna and Transformers diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 172ee800..23e9ed35 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -1,22 +1,34 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "08092aa8", + "metadata": {}, + "source": [ + "# Hyperparameter Optimization with Optuna and Transformers\n", + "\n", + "_Authored by: [Parag Ekbote](https://github.com/ParagEkbote)_\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "3612d3e9", + "metadata": {}, + "source": [ + "!pip install -q datasets evaluate transformers" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "b9fb3a7c", - "metadata": { - "vscode": { - "languageId": "plaintext" - } - }, + "id": "2cfb9d5e", + "metadata": {}, "outputs": [], "source": [ "from datasets import load_dataset\n", "import evaluate\n", - "import optuna\n", - "import torch\n", "\n", - "from transformers import AutoConfig\n", "from transformers import AutoModelForSequenceClassification\n", "from transformers import AutoTokenizer\n", "from transformers import set_seed\n", @@ -24,68 +36,73 @@ "from transformers import TrainingArguments\n", "\n", "\n", - "# Set seed for reproducibility\n", "set_seed(42)\n", "\n", - "# Load IMDb dataset\n", - "dataset = load_dataset(\"imdb\")\n", - "\n", - "metric = evaluate.load(\"accuracy\") # Replaces deprecated load_metric\n", "\n", - "# Model name\n", - "model_name = \"lvwerra/distilbert-imdb\"\n", + "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(1000))\n", + "valid_dataset = load_dataset(\"imdb\", split=\"test\").shuffle(seed=42).select(range(500))\n", "\n", - "# Tokenizer\n", + "model_name = \"prajjwal1/bert-tiny\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", "\n", "def tokenize(batch):\n", - " return tokenizer(batch[\"text\"], padding=True, truncation=True)\n", + " return tokenizer(batch[\"text\"], padding=\"max_length\", truncation=True, max_length=512)\n", "\n", "\n", - "dataset = dataset.map(tokenize, batched=True)\n", - "dataset = dataset.rename_column(\"label\", \"labels\")\n", - "dataset.set_format(\"torch\", columns=[\"input_ids\", \"attention_mask\", \"labels\"])\n", - "\n", - "# ➔ Define train and eval datasets here (before 
slicing)\n", - "train_dataset = dataset[\"train\"]\n", - "eval_dataset = dataset[\"test\"]\n", + "tokenized_train = train_dataset.map(tokenize, batched=True).select_columns(\n", + " [\"input_ids\", \"attention_mask\", \"label\"]\n", + ")\n", + "tokenized_valid = valid_dataset.map(tokenize, batched=True).select_columns(\n", + " [\"input_ids\", \"attention_mask\", \"label\"]\n", + ")\n", "\n", - "# Slice datasets for faster experiments\n", - "train_dataset = train_dataset.select(range(1500))\n", - "eval_dataset = eval_dataset.select(range(500))\n", "\n", - "# Model config\n", - "config = AutoConfig.from_pretrained(model_name, num_labels=2)\n", + "metric = evaluate.load(\"accuracy\")\n", "\n", "\n", - "# Model initialization function\n", - "def model_init(trial):\n", - " return AutoModelForSequenceClassification.from_pretrained(\n", - " model_name,\n", - " config=config,\n", - " )\n", + "def model_init():\n", + " return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)\n", "\n", "\n", - "# Compute accuracy\n", "def compute_metrics(eval_pred):\n", - " logits, labels = eval_pred\n", - " predictions = logits.argmax(axis=-1)\n", + " predictions = eval_pred.predictions.argmax(axis=-1)\n", + " labels = eval_pred.label_ids\n", " return metric.compute(predictions=predictions, references=labels)\n", "\n", "\n", - "# Define Optuna search space\n", + "def compute_objective(metrics):\n", + " return metrics[\"eval_accuracy\"]\n", + "\n", + "\n", + "training_args = TrainingArguments(\n", + " eval_strategy=\"epoch\",\n", + " save_strategy=\"best\",\n", + " load_best_model_at_end=True,\n", + " logging_strategy=\"epoch\",\n", + " report_to=\"none\",\n", + ")\n", + "\n", + "\n", + "trainer = Trainer(\n", + " model_init=model_init,\n", + " args=training_args,\n", + " train_dataset=tokenized_train,\n", + " eval_dataset=tokenized_valid,\n", + " processing_class=tokenizer,\n", + " compute_metrics=compute_metrics,\n", + ")\n", + "\n", + "\n", "def optuna_hp_space(trial):\n", " return {\n", " \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-6, 1e-4, log=True),\n", " \"per_device_train_batch_size\": trial.suggest_categorical(\n", - " \"per_device_train_batch_size\", [8, 16]\n", + " \"per_device_train_batch_size\", [16, 32, 64, 128]\n", " ),\n", - " \"num_train_epochs\": trial.suggest_int(\"num_train_epochs\", 2, 3),\n", " }\n", "\n", "\n", - "# Training arguments\n", "best_run = trainer.hyperparameter_search(\n", " direction=\"maximize\",\n", " backend=\"optuna\",\n", @@ -96,11 +113,45 @@ "\n", "print(best_run)" ] + }, + { + "cell_type": "markdown", + "id": "5a46fac4", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0ee0bae", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "b10c26c6", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ef88312", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { + "kernelspec": { + "display_name": "optuna", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.12.1" } }, "nbformat": 4, From 6e19b7351475f251278b8546045a9d52a44c6062 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 16:13:21 +0000 Subject: [PATCH 3/9] update example --- .../en/optuna_hpo_with_transformers.ipynb | 78 ++++++++++--------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git 
a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 23e9ed35..0d17faca 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -8,6 +8,8 @@ "# Hyperparameter Optimization with Optuna and Transformers\n", "\n", "_Authored by: [Parag Ekbote](https://github.com/ParagEkbote)_\n", + "\n", + "In this notebook, we are going\n", "\n" ] }, @@ -39,10 +41,10 @@ "set_seed(42)\n", "\n", "\n", - "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(1000))\n", - "valid_dataset = load_dataset(\"imdb\", split=\"test\").shuffle(seed=42).select(range(500))\n", + "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(2500))\n", + "valid_dataset = load_dataset(\"imdb\", split=\"test\").shuffle(seed=42).select(range(1000))\n", "\n", - "model_name = \"prajjwal1/bert-tiny\"\n", + "model_name = \"lvwerra/distilbert-imdb\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", "\n", @@ -62,9 +64,24 @@ "\n", "\n", "def model_init():\n", - " return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)\n", - "\n", - "\n", + " return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)\n" + ] + }, + { + "cell_type": "markdown", + "id": "5a46fac4", + "metadata": {}, + "source": [ + "# Set the Metrics and define the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0ee0bae", + "metadata": {}, + "outputs": [], + "source": [ "def compute_metrics(eval_pred):\n", " predictions = eval_pred.predictions.argmax(axis=-1)\n", " labels = eval_pred.label_ids\n", @@ -91,9 +108,24 @@ " eval_dataset=tokenized_valid,\n", " processing_class=tokenizer,\n", " compute_metrics=compute_metrics,\n", - ")\n", - "\n", - "\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b10c26c6", + "metadata": {}, + "source": [ + "# Define the Search Space and Start the Trials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ef88312", + "metadata": {}, + "outputs": [], + "source": [ "def optuna_hp_space(trial):\n", " return {\n", " \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-6, 1e-4, log=True),\n", @@ -113,34 +145,6 @@ "\n", "print(best_run)" ] - }, - { - "cell_type": "markdown", - "id": "5a46fac4", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e0ee0bae", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "b10c26c6", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ef88312", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 935ac901b1ea8819e8e34462b38053b61b6457ee Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 18:00:07 +0000 Subject: [PATCH 4/9] update the tutorial. 
--- .../en/optuna_hpo_with_transformers.ipynb | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 0d17faca..3fb98fc6 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -9,14 +9,17 @@ "\n", "_Authored by: [Parag Ekbote](https://github.com/ParagEkbote)_\n", "\n", - "In this notebook, we are going\n", - "\n" + "In this notebook, we are going to use the [optuna](https://github.com/optuna/optuna) library to perform hyperparameter optimization on a light-weight BERT model on a small subset of the IMDB dataset. To learn more about transformers' hyperparameter search, you can check the following documentation [here](https://huggingface.co/docs/transformers/en/hpo_train).\n", + "\n", + "Firstly, we will install the following dependencies to ensure that our code is executed:" ] }, { - "cell_type": "markdown", - "id": "3612d3e9", + "cell_type": "code", + "execution_count": null, + "id": "a309e1a0", "metadata": {}, + "outputs": [], "source": [ "!pip install -q datasets evaluate transformers" ] @@ -72,7 +75,8 @@ "id": "5a46fac4", "metadata": {}, "source": [ - "# Set the Metrics and define the model" + "# Set the Metrics and define the Trainer class\n", + "\n" ] }, { @@ -116,7 +120,14 @@ "id": "b10c26c6", "metadata": {}, "source": [ - "# Define the Search Space and Start the Trials" + "# Define the Search Space and Start the Trials\n", + "\n", + "We will now define the optuna hyperparameter search space to find the best set of hyperparameters for the learning rate and batch size. We can now launch the hyperparameter search by passing the following metrics:\n", + "\n", + "1. direction: We aim to maxime the evaluation metric\n", + "2. backend: We will use optuna for searching\n", + "3. n_trials: The number of trials optuna will be executed \n", + "4. compute_objective: THe objective to minimize or maximize from the metrics returned by `evaluate`" ] }, { @@ -139,7 +150,7 @@ " direction=\"maximize\",\n", " backend=\"optuna\",\n", " hp_space=optuna_hp_space,\n", - " n_trials=5,\n", + " n_trials=20,\n", " compute_objective=compute_objective,\n", ")\n", "\n", From 76e8684027105302bb0f2fd75539eada5875eaa2 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 18:09:33 +0000 Subject: [PATCH 5/9] update the tutorial subheadings. --- notebooks/en/optuna_hpo_with_transformers.ipynb | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 3fb98fc6..7a632d0c 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -11,7 +11,7 @@ "\n", "In this notebook, we are going to use the [optuna](https://github.com/optuna/optuna) library to perform hyperparameter optimization on a light-weight BERT model on a small subset of the IMDB dataset. 
To learn more about transformers' hyperparameter search, you can check the following documentation [here](https://huggingface.co/docs/transformers/en/hpo_train).\n", "\n", - "Firstly, we will install the following dependencies to ensure that our code is executed:" + "Firstly, we will install the following dependencies to ensure that our code is executed" ] }, { @@ -24,6 +24,12 @@ "!pip install -q datasets evaluate transformers" ] }, + { + "cell_type": "markdown", + "id": "eff9ccd6", + "metadata": {}, + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -76,7 +82,10 @@ "metadata": {}, "source": [ "# Set the Metrics and define the Trainer class\n", - "\n" + "\n", + "Now, we can define the metric function to calculate evaluation metrics after each eval step. We shall also define the objective function to maximize the accuracy when selecting the best hyperparameters.\n", + "\n", + "Finally, we will also define the training arguments for the Trainer that will handle the evaluation, checkpointing, logging and hyperparameter search." ] }, { From fdd97659d84898689c8ffbb52712a5ac7e53831a Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 18:43:06 +0000 Subject: [PATCH 6/9] update the tutorial. --- .../en/optuna_hpo_with_transformers.ipynb | 47 ++++++++++++++++--- 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 7a632d0c..161a0f46 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -11,7 +11,7 @@ "\n", "In this notebook, we are going to use the [optuna](https://github.com/optuna/optuna) library to perform hyperparameter optimization on a light-weight BERT model on a small subset of the IMDB dataset. To learn more about transformers' hyperparameter search, you can check the following documentation [here](https://huggingface.co/docs/transformers/en/hpo_train).\n", "\n", - "Firstly, we will install the following dependencies to ensure that our code is executed" + "Firstly, we will install the following dependencies for our code:" ] }, { @@ -21,14 +21,20 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -q datasets evaluate transformers" + "!pip install -q datasets evaluate transformers optuna" ] }, { "cell_type": "markdown", "id": "eff9ccd6", "metadata": {}, - "source": [] + "source": [ + "# Prepare the dataset and set the Model\n", + "\n", + "We will load the IMDB dataset which is a standard benchmark for sentiment analysis. We will define 2000 examples for the training split and 1000 examples for validation. Both sets are shuffled with a fixed seed to ensure reproducibility.\n", + "\n", + "We shall also tokenize the text and map to efficiently preprocesses all the dataset samples. Next, we will load the accuracy metric. We will also initialize the model to be instantiated for binary classification. 
" + ] }, { "cell_type": "code", @@ -50,7 +56,7 @@ "set_seed(42)\n", "\n", "\n", - "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(2500))\n", + "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(2000))\n", "valid_dataset = load_dataset(\"imdb\", split=\"test\").shuffle(seed=42).select(range(1000))\n", "\n", "model_name = \"lvwerra/distilbert-imdb\"\n", @@ -131,12 +137,12 @@ "source": [ "# Define the Search Space and Start the Trials\n", "\n", - "We will now define the optuna hyperparameter search space to find the best set of hyperparameters for the learning rate and batch size. We can now launch the hyperparameter search by passing the following metrics:\n", + "We will now define the optuna hyperparameter search space to find the best set of hyperparameters for the learning rate, weight decay and batch size. We can now launch the hyperparameter search by passing the following metrics:\n", "\n", "1. direction: We aim to maxime the evaluation metric\n", "2. backend: We will use optuna for searching\n", "3. n_trials: The number of trials optuna will be executed \n", - "4. compute_objective: THe objective to minimize or maximize from the metrics returned by `evaluate`" + "4. compute_objective: The objective to minimize or maximize from the metrics returned by `evaluate`" ] }, { @@ -152,6 +158,7 @@ " \"per_device_train_batch_size\": trial.suggest_categorical(\n", " \"per_device_train_batch_size\", [16, 32, 64, 128]\n", " ),\n", + " \"weight_decay\": trial.suggest_float(\"weight_decay\", 0.0, 0.3),\n", " }\n", "\n", "\n", @@ -165,6 +172,34 @@ "\n", "print(best_run)" ] + }, + { + "cell_type": "markdown", + "id": "26a95ef3", + "metadata": {}, + "source": [ + "# Visualize the results\n", + "\n", + "After the completion of the trials, we can visualize the results in a simple manner using the `optuna` study object.\n", + "We can pass the object and plot visualizations that can help to understand the patterns in the trial outcomes. Here, we are plotting the key hyperparameters and how different hyperparameter combinations relate to performance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8f14007", + "metadata": {}, + "outputs": [], + "source": [ + "import optuna\n", + "import optuna.visualization as vis\n", + "\n", + "study = best_run.study \n", + "\n", + "optuna.visualization.plot_param_importances(study).show()\n", + "\n", + "vis.plot_parallel_coordinate(study)\n" + ] } ], "metadata": { From 0cf0f5845e9d2abd1045e789c3e5280c84783c2c Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 3 Jun 2025 18:58:36 +0000 Subject: [PATCH 7/9] update the example. --- notebooks/en/optuna_hpo_with_transformers.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 161a0f46..95aa92f3 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -9,7 +9,7 @@ "\n", "_Authored by: [Parag Ekbote](https://github.com/ParagEkbote)_\n", "\n", - "In this notebook, we are going to use the [optuna](https://github.com/optuna/optuna) library to perform hyperparameter optimization on a light-weight BERT model on a small subset of the IMDB dataset. 
To learn more about transformers' hyperparameter search, you can check the following documentation [here](https://huggingface.co/docs/transformers/en/hpo_train).\n", + "In this notebook, we are going to use the [optuna](https://github.com/optuna/optuna) library to perform hyperparameter optimization on a light-weight BERT model using a small subset of the IMDB dataset. To learn more about transformers' hyperparameter search, you can check the following documentation [here](https://huggingface.co/docs/transformers/en/hpo_train).\n", "\n", "Firstly, we will install the following dependencies for our code:" ] @@ -137,7 +137,7 @@ "source": [ "# Define the Search Space and Start the Trials\n", "\n", - "We will now define the optuna hyperparameter search space to find the best set of hyperparameters for the learning rate, weight decay and batch size. We can now launch the hyperparameter search by passing the following metrics:\n", + "We will now define the optuna hyperparameter search space to find the best set of hyperparameters for learning rate, weight decay and batch size. We can now launch the hyperparameter search by passing the following metrics:\n", "\n", "1. direction: We aim to maxime the evaluation metric\n", "2. backend: We will use optuna for searching\n", @@ -181,7 +181,7 @@ "# Visualize the results\n", "\n", "After the completion of the trials, we can visualize the results in a simple manner using the `optuna` study object.\n", - "We can pass the object and plot visualizations that can help to understand the patterns in the trial outcomes. Here, we are plotting the key hyperparameters and how different hyperparameter combinations relate to performance." + "We can pass the object and plot visualizations that can help to understand the patterns in the trial outcomes. Here, we are plotting the key hyperparameters and how different hyperparameter combinations relate to performance of the model." ] }, { From b91ad3c64a6ccb974cd3fb46ca2660f97701b46b Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Wed, 11 Jun 2025 14:26:27 +0000 Subject: [PATCH 8/9] update the tutorial with observability, storage for the trials and push to hub to make it more applied. 
--- .../en/optuna_hpo_with_transformers.ipynb | 145 ++++++++++++++++-- 1 file changed, 132 insertions(+), 13 deletions(-) diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index 95aa92f3..cbef5668 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -21,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -q datasets evaluate transformers optuna" + "!pip install -q datasets evaluate transformers optuna wandb" ] }, { @@ -56,10 +56,10 @@ "set_seed(42)\n", "\n", "\n", - "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(2000))\n", + "train_dataset = load_dataset(\"imdb\", split=\"train\").shuffle(seed=42).select(range(2500))\n", "valid_dataset = load_dataset(\"imdb\", split=\"test\").shuffle(seed=42).select(range(1000))\n", "\n", - "model_name = \"lvwerra/distilbert-imdb\"\n", + "model_name = \"answerdotai/ModernBERT-base\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", "\n", @@ -82,14 +82,42 @@ " return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)\n" ] }, + { + "cell_type": "markdown", + "id": "105d9e01", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7652b74b", + "metadata": {}, + "outputs": [], + "source": [ + "import optuna\n", + "from optuna.storages import RDBStorage\n", + "\n", + "# Define persistent storage\n", + "storage = RDBStorage(\"sqlite:///optuna_trials.db\")\n", + "\n", + "# Create or load a study\n", + "study = optuna.create_study(\n", + " study_name=\"transformers_optuna_study\",\n", + " direction=\"maximize\",\n", + " storage=storage,\n", + " load_if_exists=True\n", + ")" + ] + }, { "cell_type": "markdown", "id": "5a46fac4", "metadata": {}, "source": [ - "# Set the Metrics and define the Trainer class\n", + "# Initialize the Trainer Class and Setup Observability\n", "\n", - "Now, we can define the metric function to calculate evaluation metrics after each eval step. We shall also define the objective function to maximize the accuracy when selecting the best hyperparameters.\n", + "Now, we can define the metric function to calculate evaluation metrics after each eval step. We shall also define the objective function to maximize the accuracy when selecting the best hyperparameters. For observability,\n", "\n", "Finally, we will also define the training arguments for the Trainer that will handle the evaluation, checkpointing, logging and hyperparameter search." 
] @@ -101,6 +129,8 @@ "metadata": {}, "outputs": [], "source": [ + "import wandb\n", + "\n", "def compute_metrics(eval_pred):\n", " predictions = eval_pred.predictions.argmax(axis=-1)\n", " labels = eval_pred.label_ids\n", @@ -110,13 +140,18 @@ "def compute_objective(metrics):\n", " return metrics[\"eval_accuracy\"]\n", "\n", + "wandb.init(project=\"hf-optuna\", name=f\"trial-{trial.number}\", reinit=True)\n", "\n", "training_args = TrainingArguments(\n", - " eval_strategy=\"epoch\",\n", - " save_strategy=\"best\",\n", - " load_best_model_at_end=True,\n", - " logging_strategy=\"epoch\",\n", - " report_to=\"none\",\n", + " output_dir=\"./results\",\n", + " evaluation_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " load_best_model_at_end=True,\n", + " logging_strategy=\"epoch\",\n", + " num_train_epochs=3,\n", + " report_to=\"wandb\", # Logs to W&B\n", + " logging_dir=\"./logs\",\n", + " run_name=f\"trial-{trial.number}\",\n", ")\n", "\n", "\n", @@ -168,6 +203,8 @@ " hp_space=optuna_hp_space,\n", " n_trials=20,\n", " compute_objective=compute_objective,\n", + " study_name=\"transformers_optuna_study\",\n", + " storage=\"sqlite:///optuna_trials.db\",\n", ")\n", "\n", "print(best_run)" @@ -194,11 +231,93 @@ "import optuna\n", "import optuna.visualization as vis\n", "\n", - "study = best_run.study \n", + "storage = optuna.storages.RDBStorage(\"sqlite:///optuna_trials.db\")\n", + "\n", + "study = optuna.load_study(\n", + " study_name=\"transformers_optuna_study\",\n", + " storage=storage\n", + ")\n", + "\n", + "vis.plot_param_importances(study).show()\n", + "\n", + "vis.plot_parallel_coordinate(study).show()\n", + "\n", + "vis.plot_contour(study).show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "ae8def79", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "572d1ab3", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import TrainingArguments, Trainer\n", + "\n", + "best_hparams = best_run.hyperparameters\n", + "\n", + "training_args = TrainingArguments(\n", + " output_dir=\"./final_model\",\n", + " learning_rate=best_hparams[\"learning_rate\"],\n", + " per_device_train_batch_size=best_hparams[\"per_device_train_batch_size\"],\n", + " weight_decay=best_hparams[\"weight_decay\"],\n", + " \n", + " evaluation_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " load_best_model_at_end=True,\n", + " logging_strategy=\"epoch\",\n", + " num_train_epochs=5, \n", + " push_to_hub=True, \n", + ")\n", + "\n", + "trainer = Trainer(\n", + " model_init=model_init, \n", + " args=training_args,\n", + " train_dataset=tokenized_train,\n", + " eval_dataset=tokenized_valid,\n", + " processing_class=tokenizer,\n", + " compute_metrics=compute_metrics,\n", + ")\n", + "\n", + "trainer.train()" + ] + }, + { + "cell_type": "markdown", + "id": "de469553", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d6b6e4a", + "metadata": {}, + "outputs": [], + "source": [ + "from huggingface_hub import login\n", "\n", - "optuna.visualization.plot_param_importances(study).show()\n", + "login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6052309", + "metadata": {}, + "outputs": [], + "source": [ + "trainer.save_model(\"./final_model\")\n", + "tokenizer.save_pretrained(\"./final_model\")\n", "\n", - "vis.plot_parallel_coordinate(study)\n" + "trainer.push_to_hub(\"your-username/your-model-name\")\n", + 
"tokenizer.push_to_hub(\"your-username/your-model-name\")\n" ] } ], From 0f0fa1f96a17bf24d048772d0f9cafaaf2d9830e Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Wed, 11 Jun 2025 16:45:18 +0000 Subject: [PATCH 9/9] update the example. --- .../en/optuna_hpo_with_transformers.ipynb | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/notebooks/en/optuna_hpo_with_transformers.ipynb b/notebooks/en/optuna_hpo_with_transformers.ipynb index cbef5668..2bc47caa 100644 --- a/notebooks/en/optuna_hpo_with_transformers.ipynb +++ b/notebooks/en/optuna_hpo_with_transformers.ipynb @@ -29,7 +29,7 @@ "id": "eff9ccd6", "metadata": {}, "source": [ - "# Prepare the dataset and set the Model\n", + "# Prepare the dataset and set the model\n", "\n", "We will load the IMDB dataset which is a standard benchmark for sentiment analysis. We will define 2000 examples for the training split and 1000 examples for validation. Both sets are shuffled with a fixed seed to ensure reproducibility.\n", "\n", @@ -86,7 +86,11 @@ "cell_type": "markdown", "id": "105d9e01", "metadata": {}, - "source": [] + "source": [ + "# Define Storage with Optuna\n", + "\n", + "To store all trials across sessions, we are going to be using `RDBStorage`, which allows all hyperparameter optimization trials to be stored in a persistent SQLite database. This also allows for the Visualization and analysis of the trials to become more reproducible." + ] }, { "cell_type": "code", @@ -101,7 +105,7 @@ "# Define persistent storage\n", "storage = RDBStorage(\"sqlite:///optuna_trials.db\")\n", "\n", - "# Create or load a study\n", + "\n", "study = optuna.create_study(\n", " study_name=\"transformers_optuna_study\",\n", " direction=\"maximize\",\n", @@ -117,7 +121,7 @@ "source": [ "# Initialize the Trainer Class and Setup Observability\n", "\n", - "Now, we can define the metric function to calculate evaluation metrics after each eval step. We shall also define the objective function to maximize the accuracy when selecting the best hyperparameters. For observability,\n", + "Now, we can define the metric function to calculate evaluation metrics after each eval step. We shall also define the objective function to maximize the accuracy when selecting the best hyperparameters. For observability, we can utilize Weight & Biases to log the hyperparameter trials. It is important to remember to login with your API key to Weight & Biases to track your trial.\n", "\n", "Finally, we will also define the training arguments for the Trainer that will handle the evaluation, checkpointing, logging and hyperparameter search." ] @@ -177,7 +181,9 @@ "1. direction: We aim to maxime the evaluation metric\n", "2. backend: We will use optuna for searching\n", "3. n_trials: The number of trials optuna will be executed \n", - "4. compute_objective: The objective to minimize or maximize from the metrics returned by `evaluate`" + "4. compute_objective: The objective to minimize or maximize from the metrics returned by `evaluate`\n", + "5. study_name: The study name is used to retrieve or continue a specific run.\n", + "6. storage: The backend where Optuna will store all trial data." ] }, { @@ -249,7 +255,11 @@ "cell_type": "markdown", "id": "ae8def79", "metadata": {}, - "source": [] + "source": [ + "# Perform the Final Training\n", + "\n", + "We can now train the model by fetching the best parameters we have discovered by performing Hyperparameter Optimization(HPO). 
We can now pass the optimized training arguments to configure the essential aspects of training." + ] }, { "cell_type": "code", @@ -292,18 +302,10 @@ "cell_type": "markdown", "id": "de469553", "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4d6b6e4a", - "metadata": {}, - "outputs": [], "source": [ - "from huggingface_hub import login\n", + "# Uploading to Hugging Face Hub\n", "\n", - "login()" + "We can now save the trained model locally and upload it to the Hugging Face Hub. It is important to remember to login to the Hugging Face Hub using the `huggingface-cli` or `notebook_login()`. Now, we can push the model to the Hub." ] }, {
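
Taken together, the patches above converge on a single end-to-end flow: tokenize an IMDB subset, give the `Trainer` a `model_init` so each Optuna trial gets a fresh model, define an `optuna_hp_space`, and run `trainer.hyperparameter_search` with trials persisted to SQLite. The sketch below is a condensed, hedged reconstruction of that final state rather than the notebook verbatim: it assumes recent `transformers`, `datasets`, `evaluate`, and `optuna` releases (for `eval_strategy`, `processing_class`, and ModernBERT support), and it omits the Weights & Biases logging, the Optuna visualizations, and the final retraining/Hub-upload cells. Extra keyword arguments to `hyperparameter_search` (here `study_name`, `storage`, `load_if_exists`) are forwarded to `optuna.create_study`, which is what makes the trials resumable.

```python
# Condensed sketch of the workflow the patch series arrives at (assumptions noted above).
from datasets import load_dataset
import evaluate
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    set_seed,
)

set_seed(42)

# Model and dataset sizes mirror the final revision of the notebook;
# ModernBERT needs a recent transformers release.
model_name = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)

train_dataset = load_dataset("imdb", split="train").shuffle(seed=42).select(range(2500))
valid_dataset = load_dataset("imdb", split="test").shuffle(seed=42).select(range(1000))

tokenized_train = train_dataset.map(tokenize, batched=True).select_columns(
    ["input_ids", "attention_mask", "label"]
)
tokenized_valid = valid_dataset.map(tokenize, batched=True).select_columns(
    ["input_ids", "attention_mask", "label"]
)

metric = evaluate.load("accuracy")

def model_init():
    # A fresh classifier is instantiated for every Optuna trial.
    return AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

def compute_metrics(eval_pred):
    predictions = eval_pred.predictions.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)

def compute_objective(metrics):
    # The quantity hyperparameter_search maximizes.
    return metrics["eval_accuracy"]

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    logging_strategy="epoch",
    num_train_epochs=3,
    report_to="none",  # the notebook wires up W&B here; omitted in this sketch
)

trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_valid,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

def optuna_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "per_device_train_batch_size": trial.suggest_categorical(
            "per_device_train_batch_size", [16, 32, 64, 128]
        ),
        "weight_decay": trial.suggest_float("weight_decay", 0.0, 0.3),
    }

best_run = trainer.hyperparameter_search(
    direction="maximize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=20,
    compute_objective=compute_objective,
    # Forwarded to optuna.create_study: persists trials in SQLite and lets a
    # re-run resume the same study (load_if_exists is an assumption, not in the patch).
    study_name="transformers_optuna_study",
    storage="sqlite:///optuna_trials.db",
    load_if_exists=True,
)
print(best_run)
```

With the study persisted in `sqlite:///optuna_trials.db`, the notebook's visualization cells (`optuna.load_study` plus `plot_param_importances`, `plot_parallel_coordinate`, `plot_contour`) and the final training and push-to-Hub cells can then be run against `best_run.hyperparameters` as shown in the patches.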