comet-ml · Lothiraldan · Jan 15, 2025
diff --git a/integrations/model-training/trl/notebooks/Comet_with_trl.ipynb b/integrations/model-training/trl/notebooks/Comet_with_trl.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img src=\"https://cdn.comet.ml/img/notebook_logo.png\">"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[Comet](https://www.comet.com/site/products/ml-experiment-tracking/?utm_campaign=ray_train&utm_medium=colab) is an MLOps Platform that is designed to help Data Scientists and Teams build better models faster! Comet provides tooling to track, Explain, Manage, and Monitor your models in a single place! It works with Jupyter Notebooks and Scripts and most importantly it's 100% free to get started!\n",
+    "\n",
+    "[TRL](https://github.com/huggingface/trl) is a cutting-edge library designed for post-training foundation models using advanced techniques like Supervised Fine-Tuning (SFT), Proximal Policy Optimization (PPO), and Direct Preference Optimization (DPO).\n",
+    "\n",
+    "Instrument your runs with Comet to start managing experiments, create dataset versions and track hyperparameters for faster and easier reproducibility and collaboration.\n",
+    "\n",
+    "[Find more information about our integration with TRL](https://www.comet.ml/docs/v2/integrations/ml-frameworks/trl/)\n",
+    "\n",
+    "Get a preview for what's to come. Check out a completed experiment created from this notebook [here](TODO).\n",
+    "\n",
+    "This example is based on the [following Ray Train Lightning example](https://docs.ray.io/en/latest/train/getting-started-pytorch-lightning.html)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ZYchV5RWwdv5"
+   },
+   "source": [
+    "# Install Dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "DJnmqphuY2eI"
+   },
+   "outputs": [],
+   "source": [
+    "%pip install \"comet_ml>=3.47.1\" \"trl>=0.13.0\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "crOcPHobwhGL"
+   },
+   "source": [
+    "# Initialize Comet"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "HNQRM0U3caiY"
+   },
+   "outputs": [],
+   "source": [
+    "import comet_ml\n",
+    "\n",
+    "comet_ml.login()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "cgqwGSwtzVWD"
+   },
+   "source": [
+    "# Import Dependencies"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "e-5rRYaUw5AF"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from datasets import load_dataset\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "\n",
+    "from trl import (\n",
+    "    DPOConfig,\n",
+    "    DPOTrainer,\n",
+    "    ModelConfig,\n",
+    "    ScriptArguments,\n",
+    "    TrlParser,\n",
+    "    get_kbit_device_map,\n",
+    "    get_peft_config,\n",
+    "    get_quantization_config,\n",
+    ")\n",
+    "from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "TJuThf1TxP_G"
+   },
+   "source": [
+    "# Load your dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = load_dataset(\"trl-lib/ultrafeedback_binarized\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Train the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    \"Qwen/Qwen2-0.5B-Instruct\",\n",
+    ")\n",
+    "ref_model = AutoModelForCausalLM.from_pretrained(\n",
+    "    \"Qwen/Qwen2-0.5B-Instruct\",\n",
+    ")\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    \"Qwen/Qwen2-0.5B-Instruct\",\n",
+    ")\n",
+    "if tokenizer.pad_token is None:\n",
+    "    tokenizer.pad_token = tokenizer.eos_token\n",
+    "if tokenizer.chat_template is None:\n",
+    "    tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE\n",
+    "\n",
+    "training_args = DPOConfig(\n",
+    "    output_dir=\"/tmp\",\n",
+    "    learning_rate=5.0e-7,\n",
+    "    max_steps=10,\n",
+    "    per_device_train_batch_size=1,\n",
+    "    gradient_accumulation_steps=8,\n",
+    "    logging_steps=1,\n",
+    "    eval_strategy=\"steps\",\n",
+    "    eval_steps=5,\n",
+    "    report_to=[\"comet_ml\"],\n",
+    ")\n",
+    "trainer = DPOTrainer(\n",
+    "    model,\n",
+    "    ref_model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=dataset[\"train\"],\n",
+    "    eval_dataset=dataset[\"test\"],\n",
+    "    processing_class=tokenizer,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "comet_ml.end()"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}