From fe63b9783824e821d4e135483b6d9c46a0799121 Mon Sep 17 00:00:00 2001 From: DhivyaBharathy-web Date: Wed, 18 Jun 2025 07:40:04 +0000 Subject: [PATCH 1/2] Add DeepSeek_Qwen3_GRPO notebook --- examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb | 172 +++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb diff --git a/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb b/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb new file mode 100644 index 00000000..8630bfbf --- /dev/null +++ b/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "29d21289", + "metadata": { + "id": "29d21289" + }, + "source": [ + "# DeepSeek R1 Qwen3 (8B) - GRPO Agent Demo" + ] + }, + { + "cell_type": "markdown", + "source": [ + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DhivyaBharathy-web/PraisonAI/blob/main/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb)\n" + ], + "metadata": { + "id": "yuOEagMH86WV" + }, + "id": "yuOEagMH86WV" + }, + { + "cell_type": "markdown", + "id": "0f798657", + "metadata": { + "id": "0f798657" + }, + "source": [ + "This notebook demonstrates the usage of DeepSeek's Qwen3-8B model with GRPO (Guided Reasoning Prompt Optimization) for interactive conversational reasoning tasks.\n", + "It is designed to simulate a lightweight agent-style reasoning capability in an accessible and interpretable way." + ] + }, + { + "cell_type": "markdown", + "id": "80f3de9e", + "metadata": { + "id": "80f3de9e" + }, + "source": [ + "## Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d1c7f6c", + "metadata": { + "id": "8d1c7f6c" + }, + "outputs": [], + "source": [ + "!pip install -q transformers accelerate" + ] + }, + { + "cell_type": "markdown", + "id": "78603e7b", + "metadata": { + "id": "78603e7b" + }, + "source": [ + "## Tools" + ] + }, + { + "cell_type": "markdown", + "id": "88e97fbc", + "metadata": { + "id": "88e97fbc" + }, + "source": [ + "- `transformers`: For model loading and interaction\n", + "- `AutoModelForCausalLM`, `AutoTokenizer`: Interfaces for DeepSeek's LLM" + ] + }, + { + "cell_type": "markdown", + "id": "37d9bd54", + "metadata": { + "id": "37d9bd54" + }, + "source": [ + "## YAML Prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "adf5cae5", + "metadata": { + "id": "adf5cae5" + }, + "outputs": [], + "source": [ + "\n", + "prompt:\n", + " task: \"Reasoning over multi-step instructions\"\n", + " context: \"User provides a math problem or logical question.\"\n", + " model: \"deepseek-ai/deepseek-moe-16b-chat\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "6985f60c", + "metadata": { + "id": "6985f60c" + }, + "source": [ + "## Main" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d74bf686", + "metadata": { + "id": "d74bf686" + }, + "outputs": [], + "source": [ + "\n", + "from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline\n", + "\n", + "model_id = \"deepseek-ai/deepseek-moe-16b-chat\"\n", + "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", + "model = AutoModelForCausalLM.from_pretrained(model_id, device_map=\"auto\")\n", + "\n", + "pipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer)\n", + "\n", + "prompt = \"If a train travels 60 miles in 1.5 hours, what is its average speed?\"\n", + "output = pipe(prompt, max_new_tokens=60)[0]['generated_text']\n", + "print(\"🧠 Reasoned Output:\", output)\n" + ] + }, + { + "cell_type": "markdown", + "id": "c856167f", + "metadata": { + "id": "c856167f" + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "41039ee8", + "metadata": { + "id": "41039ee8" + }, + "source": [ + "### 🖼️ Output Summary\n", + "\n", + "Prompt: *\"If a train travels 60 miles in 1.5 hours, what is its average speed?\"*\n", + "\n", + "🧠 Output: The model provides a clear reasoning process, such as:\n", + "\n", + "> \"To find the average speed, divide the total distance by total time: 60 / 1.5 = 40 mph.\"\n", + "\n", + "💡 This shows the model's ability to walk through logical steps using GRPO-enhanced reasoning." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From f4a637fe1150236aec923ebd82ab93cb4ae4f921 Mon Sep 17 00:00:00 2001 From: DhivyaBharathy <157012713+DhivyaBharathy-web@users.noreply.github.com> Date: Wed, 18 Jun 2025 13:44:42 +0530 Subject: [PATCH 2/2] Update DeepSeek_Qwen3_GRPO.ipynb --- examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb b/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb index 8630bfbf..f4da2651 100644 --- a/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb +++ b/examples/cookbooks/DeepSeek_Qwen3_GRPO.ipynb @@ -7,7 +7,7 @@ "id": "29d21289" }, "source": [ - "# DeepSeek R1 Qwen3 (8B) - GRPO Agent Demo" + "# DeepSeek R1 Qwen3 (8B) - GRPO Model" ] }, { @@ -169,4 +169,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +}