Draft

Changes from all commits (47 commits):
2e83f64
chore: bump versions
mhordynski Sep 12, 2025
ee680a4
chore: fix docs build
mhordynski Sep 12, 2025
ecf3508
init: TODO agent
jakubduda-dsai Sep 16, 2025
234ec2c
gather tasks results
jakubduda-dsai Sep 16, 2025
661e4f7
feat: support wrapping downstream agents as tools (#819)
akotyla Sep 16, 2025
61d6739
feat: class-based agents (#820)
dazy-ds Sep 17, 2025
1115edb
save
jakubduda-dsai Sep 18, 2025
bd9cb55
todo_manager
jakubduda-dsai Sep 18, 2025
fd87b7f
remove task_id
jakubduda-dsai Sep 18, 2025
63faf90
clear prints
jakubduda-dsai Sep 18, 2025
2f7146a
clear prints
jakubduda-dsai Sep 18, 2025
2d29e5b
remove global container for todo list
jakubduda-dsai Sep 19, 2025
dc1d068
fix: nightly builds
mhordynski Sep 22, 2025
3ca9cd4
fix: docs deployments
mhordynski Sep 22, 2025
3026929
fix: add docs login
mhordynski Sep 22, 2025
759f59e
feat: introduce post processors (#821)
mackurzawa Sep 22, 2025
f67ab91
add humaneval pipeline files
rk-izak Sep 24, 2025
769551f
feat: streaming from downstream agents (#825)
akotyla Sep 25, 2025
51c95a9
minor humaneval changes
rk-izak Sep 25, 2025
a21efb5
add GAIA pipeline + basic extra tools for benchmarking
rk-izak Sep 25, 2025
b9673fb
remove temp comment for lint
rk-izak Sep 25, 2025
d45415f
feat: todo list for agent (#823)
jakubduda-dsai Sep 26, 2025
09f94e4
feat: introduce supervisor post processor (#830)
mackurzawa Sep 26, 2025
8c8697d
add hotpotqa+rag pipeline files
rk-izak Sep 29, 2025
8ac24d8
small lint changes
rk-izak Sep 29, 2025
4998855
Merge remote-tracking branch 'origin/develop' into rki/todo-eval
rk-izak Sep 29, 2025
009c1d5
wrong conflict resolution
rk-izak Sep 29, 2025
fef3376
adjust parser for hotpotqa; remove previous TODO usage and adjust pro…
rk-izak Sep 29, 2025
56c8432
refactor
rk-izak Sep 30, 2025
4b2585a
add changelogs
rk-izak Oct 1, 2025
3a9df86
trailing spaces removal
rk-izak Oct 1, 2025
49bf3aa
remove some extra tools
rk-izak Oct 1, 2025
2b04da9
ruff and mypy refactor
rk-izak Oct 1, 2025
ba77557
initialize orchestator with todo agent
rk-izak Oct 2, 2025
bc7534a
add mock of todo orchestrator
rk-izak Oct 6, 2025
0e1607b
add info about HF CLI and gated datasets
rk-izak Oct 7, 2025
a6db07b
make the evaluation pipelines pluggable with agents; reformat
rk-izak Oct 8, 2025
77ee2af
feat: todo list component (#827)
dazy-ds Oct 10, 2025
61ec71f
Automated UI build
ds-ragbits-robot Oct 10, 2025
ab8de73
Merge remote-tracking branch 'origin/develop' into rki/todo-eval
rk-izak Oct 13, 2025
33ab6b5
remove tutorial draft
rk-izak Oct 13, 2025
2b9b8fb
docs: installation & source fixes (#844)
puzzle-solver Oct 13, 2025
51942e1
Merge branch 'develop' into rki/todo-eval
mhordynski Oct 13, 2025
9650a70
feat: conversation summary (#840)
dazy-ds Oct 13, 2025
7fa44c5
Automated UI build
ds-ragbits-robot Oct 13, 2025
1f27426
Merge branch 'develop' into rki/todo-eval
mhordynski Oct 13, 2025
3bc67aa
wip: add socrates dataset
mackurzawa Oct 13, 2025
53 changes: 17 additions & 36 deletions .github/workflows/nightly-build.yml
@@ -19,43 +19,19 @@ jobs:
ref: develop
fetch-depth: 0

- name: Check if nightly build needed
id: check
run: |
# Get the latest commit hash on develop
COMMIT_HASH=$(git rev-parse --short HEAD)
echo "commit-hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"

# Check if we already built this commit as nightly
LAST_NIGHTLY_TAG=$(git tag -l "*dev*" --sort=-version:refname | head -1)
if [ -n "$LAST_NIGHTLY_TAG" ]; then
# Get the commit that the last nightly tag points to
LAST_NIGHTLY_COMMIT=$(git rev-list -n 1 $LAST_NIGHTLY_TAG)
CURRENT_COMMIT=$(git rev-parse HEAD)
if [ "$CURRENT_COMMIT" = "$LAST_NIGHTLY_COMMIT" ]; then
echo "should-build=false" >> "$GITHUB_OUTPUT"
echo "No new commits since last nightly build"
exit 0
fi
fi
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
version: ${{ vars.UV_VERSION || '0.6.9' }}

# Generate nightly version
BASE_VERSION=$(python -c "
try:
import tomllib
except ImportError:
import tomli as tomllib
with open('packages/ragbits/pyproject.toml', 'rb') as f:
data = tomllib.load(f)
print(data['project']['version'])
")
# Use timestamp for unique nightly version (PEP 440 compliant)
TIMESTAMP=$(date +%Y%m%d%H%M)
NIGHTLY_VERSION="${BASE_VERSION}.dev${TIMESTAMP}"
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"

echo "should-build=true" >> "$GITHUB_OUTPUT"
echo "nightly-version=$NIGHTLY_VERSION" >> "$GITHUB_OUTPUT"
echo "Will build nightly version: $NIGHTLY_VERSION"
- name: Check if nightly build needed
id: check
run: uv run scripts/check_nightly_build.py

build-and-publish:
needs: check-for-changes
@@ -100,6 +76,7 @@ jobs:
git commit -m "chore: update package versions for nightly build ${{ env.NIGHTLY_VERSION }}"
git tag "${{ env.NIGHTLY_VERSION }}"
git push origin "${{ env.NIGHTLY_VERSION }}"
git push origin develop
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}
NIGHTLY_VERSION: ${{ needs.check-for-changes.outputs.nightly-version }}
@@ -114,7 +91,11 @@ jobs:

- name: Deploy nightly documentation
shell: bash
run: uv run mike deploy --push nightly
run: |
git config user.name "ds-ragbits-robot"
git config user.email "ds-ragbits-robot@users.noreply.github.com"
git fetch origin gh-pages
uv run mike deploy --push --alias-type copy nightly
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}

3 changes: 3 additions & 0 deletions .github/workflows/publish-docs.yaml
@@ -15,6 +15,9 @@ jobs:
contents: write
steps:
- uses: actions/checkout@v4
with:
ref: gh-pages
fetch-depth: 1

- name: Deploy docs
shell: bash
3 changes: 2 additions & 1 deletion .github/workflows/publish-pypi.yml
@@ -56,6 +56,7 @@ jobs:

- name: Deploy documentation
run: |
uv run mike deploy --push stable
git fetch origin gh-pages
uv run mike deploy --push --alias-type copy stable
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}
6 changes: 6 additions & 0 deletions docs/api_reference/agents/index.md
@@ -9,3 +9,9 @@
::: ragbits.agents.AgentResultStreaming

::: ragbits.agents.a2a.server.create_agent_server

::: ragbits.agents.post_processors.base

::: ragbits.agents.post_processors.supervisor

::: ragbits.agents.AgentRunContext
29 changes: 28 additions & 1 deletion docs/how-to/agents/define_and_use_agents.md
@@ -32,7 +32,7 @@ Use a structured prompt to instruct the LLM. For details on writing prompts with
from pydantic import BaseModel
from ragbits.core.prompt import Prompt

--8<-- "examples/agents/tool_use.py:51:70"
--8<-- "examples/agents/tool_use.py:51:72"
```

### Run the agent
@@ -49,6 +49,33 @@ The result is an [AgentResult][ragbits.agents.AgentResult], which includes the m

You can find the complete code example in the Ragbits repository [here](https://github.com/deepsense-ai/ragbits/blob/main/examples/agents/tool_use.py).

### Alternative approach: inheritance with `prompt_config`

In addition to explicitly attaching a Prompt instance, Ragbits also supports defining agents through a combination of inheritance and the `@Agent.prompt_config` decorator.

This approach lets you bind input (and optionally output) models directly to your agent class. The agent then derives its prompt structure automatically, without requiring a prompt argument in the constructor.

```python
from pydantic import BaseModel
from ragbits.agents import Agent

--8<-- "examples/agents/with_decorator.py:51:71"
```

The decorator can also accept an output type, allowing you to strongly type both the inputs and outputs of the agent. If you do not explicitly define a `user_prompt`, Ragbits will default to `{{ input }}`.

Once defined, the agent class can be used directly, just like any other subclass of Agent:

```python
import asyncio
from ragbits.agents import Agent
from ragbits.core.llms import LiteLLM

--8<-- "examples/agents/with_decorator.py:73:84"
```

You can find the complete code example in the Ragbits repository [here](https://github.com/deepsense-ai/ragbits/blob/main/examples/agents/with_decorator.py).

## Tool choice
To control which tool is used on the first call, you can use the `tool_choice` parameter. The following options are available:
- "auto": let the model decide whether a tool call is needed
48 changes: 48 additions & 0 deletions docs/how-to/agents/stream_downstream_agents.md
@@ -0,0 +1,48 @@
# How-To: Stream downstream agents with Ragbits

A Ragbits [Agent][ragbits.agents.Agent] can call other agents as tools, creating a chain of reasoning where downstream agents provide structured results to the parent agent.

Using the streaming API, you can observe every chunk of output as it is generated, including tool calls, tool results, and final text - perfect for real-time monitoring or chat interfaces.

## Define a simple tool

A tool is just a Python function returning a JSON-serializable result. Here’s an example tool returning the current time for a given location:

```python
import json

--8<-- "examples/agents/downstream_agents_streaming.py:33:51"
```

## Create a downstream agent

The downstream agent wraps the tool with a prompt, allowing the LLM to use it as a function.

```python
from pydantic import BaseModel
from ragbits.core.prompt import Prompt
from ragbits.agents import Agent
from ragbits.agents._main import AgentOptions
from ragbits.core.llms import LiteLLM

--8<-- "examples/agents/downstream_agents_streaming.py:54:82"
```

## Create a parent QA agent

The parent agent can call downstream agents as tools. This lets the LLM reason and decide when to invoke the downstream agent.

```python
--8<-- "examples/agents/downstream_agents_streaming.py:85:111"
```

## Streaming output from downstream agents

Use `run_streaming` with an [AgentRunContext][ragbits.agents.AgentRunContext] to see output as it happens. Each chunk contains either text, a tool call, or a tool result. You can print agent names when they change and handle downstream agent events.

```python
import asyncio
from ragbits.agents import DownstreamAgentResult

--8<-- "examples/agents/downstream_agents_streaming.py:114:133"
```
167 changes: 167 additions & 0 deletions docs/how-to/agents/use_post_processors.md
@@ -0,0 +1,167 @@
# How-To: Use Post-Processors with Ragbits Agents

Ragbits Agents can be enhanced with post-processors to intercept, validate, log, filter, and modify their outputs. In this guide you will learn how to:

- Create custom post-processors (streaming and non-streaming)
- Attach post-processors to agents in run and streaming modes
- Use and configure the built-in Supervisor post-processor

## Post-Processors Overview

Ragbits provides two types of post-processors:

- **PostProcessor**: Processes the final output after generation, ideal for end-of-run processing.
- **StreamingPostProcessor**: Processes outputs as they are generated, suitable for real-time applications.

### Implementing a custom Post-Processor

To create a custom post-processor, inherit from the appropriate base class ([`PostProcessor`][ragbits.agents.post_processors.base.PostProcessor] or [`StreamingPostProcessor`][ragbits.agents.post_processors.base.StreamingPostProcessor]) and implement the required method.

#### Post-Processor Example

A non-streaming post-processor applies transformations after the entire content is generated.

```python
from ragbits.agents.post_processors.base import PostProcessor


class TruncateProcessor(PostProcessor):
    def __init__(self, max_length: int = 50) -> None:
        self.max_length = max_length

    async def process(self, result, agent, options=None, context=None):
        # Truncate the final content if it exceeds the configured maximum length.
        content = result.content
        if len(content) > self.max_length:
            content = content[: self.max_length] + "... [TRUNCATED]"
        result.content = content
        return result
```

#### Streaming Post-Processor Example

A streaming post-processor can manipulate all information returned during generation, including text, tool calls, etc.

```python
from ragbits.agents.post_processors.base import StreamingPostProcessor


class UpperCaseStreamingProcessor(StreamingPostProcessor):
    async def process_streaming(self, chunk, agent):
        # Upper-case text chunks; pass tool calls and other chunk types through unchanged.
        if isinstance(chunk, str):
            return chunk.upper()
        return chunk
```

## Using Post-Processors

To use post-processors, pass them to the `run` or `run_streaming` methods of the `Agent` class. If you pass a non-streaming processor to `run_streaming`, set `allow_non_streaming=True`. This allows streaming processors to handle content piece by piece during generation, while non-streaming processors apply transformations after the entire output is generated.

```python
import asyncio

from ragbits.agents import Agent
from ragbits.core.llms import LiteLLM


async def main() -> None:
    llm = LiteLLM("gpt-4.1-mini")
    agent = Agent(llm=llm, prompt="You are a helpful assistant.")
    post_processors = [
        UpperCaseStreamingProcessor(),
        TruncateProcessor(max_length=50),
    ]
    stream_result = agent.run_streaming(
        "Tell me about the history of AI.",
        post_processors=post_processors,
        allow_non_streaming=True,
    )
    async for chunk in stream_result:
        if isinstance(chunk, str):
            print(chunk, end="")
    print(f"\nFinal answer:\n{stream_result.content}")


asyncio.run(main())
```

Post-processors offer a flexible way to tailor agent outputs, whether filtering content in real-time or transforming final outputs.
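
The same processors can also be attached to a plain `run` call; below is a minimal sketch that mirrors the streaming example above, assuming the `TruncateProcessor` defined earlier:

```python
# Minimal sketch: apply a non-streaming post-processor to a regular `run` call.
# Assumes the TruncateProcessor class from the example above.
import asyncio

from ragbits.agents import Agent
from ragbits.core.llms import LiteLLM


async def main() -> None:
    llm = LiteLLM("gpt-4.1-mini")
    agent = Agent(llm=llm, prompt="You are a helpful assistant.")
    result = await agent.run(
        "Tell me about the history of AI.",
        post_processors=[TruncateProcessor(max_length=50)],
    )
    print(result.content)


asyncio.run(main())
```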

## Built-in Post-Processors

### Supervisor

The [`SupervisorPostProcessor`][ragbits.agents.post_processors.supervisor.SupervisorPostProcessor] validates the agent’s final response against the executed tool calls and, if needed, triggers an automatic rerun with a correction prompt. It helps catch inconsistencies (e.g., when the response contradicts tool output) and guide the agent to refine its answer. The Supervisor is a non-streaming post-processor: it runs after generation has completed, validating the final output before optionally issuing a correction rerun.

Key capabilities:

- Validates the last assistant response using an LLM-powered validation prompt
- Optionally reruns the agent with a formatted correction prompt derived from validation feedback
- Supports preserving or pruning intermediate history
- Attaches validation metadata to the final `AgentResult`

#### Quick start

```python
from ragbits.agents import Agent
from ragbits.agents.post_processors import SupervisorPostProcessor
from ragbits.agents.post_processors.supervisor import HistoryStrategy
from ragbits.core.llms.litellm import LiteLLM

llm = LiteLLM("gpt-4o-mini", use_structured_output=True)
supervisor = SupervisorPostProcessor(
    llm=llm,
    max_retries=2,
    fail_on_exceed=False,
    history_strategy=HistoryStrategy.PRESERVE,  # Default HistoryStrategy is REMOVE
)

agent = Agent(
    llm=llm,
    prompt="You are a helpful assistant.",
)

# Run inside an async context (e.g. asyncio.run(...)):
result = await agent.run(
    "What is the weather in Tokyo?",
    post_processors=[supervisor],
)
```

#### Configuration

- **llm**: LLM used for validation and formatting structured outputs
- **validation_prompt**: Optional custom prompt class describing the validation output schema
- **correction_prompt**: Optional format string used to create a correction message from validation output
- **max_retries**: How many times to attempt correction-driven reruns
- **fail_on_exceed**: If `True`, raises when retries are exhausted; otherwise returns last result with metadata
- **history_strategy**:
    - `PRESERVE`: keep all messages, including the correction user message and the rerun assistant message
    - `REMOVE`: prune the invalid assistant message and the correction user message, keeping only the final assistant response

#### Custom structured validation and correction

You can define a custom validation output model and prompt to shape the supervisor feedback and correction message:

```python
from pydantic import BaseModel

from ragbits.agents.post_processors import SupervisorPostProcessor
from ragbits.agents.post_processors.supervisor import HistoryStrategy, ValidationInput
from ragbits.core.prompt.prompt import Prompt


class MyValidationOutput(BaseModel):
    is_valid: bool
    errors: list[str]
    fixes: list[str]
    confidence: float


class MyValidationPrompt(Prompt[ValidationInput, MyValidationOutput]):
    system_prompt = "You are an expert validator. Provide clear, actionable feedback."
    user_prompt = (
        "Chat History:\n"
        "{% for message in chat_history %}"
        "\n{{ message.role | title }}: {{ message.content }} (if None it means it's a tool call)"
        "{% endfor %}"
        "\n\nList all errors, possible fixes, and provide a confidence score (0.0-1.0) for your assessment.\n"
    )


correction_prompt = (
    "Previous answer had issues:\n"
    "Errors: {errors}\n"
    "Fixes: {fixes}\n"
    "Confidence: {confidence}\n"
    "Please answer again using the fixes."
)

# `llm` is the LiteLLM instance from the quick start above.
supervisor = SupervisorPostProcessor(
    llm=llm,
    validation_prompt=MyValidationPrompt,
    correction_prompt=correction_prompt,
    max_retries=1,
    history_strategy=HistoryStrategy.PRESERVE,
)
```

The Supervisor appends validation records to `result.metadata` under the `post_processors.supervisor` key as a list of dicts; each entry corresponds to a validation step.
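
A short sketch of reading those records after a run, assuming `result.metadata` behaves like a nested dictionary (the fields inside each record depend on the validation output model):

```python
# Sketch: inspect the supervisor's validation records attached to the result.
# Assumes `result` comes from agent.run(..., post_processors=[supervisor]) and
# that metadata nests dicts under "post_processors" -> "supervisor".
records = result.metadata.get("post_processors", {}).get("supervisor", [])
for step, record in enumerate(records, start=1):
    print(f"Validation step {step}: {record}")
```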
2 changes: 1 addition & 1 deletion docs/tutorials/intro.md
@@ -2,7 +2,7 @@

Let's walk through a quick example of **basic question answering**. Specifically, let's build **a system for answering tech questions**, e.g. about Linux or iPhone apps.

Install the latest Ragbits via `pip install -U ragbits` and follow along.
Install the latest Ragbits via `pip install -U ragbits ragbits-agents` and follow along.

## Configuring the environment
