[Bump Minor Version] Release: Merge staging to main

Judgment Release Bot · Judgment Release Bot · commit fb9c64b0b619 · 2025-10-16T03:35:07.000Z
diff --git a/README.md b/README.md
@@ -36,8 +36,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO
 await trainer.train(
     agent_function=your_agent_function,  # entry point to your agent
     scorers=[RewardScorer()],  # Custom scorer you define based on task criteria, acts as reward
-    prompts=training_prompts,  # Tasks
-    rft_provider="fireworks"
+    prompts=training_prompts  # Tasks
 )
 ```
 
diff --git a/src/judgeval/trainer/__init__.py b/src/judgeval/trainer/__init__.py
@@ -1,5 +1,14 @@
 from judgeval.trainer.trainer import JudgmentTrainer
 from judgeval.trainer.config import TrainerConfig, ModelConfig
 from judgeval.trainer.trainable_model import TrainableModel
+from judgeval.trainer.base_trainer import BaseTrainer
+from judgeval.trainer.fireworks_trainer import FireworksTrainer
 
-__all__ = ["JudgmentTrainer", "TrainerConfig", "ModelConfig", "TrainableModel"]
+__all__ = [
+    "JudgmentTrainer",
+    "TrainerConfig",
+    "ModelConfig",
+    "TrainableModel",
+    "BaseTrainer",
+    "FireworksTrainer",
+]
diff --git a/src/judgeval/trainer/base_trainer.py b/src/judgeval/trainer/base_trainer.py
@@ -0,0 +1,117 @@
+from abc import ABC, abstractmethod
+from typing import Any, Callable, List, Optional, Union, Dict, TYPE_CHECKING
+from .config import TrainerConfig, ModelConfig
+from judgeval.scorers import ExampleScorer, ExampleAPIScorerConfig
+
+if TYPE_CHECKING:
+    from judgeval.tracer import Tracer
+    from .trainable_model import TrainableModel
+
+
+class BaseTrainer(ABC):
+    """
+    Abstract base class for training providers.
+
+    This class defines the interface that all training provider implementations
+    must follow. Each provider (Fireworks, Verifiers, etc.) will have its own
+    concrete implementation of this interface.
+    """
+
+    def __init__(
+        self,
+        config: TrainerConfig,
+        trainable_model: "TrainableModel",
+        tracer: "Tracer",
+        project_name: Optional[str] = None,
+    ):
+        """
+        Store the trainer's shared dependencies on the instance.
+
+        Args:
+            config: TrainerConfig instance with training parameters
+            trainable_model: TrainableModel instance to use for training
+            tracer: Tracer for observability
+            project_name: Run grouping name; "judgment_training" if None or empty
+        """
+        self.config = config
+        self.trainable_model = trainable_model
+        self.tracer = tracer
+        self.project_name = project_name or "judgment_training"
+
+    @abstractmethod
+    async def generate_rollouts_and_rewards(
+        self,
+        agent_function: Callable[[Any], Any],
+        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
+        prompts: List[Any],
+        num_prompts_per_step: Optional[int] = None,
+        num_generations_per_prompt: Optional[int] = None,
+        concurrency: Optional[int] = None,
+    ) -> Any:
+        """
+        Generate rollouts and compute rewards using the current model snapshot.
+
+        Args:
+            agent_function: Function/agent to call for generating responses
+            scorers: List of scorer objects to evaluate responses
+            prompts: List of prompts to use for training
+            num_prompts_per_step: Number of prompts to use per step
+            num_generations_per_prompt: Number of generations per prompt
+            concurrency: Concurrency limit
+
+        Returns:
+            Provider-specific dataset format for training
+        """
+        pass
+
+    @abstractmethod
+    async def run_reinforcement_learning(
+        self,
+        agent_function: Callable[[Any], Any],
+        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
+        prompts: List[Any],
+    ) -> ModelConfig:
+        """
+        Run the iterative reinforcement learning fine-tuning loop.
+
+        Args:
+            agent_function: Function/agent to call for generating responses
+            scorers: List of scorer objects to evaluate responses
+            prompts: List of prompts to use for training
+
+        Returns:
+            ModelConfig: Configuration of the trained model
+        """
+        pass
+
+    @abstractmethod
+    async def train(
+        self,
+        agent_function: Callable[[Any], Any],
+        scorers: List[Union[ExampleAPIScorerConfig, ExampleScorer]],
+        prompts: List[Any],
+    ) -> ModelConfig:
+        """
+        Start the reinforcement learning fine-tuning process.
+
+        This is the main entry point for running the training.
+
+        Args:
+            agent_function: Function/agent to call for generating responses
+            scorers: List of scorer objects to evaluate responses
+            prompts: List of prompts to use for training
+
+        Returns:
+            ModelConfig: Configuration of the trained model
+        """
+        pass
+
+    @abstractmethod
+    def _extract_message_history_from_spans(self) -> List[Dict[str, str]]:
+        """
+        Extract message history from spans for training purposes.
+
+        Returns:
+            List of message dictionaries with 'role' and 'content' keys
+        """
+        pass
diff --git a/src/judgeval/trainer/config.py b/src/judgeval/trainer/config.py
@@ -16,7 +16,7 @@ class TrainerConfig:
     user_id: str
     model_id: str
     base_model_name: str = "qwen2p5-7b-instruct"
-    rft_provider: str = "fireworks"
+    rft_provider: str = "fireworks"  # Supported: "fireworks", "verifiers" (future)
     num_steps: int = 5
     num_generations_per_prompt: int = 4
     num_prompts_per_step: int = 4
diff --git a/src/judgeval/trainer/fireworks_trainer.py b/src/judgeval/trainer/fireworks_trainer.py
diff --git a/src/judgeval/trainer/trainer.py b/src/judgeval/trainer/trainer.py

Original file line number	Diff line number	Diff line change
`@@ -36,8 +36,7 @@ Judgeval's agent monitoring infra provides a simple harness for integrating GRPO`
`36`	`36`	`await trainer.train(`
`37`	`37`	`agent_function=your_agent_function, # entry point to your agent`
`38`	`38`	`scorers=[RewardScorer()], # Custom scorer you define based on task criteria, acts as reward`
`39`		`- prompts=training_prompts, # Tasks`
`40`		`- rft_provider="fireworks"`
	`39`	`+ prompts=training_prompts # Tasks`
`41`	`40`	`)`
`42`	`41`	`` ``` ``
`43`	`42`