# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
import asyncio
import contextvars
import importlib.util

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
- from pydantic import BaseModel, Field, root_validator, validator
+ from pydantic import Field, root_validator, validator
from starlette.responses import StreamingResponse
from starlette.staticfiles import StaticFiles

from nemoguardrails import LLMRails, RailsConfig, utils
- from nemoguardrails.rails.llm.options import (
-     GenerationLog,
-     GenerationOptions,
-     GenerationResponse,
- )
+ from nemoguardrails.rails.llm.options import GenerationOptions, GenerationResponse
from nemoguardrails.server.datastore.datastore import DataStore
+ from nemoguardrails.server.schemas.openai import (
+     Choice,
+     Model,
+     ModelsResponse,
+     OpenAIRequestFields,
+     ResponseBody,
+ )
from nemoguardrails.streaming import StreamingHandler

logging.basicConfig(level=logging.INFO)
@@ -191,7 +195,7 @@ async def root_handler():
app.single_config_id = None


- class RequestBody(BaseModel):
+ class RequestBody(OpenAIRequestFields):
    config_id: Optional[str] = Field(
        default=os.getenv("DEFAULT_CONFIG_ID", None),
        description="The id of the configuration to be used. If not set, the default configuration will be used.",
    )
@@ -230,47 +234,6 @@ class RequestBody(BaseModel):
        default=None,
        description="A state object that should be used to continue the interaction.",
    )
-     # Standard OpenAI completion parameters
-     model: Optional[str] = Field(
-         default=None,
-         description="The model to use for chat completion. Maps to config_id for backward compatibility.",
-     )
-     max_tokens: Optional[int] = Field(
-         default=None,
-         description="The maximum number of tokens to generate.",
-     )
-     temperature: Optional[float] = Field(
-         default=None,
-         description="Sampling temperature to use.",
-     )
-     top_p: Optional[float] = Field(
-         default=None,
-         description="Top-p sampling parameter.",
-     )
-     stop: Optional[str] = Field(
-         default=None,
-         description="Stop sequences.",
-     )
-     presence_penalty: Optional[float] = Field(
-         default=None,
-         description="Presence penalty parameter.",
-     )
-     frequency_penalty: Optional[float] = Field(
-         default=None,
-         description="Frequency penalty parameter.",
-     )
-     function_call: Optional[dict] = Field(
-         default=None,
-         description="Function call parameter.",
-     )
-     logit_bias: Optional[dict] = Field(
-         default=None,
-         description="Logit bias parameter.",
-     )
-     log_probs: Optional[bool] = Field(
-         default=None,
-         description="Log probabilities parameter.",
-     )

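The ten standard OpenAI completion fields deleted above are not lost: per the import change at the top of the diff, `RequestBody` now inherits them from `OpenAIRequestFields` in the new `nemoguardrails.server.schemas.openai` module. That module's body is not part of this diff; a minimal sketch of what the mixin presumably contains, with field names, types, and descriptions carried over from the deleted block:

```python
# Presumed sketch of OpenAIRequestFields in nemoguardrails/server/schemas/openai.py.
# The module is not shown in this diff; fields are copied from the block deleted
# from RequestBody above.
from typing import Optional

from pydantic import BaseModel, Field


class OpenAIRequestFields(BaseModel):
    """Standard OpenAI chat completion parameters shared by request models."""

    model: Optional[str] = Field(
        default=None,
        description="The model to use for chat completion. Maps to config_id for backward compatibility.",
    )
    max_tokens: Optional[int] = Field(default=None, description="The maximum number of tokens to generate.")
    temperature: Optional[float] = Field(default=None, description="Sampling temperature to use.")
    top_p: Optional[float] = Field(default=None, description="Top-p sampling parameter.")
    stop: Optional[str] = Field(default=None, description="Stop sequences.")
    presence_penalty: Optional[float] = Field(default=None, description="Presence penalty parameter.")
    frequency_penalty: Optional[float] = Field(default=None, description="Frequency penalty parameter.")
    function_call: Optional[dict] = Field(default=None, description="Function call parameter.")
    logit_bias: Optional[dict] = Field(default=None, description="Logit bias parameter.")
    log_probs: Optional[bool] = Field(default=None, description="Log probabilities parameter.")
```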
    @root_validator(pre=True)
    def ensure_config_id(cls, data: Any) -> Any:
@@ -297,75 +260,6 @@ def ensure_config_ids(cls, v, values):
        return v


- class Choice(BaseModel):
-     index: Optional[int] = Field(
-         default=None, description="The index of the choice in the list of choices."
-     )
-     messages: Optional[dict] = Field(
-         default=None, description="The message of the choice"
-     )
-     logprobs: Optional[dict] = Field(
-         default=None, description="The log probabilities of the choice"
-     )
-     finish_reason: Optional[str] = Field(
-         default=None, description="The reason the model stopped generating tokens."
-     )
-
-
- class ResponseBody(BaseModel):
-     # OpenAI-compatible fields
-     id: Optional[str] = Field(
-         default=None, description="A unique identifier for the chat completion."
-     )
-     object: str = Field(
-         default="chat.completion",
-         description="The object type, which is always chat.completion",
-     )
-     created: Optional[int] = Field(
-         default=None,
-         description="The Unix timestamp (in seconds) of when the chat completion was created.",
-     )
-     model: Optional[str] = Field(
-         default=None, description="The model used for the chat completion."
-     )
-     choices: Optional[List[Choice]] = Field(
-         default=None, description="A list of chat completion choices."
-     )
-     # NeMo-Guardrails specific fields for backward compatibility
-     state: Optional[dict] = Field(
-         default=None, description="State object for continuing the conversation."
-     )
-     llm_output: Optional[dict] = Field(
-         default=None, description="Additional LLM output data."
-     )
-     output_data: Optional[dict] = Field(
-         default=None, description="Additional output data."
-     )
-     log: Optional[dict] = Field(default=None, description="Generation log data.")
-
-
- class Model(BaseModel):
-     id: str = Field(
-         description="The model identifier, which can be referenced in the API endpoints."
-     )
-     object: str = Field(
-         default="model", description="The object type, which is always 'model'."
-     )
-     created: int = Field(
-         description="The Unix timestamp (in seconds) of when the model was created."
-     )
-     owned_by: str = Field(
-         default="nemo-guardrails", description="The organization that owns the model."
-     )
-
-
- class ModelsResponse(BaseModel):
-     object: str = Field(
-         default="list", description="The object type, which is always 'list'."
-     )
-     data: List[Model] = Field(description="The list of models.")
-
-
@app.get(
    "/v1/models",
    response_model=ModelsResponse,
@@ -540,7 +434,7 @@ async def chat_completion(body: RequestBody, request: Request):
        choices=[
            Choice(
                index=0,
-                 messages={
+                 message={
                    "content": f"Could not load the {config_ids} guardrails configuration. "
                    f"An internal error has occurred.",
                    "role": "assistant",
@@ -573,7 +467,7 @@ async def chat_completion(body: RequestBody, request: Request):
        choices=[
            Choice(
                index=0,
-                 messages={
+                 message={
                    "content": "The `thread_id` must have a minimum length of 16 characters.",
                    "role": "assistant",
                },
@@ -591,19 +485,25 @@ async def chat_completion(body: RequestBody, request: Request):
        # And prepend them.
        messages = thread_messages + messages

-     generation_options = body.options
-     if body.max_tokens:
-         generation_options.max_tokens = body.max_tokens
-     if body.temperature is not None:
-         generation_options.temperature = body.temperature
-     if body.top_p is not None:
-         generation_options.top_p = body.top_p
-     if body.stop:
-         generation_options.stop = body.stop
-     if body.presence_penalty is not None:
-         generation_options.presence_penalty = body.presence_penalty
-     if body.frequency_penalty is not None:
-         generation_options.frequency_penalty = body.frequency_penalty
+     generation_options = body.options
+
+     # Initialize llm_params if not already set
+     if generation_options.llm_params is None:
+         generation_options.llm_params = {}
+
+     # Set OpenAI-compatible parameters in llm_params
+     if body.max_tokens:
+         generation_options.llm_params["max_tokens"] = body.max_tokens
+     if body.temperature is not None:
+         generation_options.llm_params["temperature"] = body.temperature
+     if body.top_p is not None:
+         generation_options.llm_params["top_p"] = body.top_p
+     if body.stop:
+         generation_options.llm_params["stop"] = body.stop
+     if body.presence_penalty is not None:
+         generation_options.llm_params["presence_penalty"] = body.presence_penalty
+     if body.frequency_penalty is not None:
+         generation_options.llm_params["frequency_penalty"] = body.frequency_penalty

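Instead of assigning the OpenAI sampling parameters as attributes directly on `GenerationOptions`, the handler now collects them in the `llm_params` dict, the pass-through channel for per-call LLM keyword arguments. An end-to-end illustration of the effect, assuming the server runs locally on port 8000 and that this handler serves the usual `/v1/chat/completions` route:

```python
# Illustration only: host, port, and endpoint path are assumptions, and
# `requests` is just one possible HTTP client.
import requests

response = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "config_id": "my_config",  # hypothetical configuration id
        "messages": [{"role": "user", "content": "Hello!"}],
        # These OpenAI-style fields should now land in
        # generation_options.llm_params as
        # {"temperature": 0.2, "max_tokens": 128} before generation runs.
        "temperature": 0.2,
        "max_tokens": 128,
    },
)
print(response.json())
```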
    if (
        body.stream
@@ -654,7 +554,7 @@ async def chat_completion(body: RequestBody, request: Request):
            "choices": [
                Choice(
                    index=0,
-                     messages=bot_message,
+                     message=bot_message,
                    finish_reason="stop",
                    logprobs=None,
                )
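With the rename applied here and in the error paths, every element of `choices` serializes with a singular `message` key, as in the OpenAI chat-completion response format. An illustrative success payload (keys follow the `ResponseBody`/`Choice` fields above; the values are made up):

```python
# Made-up example of the serialized response shape after the rename; only the
# key names are meaningful.
example_response = {
    "object": "chat.completion",
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "Hello! How can I help you?"},
            "finish_reason": "stop",
            "logprobs": None,
        }
    ],
}
```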
@@ -680,7 +580,7 @@ async def chat_completion(body: RequestBody, request: Request):
        choices=[
            Choice(
                index=0,
-                 messages={
+                 message={
                    "content": "Internal server error",
                    "role": "assistant",
                },