diff --git a/.env.example b/.env.example
index 7d6b3012..74aa4d05 100644
--- a/.env.example
+++ b/.env.example
@@ -26,6 +26,42 @@ DIAL_API_KEY=your_dial_api_key_here
# DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended)
# DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests
+# Azure OpenAI (uses Responses API - works with GPT-5 and GPT-5-Codex)
+# ==============================================================================
+# IMPORTANT: This integration uses the Azure OpenAI Responses API exclusively
+# Works with both GPT-5 (general purpose) and GPT-5-Codex (code-specialized) models
+#
+# How to obtain credentials:
+# 1. Log in to Azure Portal (https://portal.azure.com)
+# 2. Navigate to your Azure OpenAI resource
+# 3. Go to "Keys and Endpoint" section
+# 4. Copy the API Key and Endpoint URL
+# 5. Note your deployment name (defined when you created the deployment)
+#
+# All 4 variables below are REQUIRED for Azure OpenAI to work:
+AZURE_OPENAI_API_KEY=your_azure_openai_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+AZURE_OPENAI_API_VERSION=2025-04-01-preview
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+#
+# Configuration Notes:
+# - API Key: Found in Azure Portal > Azure OpenAI > Keys and Endpoint
+# - Endpoint: Must include https:// (trailing slash optional, e.g. https://your-resource.openai.azure.com/)
+# - API Version: Must be 2025-03-01-preview or later for Responses API support
+# - Deployment Name: Your custom deployment name from Azure Portal (e.g., gpt-5, gpt-5-codex, or any custom name)
+#
+# Model Information:
+# - Deployment name is user-defined in Azure Portal when creating the deployment
+# - Common deployment names: gpt-5, gpt-5-codex, or custom names you choose
+# - GPT-5: General purpose model (400K context, 128K output, reasoning tokens)
+# - GPT-5-Codex: Code-specialized variant with enhanced programming capabilities
+#
+# Important Constraints:
+# - Temperature is fixed at 1.0 for GPT-5 family models (cannot be changed)
+# - Do NOT use placeholder values like "your_azure_openai_key_here" - they will fail
+# - All 4 variables must be set with real values from your Azure deployment
+# ==============================================================================
+
# Option 2: Use OpenRouter for access to multiple models through one API
# Get your OpenRouter API key from: https://openrouter.ai/
# If using OpenRouter, comment out the native API keys above
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
new file mode 100644
index 00000000..31c04fdf
--- /dev/null
+++ b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,57 @@
+name: Claude Code Review
+
+on:
+ pull_request:
+ types: [opened, synchronize]
+ # Optional: Only run on specific file changes
+ # paths:
+ # - "src/**/*.ts"
+ # - "src/**/*.tsx"
+ # - "src/**/*.js"
+ # - "src/**/*.jsx"
+
+jobs:
+ claude-review:
+ # Optional: Filter by PR author
+ # if: |
+ # github.event.pull_request.user.login == 'external-contributor' ||
+ # github.event.pull_request.user.login == 'new-developer' ||
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code Review
+ id: claude-review
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+ prompt: |
+ REPO: ${{ github.repository }}
+ PR NUMBER: ${{ github.event.pull_request.number }}
+
+ Please review this pull request and provide feedback on:
+ - Code quality and best practices
+ - Potential bugs or issues
+ - Performance considerations
+ - Security concerns
+ - Test coverage
+
+ Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
+
+ Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
+
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options
+ claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
+
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
new file mode 100644
index 00000000..b1a3201d
--- /dev/null
+++ b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+ issue_comment:
+ types: [created]
+ pull_request_review_comment:
+ types: [created]
+ issues:
+ types: [opened, assigned]
+ pull_request_review:
+ types: [submitted]
+
+jobs:
+ claude:
+ if: |
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+ actions: read # Required for Claude to read CI results on PRs
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code
+ id: claude
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+ # This is an optional setting that allows Claude to read CI results on PRs
+ additional_permissions: |
+ actions: read
+
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+ # prompt: 'Update the pull request description to include a summary of changes.'
+
+ # Optional: Add claude_args to customize behavior and configuration
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options
+ # claude_args: '--model claude-opus-4-1-20250805 --allowed-tools Bash(gh pr:*)'
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20498392..bafbce43 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
+## v1.0.0 (2025-10-04)
+
+- Initial Release
+
## v5.21.0 (2025-10-03)
### Chores
diff --git a/README.md b/README.md
index 8f6b1318..5b5805a0 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
[zen_web.webm](https://github.com/user-attachments/assets/851e3911-7f06-47c0-a4ab-a2601236697c)
-
🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team
+
🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Azure OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team
@@ -85,6 +85,7 @@ For best results, use Claude Code with:
- **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API
- **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models
- **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series
+- **[Azure OpenAI](https://portal.azure.com/)** - GPT-5, GPT-5-Codex via Responses API
- **[X.AI](https://console.x.ai/)** - Grok models
- **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access
- **[Ollama](https://ollama.ai/)** - Local models (free)
@@ -247,8 +248,8 @@ DISABLED_TOOLS=
- **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets
**Model Support**
-- **Multiple providers** - Gemini, OpenAI, X.AI, OpenRouter, DIAL, Ollama
-- **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama
+- **Multiple providers** - Gemini, OpenAI, Azure OpenAI, X.AI, OpenRouter, DIAL, Ollama
+- **Latest models** - GPT-5, GPT-5-Codex, Gemini 2.5 Pro, O3, Grok-4, local Llama
- **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost
- **Vision support** - Analyze images, diagrams, screenshots
@@ -307,6 +308,7 @@ Built with the power of **Multi-Model AI** collaboration 🤝
- [Claude Code](https://claude.ai/code) - Your AI coding orchestrator
- [Gemini 2.5 Pro & Flash](https://ai.google.dev/) - Extended thinking & fast analysis
- [OpenAI O3 & GPT-5](https://openai.com/) - Strong reasoning & latest capabilities
+- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) - Enterprise GPT-5 & GPT-5-Codex via Responses API
### Star History
diff --git a/config.py b/config.py
index 1e5a2b1c..1e3db855 100644
--- a/config.py
+++ b/config.py
@@ -14,9 +14,9 @@
# These values are used in server responses and for tracking releases
# IMPORTANT: This is the single source of truth for version and author info
# Semantic versioning: MAJOR.MINOR.PATCH
-__version__ = "5.21.0"
+__version__ = "1.0.0"
# Last update date in ISO format
-__updated__ = "2025-10-03"
+__updated__ = "2025-10-04"
# Primary maintainer
__author__ = "Fahad Gilani"
diff --git a/docs/AZURE_OPENAI_TROUBLESHOOTING.md b/docs/AZURE_OPENAI_TROUBLESHOOTING.md
new file mode 100644
index 00000000..212dec5e
--- /dev/null
+++ b/docs/AZURE_OPENAI_TROUBLESHOOTING.md
@@ -0,0 +1,653 @@
+# Azure OpenAI Troubleshooting Guide
+
+This guide provides comprehensive troubleshooting information for Azure OpenAI integration with Zen MCP Server.
+
+## Implementation Overview
+
+**IMPORTANT:** This implementation uses Azure OpenAI **Responses API** exclusively.
+
+- Works with both **GPT-5** and **GPT-5-Codex** models
+- Uses Responses API (not Chat Completions API) as required by GPT-5-Codex
+- Different content extraction methods than standard Chat Completions
+- Supports multi-turn conversations with proper session management
+
+---
+
+## Common Issues and Solutions
+
+### 1. Missing Environment Variables
+
+**Problem:** Azure OpenAI provider not available or returns configuration errors.
+
+**Solution:** Ensure all required environment variables are set in your `.env` file:
+
+```bash
+# Required Azure OpenAI Configuration
+AZURE_OPENAI_API_KEY=your-api-key-here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex # or gpt-5
+AZURE_OPENAI_API_VERSION=2025-03-01-preview # Must be 2025-03-01-preview or later
+```
+
+**Verify configuration:**
+```bash
+# Check if variables are set
+grep "AZURE_OPENAI" .env
+
+# Expected output should show all four variables with values
+```
+
+---
+
+### 2. Invalid API Key
+
+**Problem:** Authentication errors when making API calls.
+
+**Error Message:**
+```
+401 Unauthorized: Invalid API key provided
+```
+
+**Solution:**
+1. Verify your API key in Azure Portal:
+ - Go to Azure Portal → Your Azure OpenAI resource
+ - Navigate to "Keys and Endpoint"
+ - Copy either KEY 1 or KEY 2
+ - Update `AZURE_OPENAI_API_KEY` in `.env` file
+
+2. Ensure no extra spaces or quotes in the API key:
+```bash
+# Correct format
+AZURE_OPENAI_API_KEY=abcd1234567890...
+
+# Incorrect format (no quotes needed)
+AZURE_OPENAI_API_KEY="abcd1234567890..."
+```
+
+---
+
+### 3. Wrong Endpoint Format
+
+**Problem:** Connection errors or invalid endpoint errors.
+
+**Error Message:**
+```
+Invalid URL or endpoint format
+```
+
+**Solution:** Ensure endpoint follows correct format:
+
+```bash
+# Correct format
+AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com
+
+# Incorrect format (missing https://)
+AZURE_OPENAI_ENDPOINT=your-resource-name.openai.azure.com
+
+# Also accepted — a trailing slash is tolerated (the .env.example template uses this form)
+AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/
+```
+
+**Verify endpoint:**
+```bash
+# Test endpoint connectivity
+curl -I https://your-resource-name.openai.azure.com
+```
+
+---
+
+### 4. Old API Version
+
+**Problem:** Responses API not available or unsupported API version.
+
+**Error Message:**
+```
+API version not supported or Responses API not available
+```
+
+**Solution:** Update to required API version:
+
+```bash
+# Required version for Responses API
+AZURE_OPENAI_API_VERSION=2025-03-01-preview
+
+# Older versions NOT supported for Responses API
+AZURE_OPENAI_API_VERSION=2024-10-21 # Too old
+AZURE_OPENAI_API_VERSION=2024-08-01-preview # Too old
+```
+
+**Note:** The Responses API requires API version `2025-03-01-preview` or later. Earlier versions only support Chat Completions API.
+
+---
+
+### 5. Deployment Name Mismatch
+
+**Problem:** Deployment not found or model not available.
+
+**Error Message:**
+```
+404 Not Found: The API deployment for this resource does not exist
+```
+
+**Solution:**
+1. Verify deployment name in Azure Portal:
+ - Go to Azure Portal → Your Azure OpenAI resource
+ - Navigate to "Model deployments"
+ - Copy exact deployment name (case-sensitive)
+
+2. Update deployment name in `.env`:
+```bash
+# Use exact deployment name from Azure Portal
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+
+# Common deployment names
+# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5
+# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+# AZURE_OPENAI_DEPLOYMENT_NAME=my-gpt5-deployment
+```
+
+**Verify deployment exists:**
+```bash
+# List deployments using Azure CLI
+az cognitiveservices account deployment list \
+ --name your-resource-name \
+ --resource-group your-resource-group
+```
+
+---
+
+### 6. Temperature Validation Errors
+
+**Problem:** Invalid temperature value for GPT-5-Codex.
+
+**Error Message:**
+```
+Temperature must be exactly 1.0 for GPT-5-Codex model
+Invalid temperature value: must be 1.0
+```
+
+**Solution:** The implementation enforces temperature=1.0 for GPT-5-Codex:
+
+```python
+# Temperature is automatically set to 1.0 for GPT-5-Codex
+# No configuration needed - handled internally
+
+# For other models (if supported later), temperature can vary
+# But for GPT-5-Codex: temperature=1.0 is required
+```
+
+**Note:** This is a GPT-5-Codex requirement enforced by Azure, not a server limitation.
+
+---
+
+### 7. Rate Limiting
+
+**Problem:** Too many requests or quota exceeded.
+
+**Error Message:**
+```
+429 Too Many Requests: Rate limit exceeded
+403 Forbidden: Quota exceeded
+```
+
+**Solution:**
+1. Check your Azure quota:
+ - Go to Azure Portal → Your Azure OpenAI resource
+ - Navigate to "Quotas"
+ - Verify Tokens Per Minute (TPM) limit
+
+2. Implement retry logic (already built-in):
+ - The server automatically retries with exponential backoff
+ - Wait a few moments between requests
+
+3. Request quota increase:
+ - Contact Azure support to increase your TPM quota
+ - Upgrade to higher tier if available
+
+---
+
+## Responses API Specific Issues
+
+### Understanding Responses API
+
+**Key Differences from Chat Completions API:**
+
+1. **Endpoint URL:**
+ ```bash
+ # Responses API (what we use)
+ POST https://{resource}.openai.azure.com/openai/deployments/{deployment}/responses
+
+ # Chat Completions API (NOT used)
+ POST https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions
+ ```
+
+2. **Content Extraction:**
+ ```python
+ # Responses API - two possible formats
+ # Format 1: output_text field
+ content = response_data.get("output_text", "")
+
+ # Format 2: output array
+ output = response_data.get("output", [])
+ if output and len(output) > 0:
+ content = output[0].get("content", "")
+ ```
+
+3. **Required Models:**
+ - GPT-5-Codex: **Requires** Responses API
+ - GPT-5: Works with Responses API
+
+### Responses API Error Handling
+
+**Problem:** Empty or missing response content.
+
+**Solution:** The implementation handles multiple content extraction methods:
+
+```python
+# Check multiple possible response formats
+# 1. Try output_text field
+# 2. Try output array
+# 3. Try choices array (fallback)
+# 4. Return error if none found
+```
+
+If you see empty responses, check server logs:
+```bash
+tail -n 100 logs/mcp_server.log | grep "Azure OpenAI"
+```
+
+---
+
+## Configuration Validation
+
+### Verify Azure Credentials
+
+**Step 1: Check environment variables**
+```bash
+# View current configuration
+grep "AZURE_OPENAI" .env
+
+# Expected output:
+# AZURE_OPENAI_API_KEY=sk-...
+# AZURE_OPENAI_ENDPOINT=https://...
+# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+# AZURE_OPENAI_API_VERSION=2025-03-01-preview
+```
+
+**Step 2: Test API key validity**
+```bash
+# Using curl to test authentication
+curl -X POST "https://your-resource.openai.azure.com/openai/deployments/gpt-5-codex/responses?api-version=2025-03-01-preview" \
+ -H "api-key: your-api-key-here" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "messages": [{"role": "user", "content": "test"}],
+ "temperature": 1.0
+ }'
+```
+
+### Test Endpoint Connectivity
+
+**Check DNS resolution:**
+```bash
+# Verify endpoint resolves
+nslookup your-resource.openai.azure.com
+```
+
+**Check network connectivity:**
+```bash
+# Test HTTPS connection
+curl -I https://your-resource.openai.azure.com
+```
+
+**Expected response:**
+```
+HTTP/2 401
+# 401 is expected without API key - confirms endpoint is reachable
+```
+
+### Verify Deployment Exists
+
+**Using Azure CLI:**
+```bash
+# List all deployments
+az cognitiveservices account deployment list \
+ --name your-resource-name \
+ --resource-group your-resource-group \
+ --query "[].{name:name, model:properties.model.name}" \
+ --output table
+
+# Expected output:
+# Name Model
+# ---------------- -------------
+# gpt-5-codex gpt-5-codex
+```
+
+**Using Azure Portal:**
+1. Navigate to your Azure OpenAI resource
+2. Click "Model deployments"
+3. Verify deployment name and model
+
+### Verify API Version Support
+
+**Check supported API versions:**
+```bash
+# List available API versions for your resource
+az cognitiveservices account show \
+ --name your-resource-name \
+ --resource-group your-resource-group \
+ --query "properties.capabilities"
+```
+
+**Ensure using latest version:**
+- API version must be `2025-03-01-preview` or later
+- Older versions do not support Responses API
+
+---
+
+## Common Error Messages
+
+### Authentication Errors
+
+**Error:** `401 Unauthorized`
+```json
+{
+ "error": {
+ "code": "401",
+ "message": "Access denied due to invalid subscription key or wrong API endpoint."
+ }
+}
+```
+
+**Solutions:**
+1. Verify `AZURE_OPENAI_API_KEY` is correct
+2. Check API key is not expired
+3. Ensure using correct endpoint
+4. Regenerate API key if needed
+
+---
+
+### API Version Errors
+
+**Error:** `API version not supported`
+```json
+{
+ "error": {
+ "code": "InvalidApiVersion",
+ "message": "The requested API version is not supported."
+ }
+}
+```
+
+**Solutions:**
+1. Update `AZURE_OPENAI_API_VERSION=2025-03-01-preview`
+2. Verify your resource supports this API version
+3. Check Azure region availability
+
+---
+
+### Deployment Not Found Errors
+
+**Error:** `404 Not Found`
+```json
+{
+ "error": {
+ "code": "DeploymentNotFound",
+ "message": "The API deployment for this resource does not exist."
+ }
+}
+```
+
+**Solutions:**
+1. Verify deployment name is correct (case-sensitive)
+2. Check deployment exists in Azure Portal
+3. Ensure deployment is in "Succeeded" state
+4. Verify using correct resource/endpoint
+
+---
+
+### Temperature Constraint Errors
+
+**Error:** `Invalid temperature value`
+```json
+{
+ "error": {
+ "code": "InvalidParameter",
+ "message": "Temperature must be exactly 1.0 for GPT-5-Codex model."
+ }
+}
+```
+
+**Solutions:**
+- This is enforced by Azure for GPT-5-Codex
+- The implementation automatically sets temperature=1.0
+- If you see this error, check server logs for configuration issues
+
+---
+
+### Content Extraction Errors
+
+**Error:** Empty response or missing content
+
+**Symptoms:**
+- Tool returns empty string
+- No visible output from model
+- Logs show successful API call but no content
+
+**Solutions:**
+1. Check server logs for response format:
+```bash
+tail -n 200 logs/mcp_server.log | grep "Azure OpenAI response"
+```
+
+2. Verify Responses API is being used (not Chat Completions):
+```bash
+grep "responses?" logs/mcp_server.log
+```
+
+3. Check for multiple content extraction attempts in logs
+
+---
+
+## Testing and Validation
+
+### Run Integration Tests
+
+**Test Azure OpenAI provider:**
+```bash
+# Run integration tests (requires API keys)
+./run_integration_tests.sh
+
+# Run specific Azure OpenAI tests
+python -m pytest tests/ -v -k "azure" -m integration
+```
+
+**Expected output:**
+```
+tests/test_azure_openai_integration.py::test_azure_provider_registration PASSED
+tests/test_azure_openai_integration.py::test_azure_api_call PASSED
+tests/test_azure_openai_integration.py::test_azure_responses_api PASSED
+```
+
+### Check Server Logs
+
+**View recent Azure activity:**
+```bash
+# Filter for Azure OpenAI logs
+tail -n 500 logs/mcp_server.log | grep -i "azure"
+
+# View tool activity
+tail -n 100 logs/mcp_activity.log
+
+# Follow logs in real-time
+tail -f logs/mcp_server.log
+```
+
+**Look for:**
+- Provider registration confirmation
+- API call attempts
+- Response format handling
+- Error messages
+
+### Verify Provider Registration
+
+**Check provider availability:**
+```bash
+# Start server and check logs
+./run-server.sh
+
+# Look for registration message
+grep "Azure OpenAI provider registered" logs/mcp_server.log
+```
+
+**Expected log entry:**
+```
+INFO: Azure OpenAI provider registered successfully
+INFO: Deployment: gpt-5-codex
+INFO: API Version: 2025-03-01-preview
+```
+
+### Manual API Testing
+
+**Test Responses API directly:**
+```bash
+# Create test script
+cat > test_azure.sh << 'EOF'
+#!/bin/bash
+source .env
+
+curl -X POST "${AZURE_OPENAI_ENDPOINT}/openai/deployments/${AZURE_OPENAI_DEPLOYMENT_NAME}/responses?api-version=${AZURE_OPENAI_API_VERSION}" \
+ -H "api-key: ${AZURE_OPENAI_API_KEY}" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "messages": [
+ {"role": "user", "content": "Say hello in one word"}
+ ],
+ "temperature": 1.0,
+ "max_tokens": 10
+ }'
+EOF
+
+chmod +x test_azure.sh
+./test_azure.sh
+```
+
+**Expected response:**
+```json
+{
+ "output_text": "Hello",
+ "usage": {
+ "prompt_tokens": 12,
+ "completion_tokens": 1,
+ "total_tokens": 13
+ }
+}
+```
+
+---
+
+## Advanced Troubleshooting
+
+### Enable Debug Logging
+
+**Increase log verbosity:**
+```bash
+# Set debug level in environment
+export LOG_LEVEL=DEBUG
+
+# Restart server
+./run-server.sh
+
+# View detailed logs
+tail -f logs/mcp_server.log
+```
+
+### Network Diagnostics
+
+**Check firewall rules:**
+```bash
+# Test connectivity to Azure endpoint
+telnet your-resource.openai.azure.com 443
+
+# Check SSL certificate
+openssl s_client -connect your-resource.openai.azure.com:443
+```
+
+**Verify DNS:**
+```bash
+# Check DNS resolution
+dig your-resource.openai.azure.com
+
+# Alternative DNS check
+host your-resource.openai.azure.com
+```
+
+### Analyze Request/Response
+
+**Enable request logging:**
+```python
+# In providers/azure_openai.py
+# Temporarily add debug prints to see full request/response
+
+logger.debug(f"Request URL: {url}")
+logger.debug(f"Request headers: {headers}")
+logger.debug(f"Request body: {json.dumps(payload, indent=2)}")
+logger.debug(f"Response status: {response.status_code}")
+logger.debug(f"Response body: {response.text}")
+```
+
+**Check cassette recordings:**
+```bash
+# View recorded API interactions
+ls -la tests/cassettes/azure_*.yaml
+```
+
+---
+
+## Additional Resources
+
+### Azure Documentation
+
+- [Azure OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/)
+- [Responses API Reference](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#responses-api)
+- [GPT-5-Codex Model Details](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models#gpt-5-codex)
+- [API Version Support](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/api-version-deprecation)
+
+### Project Documentation
+
+- Main README: `README.md`
+- Development Guide: `CLAUDE.md`
+- Integration Tests: `tests/test_azure_openai_integration.py`
+- Provider Implementation: `providers/azure_openai.py`
+
+### Getting Help
+
+1. **Check server logs:** `tail -n 500 logs/mcp_server.log`
+2. **Run diagnostics:** `./run_integration_tests.sh`
+3. **Review Azure Portal:** Verify configuration and quotas
+4. **Contact Azure Support:** For Azure-specific issues
+5. **GitHub Issues:** Report bugs or request features
+
+---
+
+## Summary Checklist
+
+Before opening an issue, verify:
+
+- [ ] All environment variables set correctly in `.env`
+- [ ] API key is valid and not expired
+- [ ] Endpoint format is correct (https://...)
+- [ ] API version is `2025-03-01-preview` or later
+- [ ] Deployment name matches Azure Portal exactly
+- [ ] Deployment is in "Succeeded" state
+- [ ] Quota/rate limits not exceeded
+- [ ] Network connectivity to Azure endpoint
+- [ ] Server logs checked for specific errors
+- [ ] Integration tests run successfully
+
+---
+
+**Last Updated:** 2025-10-04
+**API Version Required:** 2025-03-01-preview or later
+**Supported Models:** GPT-5, GPT-5-Codex
+**Implementation:** Azure OpenAI Responses API
diff --git a/docs/configuration.md b/docs/configuration.md
index 12e9d655..b2e6be1b 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -30,10 +30,17 @@ OPENAI_API_KEY=your-openai-key
GEMINI_API_KEY=your_gemini_api_key_here
# Get from: https://makersuite.google.com/app/apikey
-# OpenAI API
+# OpenAI API
OPENAI_API_KEY=your_openai_api_key_here
# Get from: https://platform.openai.com/api-keys
+# Azure OpenAI API (Responses API - supports GPT-5 and GPT-5-Codex)
+AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+AZURE_OPENAI_API_VERSION=2025-04-01-preview
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+# Get from: https://portal.azure.com/ (Keys and Endpoint section)
+
# X.AI GROK API
XAI_API_KEY=your_xai_api_key_here
# Get from: https://console.x.ai/
@@ -59,6 +66,57 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model
- Use standard localhost URLs since the server runs natively
- Example: `http://localhost:11434/v1` for Ollama
+### Azure OpenAI Configuration
+
+Azure OpenAI integration uses the **Responses API** exclusively, supporting both GPT-5 and GPT-5-Codex models with enterprise-grade features.
+
+**Setup Steps:**
+
+1. **Create Azure OpenAI Resource:**
+ - Navigate to [Azure Portal](https://portal.azure.com/)
+ - Create or select an Azure OpenAI resource
+ - Deploy a GPT-5 or GPT-5-Codex model
+
+2. **Get Credentials:**
+ - Go to your Azure OpenAI resource
+ - Navigate to "Keys and Endpoint" section
+ - Copy the API key and endpoint URL
+
+3. **Configure Environment Variables:**
+ ```env
+ # Required for Azure OpenAI
+ AZURE_OPENAI_API_KEY=your_api_key_from_azure
+ AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+ AZURE_OPENAI_API_VERSION=2025-04-01-preview
+ AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+ ```
+
+**Supported Models:**
+- **`gpt-5`** - Intelligence score 16, 400K context window, 128K max output tokens
+- **`gpt-5-codex`** - Intelligence score 17, specialized for code generation and analysis
+
+**Key Features:**
+- **Responses API Implementation** - Uses Azure's Responses API (not Chat Completions API)
+- **Extended Thinking Support** - Full support for extended reasoning capabilities
+- **Deployment-Based Routing** - Routes requests through deployment names rather than model names
+- **Large Context Windows** - 400K token context, 128K token output capacity
+- **Temperature Constraint** - Temperature is fixed at 1.0 (cannot be adjusted)
+
+**Important Notes:**
+- Azure OpenAI requires all 4 environment variables to be configured
+- The deployment name must match your Azure deployment (not the model name directly)
+- Temperature is always set to 1.0 and cannot be modified
+- Uses deployment-based routing: requests go to your specific deployment endpoint
+
+**Example Configuration:**
+```env
+# Example Azure OpenAI setup for GPT-5-Codex
+AZURE_OPENAI_API_KEY=abc123def456ghi789jkl012mno345pqr
+AZURE_OPENAI_ENDPOINT=https://my-company-openai.openai.azure.com/
+AZURE_OPENAI_API_VERSION=2025-04-01-preview
+AZURE_OPENAI_DEPLOYMENT_NAME=my-gpt5-codex-deployment
+```
+
### Model Configuration
**Default Model Selection:**
@@ -70,10 +128,12 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended)
**Available Models:**
- **`auto`**: Claude automatically selects the optimal model
- **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis
-- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses
+- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses
- **`o3`**: Strong logical reasoning (200K context)
- **`o3-mini`**: Balanced speed/quality (200K context)
- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts
+- **`gpt-5`**: Azure OpenAI GPT-5 via Responses API (400K context, 128K output)
+- **`gpt-5-codex`**: Azure OpenAI GPT-5-Codex specialized for code (400K context, 128K output)
- **`grok-3`**: GROK-3 advanced reasoning (131K context)
- **`grok-4-latest`**: GROK-4 latest flagship model (256K context)
- **Custom models**: via OpenRouter or local APIs
@@ -190,6 +250,18 @@ LOG_LEVEL=DEBUG
CONVERSATION_TIMEOUT_HOURS=1
```
+### Azure OpenAI Setup
+```env
+# Azure OpenAI with GPT-5-Codex
+DEFAULT_MODEL=auto
+AZURE_OPENAI_API_KEY=your-azure-key
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+AZURE_OPENAI_API_VERSION=2025-04-01-preview
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex
+LOG_LEVEL=INFO
+CONVERSATION_TIMEOUT_HOURS=3
+```
+
### Production Setup
```env
# Production with cost controls
diff --git a/providers/azure_openai.py b/providers/azure_openai.py
new file mode 100644
index 00000000..08d603f8
--- /dev/null
+++ b/providers/azure_openai.py
@@ -0,0 +1,473 @@
+"""Azure OpenAI model provider implementation using Responses API.
+
+IMPORTANT: This implementation uses Azure OpenAI's **Responses API** exclusively,
+which works with both **GPT-5** and **GPT-5-Codex** models, as well as O3 reasoning
+models and GPT-4.1. The Responses API is required for GPT-5-Codex and provides
+consistent behavior across all Azure OpenAI models.
+
+This provider supports Azure OpenAI deployments using the Responses API format,
+which is required for advanced models like gpt-5, gpt-5-codex, gpt-5-mini,
+gpt-5-nano, o3-mini, and gpt-4.1.
+"""
+
+import logging
+from typing import TYPE_CHECKING, Optional
+
+if TYPE_CHECKING:
+ from tools.models import ToolModelCategory
+
+from openai import AzureOpenAI
+
+from .base import ModelProvider
+from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint
+
+logger = logging.getLogger(__name__)
+
+
+class AzureOpenAIProvider(ModelProvider):
+ """Azure OpenAI provider using Responses API.
+
+ IMPORTANT: This implementation uses Azure OpenAI's **Responses API** exclusively,
+ which works with both **GPT-5** and **GPT-5-Codex** models, as well as all variants
+ (gpt-5-mini, gpt-5-nano), O3 reasoning models (o3-mini), and GPT-4.1. The Responses
+ API is required for GPT-5-Codex and provides consistent behavior across all Azure
+ OpenAI models.
+
+ This provider connects to Azure OpenAI deployments and uses the Responses API
+ (client.responses.create) instead of the Chat Completions API. This is required
+ for certain advanced models like gpt-5-codex and provides extended reasoning
+ capabilities for gpt-5, gpt-5-mini, and o3-mini.
+
+ Supported Models:
+ - gpt-5: Advanced reasoning model (400K context, 128K output)
+ - gpt-5-codex: Elite code generation (400K context, 128K output)
+ - gpt-5-mini: Faster, cost-effective variant (400K context, 128K output)
+ - gpt-5-nano: Fastest, most cost-effective (400K context, 128K output)
+ - o3-mini: Strong reasoning model (200K context, 64K output)
+ - gpt-4.1: Extended context window (1M context, 32K output)
+
+ Configuration:
+ - api_key: Azure OpenAI API key
+ - azure_endpoint: Azure OpenAI endpoint URL
+ - api_version: API version (must be 2025-03-01-preview or later)
+ - deployment_name: The deployment name to use (e.g., "gpt-5", "gpt-5-codex")
+ """
+
+ # Model configurations using ModelCapabilities objects
+ MODEL_CAPABILITIES = {
+ "gpt-5": ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="gpt-5",
+ friendly_name="Azure OpenAI (GPT-5)",
+ intelligence_score=16,
+ context_window=400_000, # 400K tokens
+ max_output_tokens=128_000, # 128K max output tokens
+ supports_extended_thinking=True, # Supports reasoning tokens
+ supports_system_prompts=True,
+ supports_streaming=True,
+ supports_function_calling=True,
+ supports_json_mode=True,
+ supports_images=True, # GPT-5 supports vision
+ max_image_size_mb=20.0, # 20MB per OpenAI docs
+ supports_temperature=True,
+ temperature_constraint=TemperatureConstraint.create("range"),
+ description="Azure GPT-5 (400K context, 128K output) - Advanced reasoning model with extended thinking",
+ aliases=["gpt5", "azure-gpt5", "azure-gpt-5"],
+ ),
+ "gpt-5-codex": ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="gpt-5-codex",
+ friendly_name="Azure OpenAI (GPT-5 Codex)",
+ intelligence_score=17,
+ context_window=400_000, # 400K tokens
+ max_output_tokens=128_000, # 128K max output tokens
+ supports_extended_thinking=True, # Codex supports advanced reasoning
+ supports_system_prompts=True,
+ supports_streaming=True,
+ supports_function_calling=True,
+ supports_json_mode=True,
+ supports_images=False, # Codex is code-focused
+ max_image_size_mb=0.0,
+ supports_temperature=False, # Requires fixed temperature=1.0
+ temperature_constraint=TemperatureConstraint.create("fixed"),
+ description="Azure GPT-5 Codex (400K context, 128K output) - Elite code generation with deep reasoning (temperature=1.0 required)",
+ aliases=["gpt5-codex", "gpt5codex", "codex", "azure-codex", "azure-gpt5-codex"],
+ ),
+ "gpt-5-mini": ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="gpt-5-mini",
+ friendly_name="Azure OpenAI (GPT-5 Mini)",
+ intelligence_score=14,
+ context_window=400_000, # 400K tokens
+ max_output_tokens=128_000, # 128K max output tokens
+ supports_extended_thinking=True, # Supports reasoning tokens
+ supports_system_prompts=True,
+ supports_streaming=True,
+ supports_function_calling=True,
+ supports_json_mode=True,
+ supports_images=True, # GPT-5 variants support vision
+ max_image_size_mb=20.0, # 20MB per OpenAI docs
+ supports_temperature=True,
+ temperature_constraint=TemperatureConstraint.create("range"),
+ description="Azure GPT-5-Mini - Faster, cost-effective variant",
+ aliases=["gpt5-mini", "gpt5mini", "mini", "azure-mini"],
+ ),
+ "gpt-5-nano": ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="gpt-5-nano",
+ friendly_name="Azure OpenAI (GPT-5 Nano)",
+ intelligence_score=12,
+ context_window=400_000, # 400K tokens
+ max_output_tokens=128_000, # 128K max output tokens
+ supports_extended_thinking=False, # Nano does not support extended thinking
+ supports_system_prompts=True,
+ supports_streaming=True,
+ supports_function_calling=True,
+ supports_json_mode=True,
+ supports_images=True, # GPT-5 variants support vision
+ max_image_size_mb=20.0, # 20MB per OpenAI docs
+ supports_temperature=True,
+ temperature_constraint=TemperatureConstraint.create("range"),
+ description="Azure GPT-5-Nano - Fastest, most cost-effective",
+ aliases=["gpt5-nano", "gpt5nano", "nano", "azure-nano"],
+ ),
+ "o3-mini": ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="o3-mini",
+ friendly_name="Azure OpenAI (O3 Mini)",
+ intelligence_score=15,
+ context_window=200_000, # 200K tokens
+ max_output_tokens=64_000, # 64K max output tokens
+ supports_extended_thinking=True, # O3 supports advanced reasoning
+ supports_system_prompts=True,
+ supports_streaming=True,
+ supports_function_calling=True,
+ supports_json_mode=True,
+ supports_images=False, # O3 is reasoning-focused, not vision
+ max_image_size_mb=0.0,
+ supports_temperature=False, # Reasoning model requires fixed temperature=1.0
+ temperature_constraint=TemperatureConstraint.create("fixed"),
+ description="Azure O3-Mini - Strong reasoning model (temperature=1.0 required)",
+ aliases=["o3mini", "azure-o3-mini"],
+ ),
+ "gpt-4.1": ModelCapabilities(
+ provider=ProviderType.AZURE,
+ model_name="gpt-4.1",
+ friendly_name="Azure OpenAI (GPT-4.1)",
+ intelligence_score=14,
+ context_window=1_000_000, # 1M tokens
+ max_output_tokens=32_000, # 32K max output tokens
+ supports_extended_thinking=False, # GPT-4.1 does not support extended thinking
+ supports_system_prompts=True,
+ supports_streaming=True,
+ supports_function_calling=True,
+ supports_json_mode=True,
+ supports_images=True, # GPT-4.1 supports vision
+ max_image_size_mb=20.0, # 20MB per OpenAI docs
+ supports_temperature=True,
+ temperature_constraint=TemperatureConstraint.create("range"),
+ description="Azure GPT-4.1 - Extended context window",
+ aliases=["gpt4.1", "azure-gpt4.1"],
+ ),
+ }
+
+ def __init__(self, api_key: str, **kwargs):
+ """Initialize Azure OpenAI provider.
+
+ Args:
+ api_key: Azure OpenAI API key
+ **kwargs: Additional configuration including:
+ - azure_endpoint: Azure OpenAI endpoint URL (required)
+ - api_version: API version (required, must be 2025-03-01-preview or later)
+ - deployment_name: Deployment name (required)
+
+ Raises:
+ ValueError: If required configuration is missing
+ """
+ super().__init__(api_key, **kwargs)
+
+ # Validate required kwargs
+ self.azure_endpoint = kwargs.get("azure_endpoint")
+ self.api_version = kwargs.get("api_version")
+ self.deployment_name = kwargs.get("deployment_name")
+
+ if not self.azure_endpoint:
+ raise ValueError("azure_endpoint is required for Azure OpenAI provider")
+ if not self.api_version:
+ raise ValueError("api_version is required for Azure OpenAI provider")
+ if not self.deployment_name:
+ raise ValueError("deployment_name is required for Azure OpenAI provider")
+
+ # Validate API version supports Responses API
+        if self.api_version[:10] < "2025-03-01":  # compare date prefix; avoids "-preview" suffix skewing lexicographic order
+ logger.warning(
+ f"API version {self.api_version} may not support Responses API. "
+ "Recommended: 2025-03-01-preview or later"
+ )
+
+ # Lazy client initialization
+ self._client: Optional[AzureOpenAI] = None
+
+ logger.info(
+ f"Initialized Azure OpenAI provider: endpoint={self.azure_endpoint}, "
+ f"deployment={self.deployment_name}, api_version={self.api_version}"
+ )
+
+ def _get_client(self) -> AzureOpenAI:
+ """Get or create the Azure OpenAI client (lazy initialization)."""
+ if self._client is None:
+ self._client = AzureOpenAI(
+ api_key=self.api_key,
+ azure_endpoint=self.azure_endpoint,
+ api_version=self.api_version,
+ )
+ logger.debug("Created Azure OpenAI client")
+ return self._client
+
+ def get_provider_type(self) -> ProviderType:
+ """Get the provider type."""
+ return ProviderType.AZURE
+
+ def generate_content(
+ self,
+ prompt: str,
+ model_name: str,
+ system_prompt: Optional[str] = None,
+ temperature: float = 0.3,
+ max_output_tokens: Optional[int] = None,
+ **kwargs,
+ ) -> ModelResponse:
+ """Generate content using Azure OpenAI Responses API.
+
+ Args:
+ prompt: User prompt/message
+ model_name: Model name (will be resolved to deployment)
+ system_prompt: Optional system prompt
+ temperature: Temperature parameter (default 0.3)
+ max_output_tokens: Maximum output tokens
+ **kwargs: Additional parameters
+
+ Returns:
+ ModelResponse with generated content and usage data
+
+ Raises:
+ ValueError: If model is not supported
+ Exception: If API call fails
+ """
+ # Resolve model name and get capabilities
+ resolved_model = self._resolve_model_name(model_name)
+ capabilities = self.get_capabilities(resolved_model)
+
+ # Validate parameters
+ self.validate_parameters(resolved_model, temperature, **kwargs)
+
+ # Build input messages in Responses API format
+ input_messages = []
+ if system_prompt:
+ input_messages.append({"role": "system", "content": system_prompt})
+ input_messages.append({"role": "user", "content": prompt})
+
+ # Prepare API parameters
+ api_params = {
+ "model": self.deployment_name,
+ "input": input_messages,
+ }
+
+ # Add max_output_tokens if specified
+        if max_output_tokens is not None:
+ api_params["max_output_tokens"] = max_output_tokens
+ elif capabilities.max_output_tokens:
+ api_params["max_output_tokens"] = capabilities.max_output_tokens
+
+ # Add temperature if model supports it
+ if capabilities.supports_temperature:
+ api_params["temperature"] = temperature
+
+ logger.debug(
+ f"Azure OpenAI Responses API request: deployment={self.deployment_name}, "
+ f"model={resolved_model}, max_tokens={api_params.get('max_output_tokens')}"
+ )
+
+ try:
+ # Get client and make API call
+ client = self._get_client()
+ response = client.responses.create(**api_params)
+
+ # Extract content from response
+ content = self._extract_content(response)
+
+ # Extract usage data
+ usage = self._extract_usage(response)
+
+ # Build ModelResponse
+ model_response = ModelResponse(
+ content=content,
+ usage=usage,
+ model_name=resolved_model,
+ friendly_name=capabilities.friendly_name,
+ provider=ProviderType.AZURE,
+ metadata={
+ "response_id": response.id if hasattr(response, "id") else None,
+ "status": response.status if hasattr(response, "status") else None,
+ "deployment_name": self.deployment_name,
+ },
+ )
+
+ logger.debug(
+ f"Azure OpenAI response: tokens={usage.get('total_tokens', 0)}, "
+ f"status={response.status if hasattr(response, 'status') else 'N/A'}"
+ )
+
+ return model_response
+
+ except Exception as exc:
+ logger.error(f"Azure OpenAI API error: {exc}", exc_info=True)
+ raise
+
+ def _extract_content(self, response) -> str:
+ """Extract text content from Responses API response.
+
+ The Responses API returns content in different formats:
+ 1. output_text: Condensed text representation (preferred)
+ 2. output array: Array of output items (text, reasoning, etc.)
+
+ Args:
+ response: API response object
+
+ Returns:
+ Extracted text content
+
+ Raises:
+ ValueError: If no content can be extracted
+ """
+ # Try output_text first (condensed representation)
+ if hasattr(response, "output_text") and response.output_text:
+ logger.debug("Extracted content from output_text")
+ return response.output_text
+
+ # Parse output array for text items
+ if hasattr(response, "output") and response.output:
+ text_parts = []
+
+ for item in response.output:
+ item_type = getattr(item, "type", None)
+
+ if item_type == "text" or item_type == "message":
+ # Text output item
+ if hasattr(item, "content") and item.content:
+ if isinstance(item.content, list) and len(item.content) > 0:
+ # Content is a list of text parts
+                            text_parts.extend(part.text for part in item.content if hasattr(part, "text"))
+ elif isinstance(item.content, str):
+ # Content is a string
+ text_parts.append(item.content)
+ elif hasattr(item, "text"):
+ # Direct text attribute
+ text_parts.append(item.text)
+
+ elif item_type == "reasoning":
+ # Reasoning output (optional: include summary)
+ if hasattr(item, "summary") and item.summary:
+ logger.debug(f"Reasoning summary: {item.summary}")
+ # Optionally include reasoning in output
+ # text_parts.append(f"[Reasoning: {item.summary}]")
+
+ if text_parts:
+ content = "\n".join(text_parts)
+ logger.debug(f"Extracted content from output array ({len(text_parts)} parts)")
+ return content
+
+ # No content found
+ logger.warning("No content found in response")
+ raise ValueError("No content available in response")
+
+ def _extract_usage(self, response) -> dict[str, int]:
+ """Extract token usage from Responses API response.
+
+ Args:
+ response: API response object
+
+ Returns:
+ Dictionary with token usage (input_tokens, output_tokens, total_tokens)
+ """
+ usage = {}
+
+ if hasattr(response, "usage") and response.usage:
+ usage_obj = response.usage
+
+ # Extract input tokens
+ if hasattr(usage_obj, "input_tokens"):
+ usage["input_tokens"] = usage_obj.input_tokens
+ usage["prompt_tokens"] = usage_obj.input_tokens
+ elif hasattr(usage_obj, "prompt_tokens"):
+ usage["prompt_tokens"] = usage_obj.prompt_tokens
+ usage["input_tokens"] = usage_obj.prompt_tokens
+
+ # Extract output tokens
+ if hasattr(usage_obj, "output_tokens"):
+ usage["output_tokens"] = usage_obj.output_tokens
+ usage["completion_tokens"] = usage_obj.output_tokens
+ elif hasattr(usage_obj, "completion_tokens"):
+ usage["completion_tokens"] = usage_obj.completion_tokens
+ usage["output_tokens"] = usage_obj.completion_tokens
+
+ # Extract total tokens
+ if hasattr(usage_obj, "total_tokens"):
+ usage["total_tokens"] = usage_obj.total_tokens
+ else:
+ # Calculate total if not provided
+ input_tokens = usage.get("input_tokens", 0)
+ output_tokens = usage.get("output_tokens", 0)
+ usage["total_tokens"] = input_tokens + output_tokens
+
+ logger.debug(f"Token usage: {usage}")
+
+ return usage
+
+ def close(self) -> None:
+ """Clean up resources."""
+ if self._client is not None:
+ # AzureOpenAI client doesn't require explicit cleanup
+ self._client = None
+ logger.debug("Closed Azure OpenAI client")
+
+ def get_preferred_model(self, category: "ToolModelCategory", allowed_models: list[str]) -> Optional[str]:
+ """Get Azure's preferred model for a given category from allowed models.
+
+ Args:
+ category: The tool category requiring a model
+ allowed_models: Pre-filtered list of models allowed by restrictions
+
+ Returns:
+ Preferred model name or None
+ """
+ from tools.models import ToolModelCategory
+
+ if not allowed_models:
+ return None
+
+ # Helper to find first available from preference list
+ def find_first(preferences: list[str]) -> Optional[str]:
+ """Return first available model from preference list."""
+ for model in preferences:
+ if model in allowed_models:
+ return model
+ return None
+
+ if category == ToolModelCategory.EXTENDED_REASONING:
+ # Prefer models with extended thinking support
+ # Order: gpt-5-codex > o3-mini > gpt-5 > gpt-5-mini
+ preferred = find_first(["gpt-5-codex", "o3-mini", "gpt-5", "gpt-5-mini"])
+ return preferred if preferred else allowed_models[0]
+
+ elif category == ToolModelCategory.FAST_RESPONSE:
+ # Prefer faster models with good performance
+ # Order: gpt-5-mini > gpt-5-nano > gpt-5 > gpt-4.1
+ preferred = find_first(["gpt-5-mini", "gpt-5-nano", "gpt-5", "gpt-4.1"])
+ return preferred if preferred else allowed_models[0]
+
+ else: # BALANCED or default
+ # Prefer gpt-5-codex for code tasks, then balanced options
+ # Order: gpt-5-codex > gpt-5 > gpt-5-mini > o3-mini > gpt-4.1 > gpt-5-nano
+ preferred = find_first(["gpt-5-codex", "gpt-5", "gpt-5-mini", "o3-mini", "gpt-4.1", "gpt-5-nano"])
+ return preferred if preferred else allowed_models[0]
diff --git a/providers/registry.py b/providers/registry.py
index 6f412ff7..4fd2fe1f 100644
--- a/providers/registry.py
+++ b/providers/registry.py
@@ -37,6 +37,7 @@ class ModelProviderRegistry:
PROVIDER_PRIORITY_ORDER = [
ProviderType.GOOGLE, # Direct Gemini access
ProviderType.OPENAI, # Direct OpenAI access
+ ProviderType.AZURE, # Azure OpenAI access
ProviderType.XAI, # Direct X.AI GROK access
ProviderType.DIAL, # DIAL unified API access
ProviderType.CUSTOM, # Local/self-hosted models
@@ -122,6 +123,24 @@ def get_provider(cls, provider_type: ProviderType, force_new: bool = False) -> O
provider_kwargs["base_url"] = gemini_base_url
logging.info(f"Initialized Gemini provider with custom endpoint: {gemini_base_url}")
provider = provider_class(**provider_kwargs)
+ elif provider_type == ProviderType.AZURE:
+ # For Azure OpenAI, check required configuration
+ if not api_key:
+ return None
+ azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+ azure_version = os.getenv("AZURE_OPENAI_API_VERSION", "2025-04-01-preview")
+ deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
+
+ if not azure_endpoint or not deployment_name:
+ logging.warning("Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT_NAME")
+ return None
+
+ provider = provider_class(
+ api_key=api_key,
+ azure_endpoint=azure_endpoint,
+ api_version=azure_version,
+ deployment_name=deployment_name,
+ )
else:
if not api_key:
return None
@@ -265,6 +284,7 @@ def _get_api_key_for_provider(cls, provider_type: ProviderType) -> Optional[str]
ProviderType.OPENROUTER: "OPENROUTER_API_KEY",
ProviderType.CUSTOM: "CUSTOM_API_KEY", # Can be empty for providers that don't need auth
ProviderType.DIAL: "DIAL_API_KEY",
+ ProviderType.AZURE: "AZURE_OPENAI_API_KEY",
}
env_var = key_mapping.get(provider_type)
diff --git a/providers/shared/provider_type.py b/providers/shared/provider_type.py
index 44153f0a..8db043b3 100644
--- a/providers/shared/provider_type.py
+++ b/providers/shared/provider_type.py
@@ -12,5 +12,6 @@ class ProviderType(Enum):
OPENAI = "openai"
XAI = "xai"
OPENROUTER = "openrouter"
+ AZURE = "azure"
CUSTOM = "custom"
DIAL = "dial"
diff --git a/pyproject.toml b/pyproject.toml
index 74cf6091..e3e9bfd3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "zen-mcp-server"
-version = "5.21.0"
+version = "5.22.0"
description = "AI-powered MCP server with multiple model providers"
requires-python = ">=3.9"
dependencies = [
diff --git a/server.py b/server.py
index a8bf47e2..f5aa5461 100644
--- a/server.py
+++ b/server.py
@@ -412,6 +412,7 @@ def configure_providers():
value = os.getenv(key)
logger.debug(f" {key}: {'[PRESENT]' if value else '[MISSING]'}")
from providers import ModelProviderRegistry
+ from providers.azure_openai import AzureOpenAIProvider
from providers.custom import CustomProvider
from providers.dial import DIALModelProvider
from providers.gemini import GeminiModelProvider
@@ -453,6 +454,17 @@ def configure_providers():
has_native_apis = True
logger.info("X.AI API key found - GROK models available")
+ # Check for Azure OpenAI API key
+ azure_key = os.getenv("AZURE_OPENAI_API_KEY")
+ azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+ azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
+ if azure_key and azure_endpoint and azure_deployment:
+ if (azure_key != "your_azure_openai_key_here" and
+ azure_endpoint != "https://your-resource.openai.azure.com/"):
+ valid_providers.append("Azure OpenAI")
+ has_native_apis = True
+ logger.info(f"Azure OpenAI found - deployment: {azure_deployment}")
+
# Check for DIAL API key
dial_key = os.getenv("DIAL_API_KEY")
if dial_key and dial_key != "your_dial_api_key_here":
@@ -497,6 +509,10 @@ def configure_providers():
ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider)
if openai_key and openai_key != "your_openai_api_key_here":
ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider)
+ if azure_key and azure_endpoint and azure_deployment:
+ if (azure_key != "your_azure_openai_key_here" and
+ azure_endpoint != "https://your-resource.openai.azure.com/"):
+ ModelProviderRegistry.register_provider(ProviderType.AZURE, AzureOpenAIProvider)
if xai_key and xai_key != "your_xai_api_key_here":
ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider)
if dial_key and dial_key != "your_dial_api_key_here":
@@ -522,6 +538,7 @@ def custom_provider_factory(api_key=None):
"At least one API configuration is required. Please set either:\n"
"- GEMINI_API_KEY for Gemini models\n"
"- OPENAI_API_KEY for OpenAI models\n"
+ "- AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT_NAME for Azure OpenAI models\n"
"- XAI_API_KEY for X.AI GROK models\n"
"- DIAL_API_KEY for DIAL models\n"
"- OPENROUTER_API_KEY for OpenRouter (multiple models)\n"
diff --git a/tests/test_azure_openai_provider.py b/tests/test_azure_openai_provider.py
new file mode 100644
index 00000000..cbb254da
--- /dev/null
+++ b/tests/test_azure_openai_provider.py
@@ -0,0 +1,747 @@
+"""Tests for Azure OpenAI provider implementation using Responses API."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from providers.azure_openai import AzureOpenAIProvider
+from providers.shared import ProviderType
+
+
+class TestAzureOpenAIProvider:
+ """Test Azure OpenAI provider functionality."""
+
+ def setup_method(self):
+ """Set up clean state before each test."""
+ # Clear restriction service cache before each test
+ import utils.model_restrictions
+
+ utils.model_restrictions._restriction_service = None
+
+ def teardown_method(self):
+ """Clean up after each test to avoid singleton issues."""
+ # Clear restriction service cache after each test
+ import utils.model_restrictions
+
+ utils.model_restrictions._restriction_service = None
+
+ def test_initialization_success(self):
+ """Test successful provider initialization with all required parameters."""
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ assert provider.api_key == "test-key"
+ assert provider.azure_endpoint == "https://test.openai.azure.com"
+ assert provider.api_version == "2025-03-01-preview"
+ assert provider.deployment_name == "gpt-5"
+ assert provider.get_provider_type() == ProviderType.AZURE
+
+ def test_initialization_missing_azure_endpoint(self):
+ """Test initialization fails without azure_endpoint."""
+ with pytest.raises(ValueError, match="azure_endpoint is required"):
+ AzureOpenAIProvider(
+ api_key="test-key",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ def test_initialization_missing_api_version(self):
+ """Test initialization fails without api_version."""
+ with pytest.raises(ValueError, match="api_version is required"):
+ AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ deployment_name="gpt-5",
+ )
+
+ def test_initialization_missing_deployment_name(self):
+ """Test initialization fails without deployment_name."""
+ with pytest.raises(ValueError, match="deployment_name is required"):
+ AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ )
+
+ def test_initialization_old_api_version_warning(self):
+ """Test warning is logged for API versions older than 2025-03-01-preview."""
+ with patch("providers.azure_openai.logger") as mock_logger:
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2024-06-01",
+ deployment_name="gpt-5",
+ )
+
+ # Verify provider was created and warning was logged
+ assert provider is not None
+ mock_logger.warning.assert_called_once()
+ warning_message = mock_logger.warning.call_args[0][0]
+ assert "may not support Responses API" in warning_message
+
+ def test_model_validation_gpt5(self):
+ """Test model name validation for GPT-5."""
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Test valid models
+ assert provider.validate_model_name("gpt-5") is True
+ assert provider.validate_model_name("gpt-5-codex") is True
+
+ # Test valid aliases
+ assert provider.validate_model_name("gpt5") is True
+ assert provider.validate_model_name("azure-gpt5") is True
+ assert provider.validate_model_name("azure-gpt-5") is True
+ assert provider.validate_model_name("codex") is True
+ assert provider.validate_model_name("gpt5-codex") is True
+ assert provider.validate_model_name("gpt5codex") is True
+ assert provider.validate_model_name("azure-codex") is True
+ assert provider.validate_model_name("azure-gpt5-codex") is True
+
+ # Test invalid models
+ assert provider.validate_model_name("gpt-4") is False
+ assert provider.validate_model_name("o3") is False
+ assert provider.validate_model_name("invalid-model") is False
+
+ def test_resolve_model_name(self):
+ """Test model name resolution for aliases."""
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Test GPT-5 aliases
+ assert provider._resolve_model_name("gpt5") == "gpt-5"
+ assert provider._resolve_model_name("azure-gpt5") == "gpt-5"
+ assert provider._resolve_model_name("azure-gpt-5") == "gpt-5"
+
+ # Test GPT-5 Codex aliases
+ assert provider._resolve_model_name("gpt5-codex") == "gpt-5-codex"
+ assert provider._resolve_model_name("gpt5codex") == "gpt-5-codex"
+ assert provider._resolve_model_name("codex") == "gpt-5-codex"
+ assert provider._resolve_model_name("azure-codex") == "gpt-5-codex"
+ assert provider._resolve_model_name("azure-gpt5-codex") == "gpt-5-codex"
+
+ # Test full names pass through unchanged
+ assert provider._resolve_model_name("gpt-5") == "gpt-5"
+ assert provider._resolve_model_name("gpt-5-codex") == "gpt-5-codex"
+
+ def test_get_capabilities_gpt5(self):
+ """Test getting model capabilities for GPT-5."""
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ capabilities = provider.get_capabilities("gpt-5")
+
+ assert capabilities.model_name == "gpt-5"
+ assert capabilities.friendly_name == "Azure OpenAI (GPT-5)"
+ assert capabilities.provider == ProviderType.AZURE
+ assert capabilities.intelligence_score == 16
+ assert capabilities.context_window == 400_000
+ assert capabilities.max_output_tokens == 128_000
+ assert capabilities.supports_extended_thinking is True
+ assert capabilities.supports_system_prompts is True
+ assert capabilities.supports_streaming is True
+ assert capabilities.supports_function_calling is True
+ assert capabilities.supports_json_mode is True
+ assert capabilities.supports_images is True
+ assert capabilities.max_image_size_mb == 20.0
+ assert capabilities.supports_temperature is True
+ # GPT-5 uses RangeTemperatureConstraint (not fixed)
+ assert capabilities.temperature_constraint.min_temp == 0.0
+ assert capabilities.temperature_constraint.max_temp == 2.0
+
+ def test_get_capabilities_gpt5_codex(self):
+ """Test getting model capabilities for GPT-5 Codex."""
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5-codex",
+ )
+
+ capabilities = provider.get_capabilities("gpt-5-codex")
+
+ assert capabilities.model_name == "gpt-5-codex"
+ assert capabilities.friendly_name == "Azure OpenAI (GPT-5 Codex)"
+ assert capabilities.provider == ProviderType.AZURE
+ assert capabilities.intelligence_score == 17
+ assert capabilities.context_window == 400_000
+ assert capabilities.max_output_tokens == 128_000
+ assert capabilities.supports_extended_thinking is True
+ assert capabilities.supports_system_prompts is True
+ assert capabilities.supports_streaming is True
+ assert capabilities.supports_function_calling is True
+ assert capabilities.supports_json_mode is True
+ assert capabilities.supports_images is False
+ assert capabilities.max_image_size_mb == 0.0
+ # GPT-5-Codex requires fixed temperature=1.0
+ assert capabilities.supports_temperature is False
+ assert capabilities.temperature_constraint.value == 1.0
+
+ def test_get_capabilities_with_alias(self):
+ """Test getting model capabilities with alias resolves correctly."""
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ capabilities = provider.get_capabilities("gpt5")
+ assert capabilities.model_name == "gpt-5"
+ assert capabilities.friendly_name == "Azure OpenAI (GPT-5)"
+
+ capabilities = provider.get_capabilities("codex")
+ assert capabilities.model_name == "gpt-5-codex"
+ assert capabilities.friendly_name == "Azure OpenAI (GPT-5 Codex)"
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_basic(self, mock_azure_class):
+ """Test basic content generation using Responses API."""
+ # Set up mock Azure client
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ # Mock the response object
+ mock_response = MagicMock()
+ mock_response.output_text = "This is the response content"
+ mock_response.id = "test-response-id"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 100
+ mock_response.usage.output_tokens = 50
+ mock_response.usage.total_tokens = 150
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Generate content
+ result = provider.generate_content(
+ prompt="Test prompt",
+ model_name="gpt-5",
+ temperature=1.0,
+ )
+
+ # Verify API was called correctly
+ mock_client.responses.create.assert_called_once()
+ call_kwargs = mock_client.responses.create.call_args[1]
+
+ assert call_kwargs["model"] == "gpt-5"
+ assert call_kwargs["temperature"] == 1.0
+ assert len(call_kwargs["input"]) == 1
+ assert call_kwargs["input"][0]["role"] == "user"
+ assert call_kwargs["input"][0]["content"] == "Test prompt"
+
+ # Verify response
+ assert result.content == "This is the response content"
+ assert result.model_name == "gpt-5"
+ assert result.friendly_name == "Azure OpenAI (GPT-5)"
+ assert result.provider == ProviderType.AZURE
+ assert result.usage["input_tokens"] == 100
+ assert result.usage["output_tokens"] == 50
+ assert result.usage["total_tokens"] == 150
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_with_system_prompt(self, mock_azure_class):
+ """Test content generation with system prompt."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ mock_response = MagicMock()
+ mock_response.output_text = "Response with system prompt"
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 150
+ mock_response.usage.output_tokens = 75
+ mock_response.usage.total_tokens = 225
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ result = provider.generate_content(
+ prompt="User message",
+ model_name="gpt-5",
+ system_prompt="You are a helpful assistant",
+ temperature=1.0,
+ )
+
+ # Verify messages include system prompt
+ # The system prompt must be prepended as the FIRST item in the
+ # Responses API "input" list, with the user prompt second.
+ call_kwargs = mock_client.responses.create.call_args[1]
+ assert len(call_kwargs["input"]) == 2
+ assert call_kwargs["input"][0]["role"] == "system"
+ assert call_kwargs["input"][0]["content"] == "You are a helpful assistant"
+ assert call_kwargs["input"][1]["role"] == "user"
+ assert call_kwargs["input"][1]["content"] == "User message"
+
+ assert result.content == "Response with system prompt"
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_extracts_from_output_array(self, mock_azure_class):
+ """Test content extraction from output array when output_text is not available."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ # Mock response with output array (no output_text)
+ # Setting output_text to None forces the provider down its fallback
+ # path: scanning response.output items for text content.
+ mock_response = MagicMock()
+ mock_response.output_text = None
+
+ # Create mock output items
+ # A "text"-type item carries its text inside a content list, so the
+ # provider is expected to read item.content[0].text.
+ text_item = MagicMock()
+ text_item.type = "text"
+ text_item.content = [MagicMock(text="Text from output array")]
+
+ mock_response.output = [text_item]
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 50
+ mock_response.usage.output_tokens = 25
+ mock_response.usage.total_tokens = 75
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ result = provider.generate_content(
+ prompt="Test prompt",
+ model_name="gpt-5",
+ temperature=1.0,
+ )
+
+ # Verify content extracted from output array
+ assert result.content == "Text from output array"
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_extracts_from_message_type(self, mock_azure_class):
+ """Test content extraction from output array with message type."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ # Mock response with output array containing message type
+ mock_response = MagicMock()
+ mock_response.output_text = None
+
+ # Unlike "text" items (nested content list), a "message"-type item is
+ # expected to expose its content directly as a plain string.
+ message_item = MagicMock()
+ message_item.type = "message"
+ message_item.content = "Direct message content"
+
+ mock_response.output = [message_item]
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 30
+ mock_response.usage.output_tokens = 20
+ mock_response.usage.total_tokens = 50
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ result = provider.generate_content(
+ prompt="Test",
+ model_name="gpt-5",
+ temperature=1.0,
+ )
+
+ assert result.content == "Direct message content"
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_no_content_error(self, mock_azure_class):
+ """Test error when no content can be extracted from response."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ # Mock response with no content
+ # Both extraction paths are made to fail: output_text is None and the
+ # output array is empty, so the provider must raise ValueError.
+ mock_response = MagicMock()
+ mock_response.output_text = None
+ mock_response.output = []
+ mock_response.usage = MagicMock()
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ with pytest.raises(ValueError, match="No content available in response"):
+ provider.generate_content(
+ prompt="Test",
+ model_name="gpt-5",
+ temperature=1.0,
+ )
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_token_usage_extraction(self, mock_azure_class):
+ """Test token usage extraction from response."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ mock_response = MagicMock()
+ mock_response.output_text = "Test response"
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+
+ # Test with input_tokens and output_tokens format
+ # (the native Responses API naming, as opposed to the legacy
+ # prompt_tokens/completion_tokens naming tested separately below)
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 200
+ mock_response.usage.output_tokens = 100
+ mock_response.usage.total_tokens = 300
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ result = provider.generate_content(
+ prompt="Test", model_name="gpt-5", temperature=1.0
+ )
+
+ # Both naming conventions must be populated in result.usage so
+ # downstream consumers can use either key set.
+ assert result.usage["input_tokens"] == 200
+ assert result.usage["prompt_tokens"] == 200
+ assert result.usage["output_tokens"] == 100
+ assert result.usage["completion_tokens"] == 100
+ assert result.usage["total_tokens"] == 300
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_token_usage_extraction_alternative_format(self, mock_azure_class):
+ """Test token usage extraction with prompt_tokens and completion_tokens format."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ mock_response = MagicMock()
+ mock_response.output_text = "Test response"
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+
+ # Test with prompt_tokens and completion_tokens format
+ # Create a custom mock class that only has specific attributes.
+ # A plain class is required here (not MagicMock): MagicMock fabricates
+ # any attribute on access, which would make hasattr()-style checks for
+ # input_tokens/output_tokens succeed and mask the legacy-field path.
+ class UsageWithLegacyFields:
+ prompt_tokens = 250
+ completion_tokens = 125
+
+ mock_response.usage = UsageWithLegacyFields()
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ result = provider.generate_content(
+ prompt="Test", model_name="gpt-5", temperature=1.0
+ )
+
+ assert result.usage["prompt_tokens"] == 250
+ assert result.usage["input_tokens"] == 250
+ assert result.usage["completion_tokens"] == 125
+ assert result.usage["output_tokens"] == 125
+ assert result.usage["total_tokens"] == 375 # Calculated (250 + 125) since no total was provided
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_with_max_output_tokens(self, mock_azure_class):
+ """Test content generation with explicit max_output_tokens."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ mock_response = MagicMock()
+ mock_response.output_text = "Response"
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 50
+ mock_response.usage.output_tokens = 25
+ mock_response.usage.total_tokens = 75
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ result = provider.generate_content(
+ prompt="Test",
+ model_name="gpt-5",
+ max_output_tokens=4000,
+ temperature=1.0,
+ )
+
+ # Verify max_output_tokens was passed and result is not None
+ # (forwarded verbatim to responses.create, not renamed to max_tokens)
+ assert result is not None
+ call_kwargs = mock_client.responses.create.call_args[1]
+ assert call_kwargs["max_output_tokens"] == 4000
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_api_error(self, mock_azure_class):
+ """Test error handling when API call fails."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ # Simulate API error
+ mock_client.responses.create.side_effect = Exception("API Error")
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # The error must surface to the caller with its message intact
+ # (propagated or re-raised wrapped; match= only checks the message).
+ with pytest.raises(Exception, match="API Error"):
+ provider.generate_content(
+ prompt="Test",
+ model_name="gpt-5",
+ temperature=1.0,
+ )
+
+ def test_provider_type(self):
+ """Test get_provider_type returns ProviderType.AZURE."""
+ # No patching needed: this test never triggers client creation.
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ assert provider.get_provider_type() == ProviderType.AZURE
+
+ def test_get_preferred_model_extended_reasoning(self):
+ """Test get_preferred_model for extended reasoning category."""
+ from tools.models import ToolModelCategory
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Test with both models available
+ allowed = ["gpt-5", "gpt-5-codex"]
+ preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, allowed)
+ assert preferred == "gpt-5-codex" # Codex preferred for extended reasoning
+
+ # Test with only gpt-5 available — falls back to whatever is allowed
+ allowed = ["gpt-5"]
+ preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, allowed)
+ assert preferred == "gpt-5"
+
+ # Test with empty list — no candidates means None, not an exception
+ preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, [])
+ assert preferred is None
+
+ def test_get_preferred_model_fast_response(self):
+ """Test get_preferred_model for fast response category."""
+ from tools.models import ToolModelCategory
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Test with both models available
+ allowed = ["gpt-5", "gpt-5-codex"]
+ preferred = provider.get_preferred_model(ToolModelCategory.FAST_RESPONSE, allowed)
+ assert preferred == "gpt-5" # gpt-5 preferred for fast response
+
+ # Test with only codex available — falls back to the sole candidate
+ allowed = ["gpt-5-codex"]
+ preferred = provider.get_preferred_model(ToolModelCategory.FAST_RESPONSE, allowed)
+ assert preferred == "gpt-5-codex"
+
+ def test_get_preferred_model_balanced(self):
+ """Test get_preferred_model for balanced category."""
+ from tools.models import ToolModelCategory
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Test with both models available
+ allowed = ["gpt-5", "gpt-5-codex"]
+ preferred = provider.get_preferred_model(ToolModelCategory.BALANCED, allowed)
+ assert preferred == "gpt-5-codex" # Codex preferred for code tasks
+
+ # Test with only gpt-5 available — falls back to the sole candidate
+ allowed = ["gpt-5"]
+ preferred = provider.get_preferred_model(ToolModelCategory.BALANCED, allowed)
+ assert preferred == "gpt-5"
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_close_cleanup(self, mock_azure_class):
+ """Test close method properly cleans up resources."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Initialize client by calling _get_client
+ # (the client is lazily created; see test_lazy_client_initialization)
+ provider._get_client()
+ assert provider._client is not None
+
+ # Close should set client to None
+ provider.close()
+ assert provider._client is None
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_lazy_client_initialization(self, mock_azure_class):
+ """Test that Azure client is lazily initialized on first use."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5",
+ )
+
+ # Client should not be initialized yet — the constructor must not
+ # touch the AzureOpenAI SDK at all.
+ assert provider._client is None
+ mock_azure_class.assert_not_called()
+
+ # Get client should initialize it, passing the constructor credentials
+ # through verbatim (note: deployment_name is not an AzureOpenAI kwarg).
+ client = provider._get_client()
+ assert client is not None
+ assert provider._client is not None
+ mock_azure_class.assert_called_once_with(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ )
+
+ # Second call should return same client (cached, not re-created)
+ client2 = provider._get_client()
+ assert client2 is client
+ mock_azure_class.assert_called_once() # Still only called once
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_metadata_in_response(self, mock_azure_class):
+ """Test that response metadata includes deployment and status info."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ mock_response = MagicMock()
+ mock_response.output_text = "Test content"
+ mock_response.id = "response-123"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 50
+ mock_response.usage.output_tokens = 25
+ mock_response.usage.total_tokens = 75
+
+ mock_client.responses.create.return_value = mock_response
+
+ # A non-default deployment name distinguishes the deployment metadata
+ # from the model name passed to generate_content below.
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="my-gpt5-deployment",
+ )
+
+ result = provider.generate_content(
+ prompt="Test", model_name="gpt-5", temperature=1.0
+ )
+
+ # Verify metadata
+ assert result.metadata["response_id"] == "response-123"
+ assert result.metadata["status"] == "completed"
+ assert result.metadata["deployment_name"] == "my-gpt5-deployment"
+
+ @patch("providers.azure_openai.AzureOpenAI")
+ def test_generate_content_resolves_alias(self, mock_azure_class):
+ """Test that generate_content resolves aliases before making API call."""
+ mock_client = MagicMock()
+ mock_azure_class.return_value = mock_client
+
+ mock_response = MagicMock()
+ mock_response.output_text = "Test response"
+ mock_response.id = "test-id"
+ mock_response.status = "completed"
+ mock_response.usage = MagicMock()
+ mock_response.usage.input_tokens = 50
+ mock_response.usage.output_tokens = 25
+ mock_response.usage.total_tokens = 75
+
+ mock_client.responses.create.return_value = mock_response
+
+ provider = AzureOpenAIProvider(
+ api_key="test-key",
+ azure_endpoint="https://test.openai.azure.com",
+ api_version="2025-03-01-preview",
+ deployment_name="gpt-5-codex",
+ )
+
+ # Use alias "codex" — the provider is expected to resolve it to the
+ # canonical model name before building the request.
+ result = provider.generate_content(
+ prompt="Test prompt",
+ model_name="codex",
+ temperature=1.0,
+ )
+
+ # Verify API was called with deployment name (not the alias)
+ call_kwargs = mock_client.responses.create.call_args[1]
+ assert call_kwargs["model"] == "gpt-5-codex" # Uses deployment name
+
+ # Verify result uses resolved model name
+ assert result.model_name == "gpt-5-codex"