diff --git a/.env.example b/.env.example index 7d6b3012..74aa4d05 100644 --- a/.env.example +++ b/.env.example @@ -26,6 +26,42 @@ DIAL_API_KEY=your_dial_api_key_here # DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended) # DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests +# Azure OpenAI (uses Responses API - works with GPT-5 and GPT-5-Codex) +# ============================================================================== +# IMPORTANT: This integration uses the Azure OpenAI Responses API exclusively +# Works with both GPT-5 (general purpose) and GPT-5-Codex (code-specialized) models +# +# How to obtain credentials: +# 1. Log in to Azure Portal (https://portal.azure.com) +# 2. Navigate to your Azure OpenAI resource +# 3. Go to "Keys and Endpoint" section +# 4. Copy the API Key and Endpoint URL +# 5. Note your deployment name (defined when you created the deployment) +# +# All 4 variables below are REQUIRED for Azure OpenAI to work: +AZURE_OPENAI_API_KEY=your_azure_openai_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# +# Configuration Notes: +# - API Key: Found in Azure Portal > Azure OpenAI > Keys and Endpoint +# - Endpoint: Must include https://; a trailing slash is accepted (e.g., https://your-resource.openai.azure.com/) +# - API Version: Must be 2025-03-01-preview or later for Responses API support +# - Deployment Name: Your custom deployment name from Azure Portal (e.g., gpt-5, gpt-5-codex, or any custom name) +# +# Model Information: +# - Deployment name is user-defined in Azure Portal when creating the deployment +# - Common deployment names: gpt-5, gpt-5-codex, or custom names you choose +# - GPT-5: General purpose model (400K context, 128K output, reasoning tokens) +# - GPT-5-Codex: Code-specialized variant with enhanced programming capabilities +# +# Important Constraints: +# - Temperature is fixed at 1.0 for GPT-5-Codex and O3 reasoning models; the server only sends a temperature for models that support it +# - Do NOT use placeholder values like "your_azure_openai_key_here" - they will fail +# - All 4 variables must be set with real values from your Azure deployment +# ============================================================================== + # Option 2: Use OpenRouter for access to multiple models through one API # Get your OpenRouter API key from: https://openrouter.ai/ # If using OpenRouter, comment out the native API keys above diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 00000000..31c04fdf --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,57 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@v1 + with: +
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + prompt: | + REPO: ${{ github.repository }} + PR NUMBER: ${{ github.event.pull_request.number }} + + Please review this pull request and provide feedback on: + - Code quality and best practices + - Potential bugs or issues + - Performance considerations + - Security concerns + - Test coverage + + Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback. + + Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR. + + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options + claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"' + diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 00000000..b1a3201d --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' + + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options + # claude_args: '--model claude-opus-4-1-20250805 --allowed-tools Bash(gh pr:*)' + diff --git a/CHANGELOG.md b/CHANGELOG.md index 20498392..bafbce43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ +## v1.0.0 (2025-10-04) + +- Initial Release + ## v5.21.0 (2025-10-03) ### Chores diff --git a/README.md b/README.md index 8f6b1318..5b5805a0 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [zen_web.webm](https://github.com/user-attachments/assets/851e3911-7f06-47c0-a4ab-a2601236697c)
- 🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team + 🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Azure OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team

@@ -85,6 +85,7 @@ For best results, use Claude Code with: - **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API - **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models - **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series +- **[Azure OpenAI](https://portal.azure.com/)** - GPT-5, GPT-5-Codex via Responses API - **[X.AI](https://console.x.ai/)** - Grok models - **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access - **[Ollama](https://ollama.ai/)** - Local models (free) @@ -247,8 +248,8 @@ DISABLED_TOOLS= - **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets **Model Support** -- **Multiple providers** - Gemini, OpenAI, X.AI, OpenRouter, DIAL, Ollama -- **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama +- **Multiple providers** - Gemini, OpenAI, Azure OpenAI, X.AI, OpenRouter, DIAL, Ollama +- **Latest models** - GPT-5, GPT-5-Codex, Gemini 2.5 Pro, O3, Grok-4, local Llama - **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost - **Vision support** - Analyze images, diagrams, screenshots @@ -307,6 +308,7 @@ Built with the power of **Multi-Model AI** collaboration 🤝 - [Claude Code](https://claude.ai/code) - Your AI coding orchestrator - [Gemini 2.5 Pro & Flash](https://ai.google.dev/) - Extended thinking & fast analysis - [OpenAI O3 & GPT-5](https://openai.com/) - Strong reasoning & latest capabilities +- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) - Enterprise GPT-5 & GPT-5-Codex via Responses API ### Star History diff --git a/config.py b/config.py index 1e5a2b1c..1e3db855 100644 --- a/config.py +++ b/config.py @@ -14,9 +14,9 @@ # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "5.21.0" +__version__ = "1.0.0" # Last update date in ISO format -__updated__ = "2025-10-03" +__updated__ = "2025-10-04" # Primary maintainer __author__ = "Fahad Gilani" diff --git a/docs/AZURE_OPENAI_TROUBLESHOOTING.md b/docs/AZURE_OPENAI_TROUBLESHOOTING.md new file mode 100644 index 00000000..212dec5e --- /dev/null +++ b/docs/AZURE_OPENAI_TROUBLESHOOTING.md @@ -0,0 +1,653 @@ +# Azure OpenAI Troubleshooting Guide + +This guide provides comprehensive troubleshooting information for Azure OpenAI integration with Zen MCP Server. + +## Implementation Overview + +**IMPORTANT:** This implementation uses Azure OpenAI **Responses API** exclusively. + +- Works with both **GPT-5** and **GPT-5-Codex** models +- Uses Responses API (not Chat Completions API) as required by GPT-5-Codex +- Different content extraction methods than standard Chat Completions +- Supports multi-turn conversations with proper session management + +--- + +## Common Issues and Solutions + +### 1. Missing Environment Variables + +**Problem:** Azure OpenAI provider not available or returns configuration errors. 
+ +**Solution:** Ensure all required environment variables are set in your `.env` file: + +```bash +# Required Azure OpenAI Configuration +AZURE_OPENAI_API_KEY=your-api-key-here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex # or gpt-5 +AZURE_OPENAI_API_VERSION=2025-03-01-preview # Must be 2025-03-01-preview or later +``` + +**Verify configuration:** +```bash +# Check if variables are set +grep "AZURE_OPENAI" .env + +# Expected output should show all four variables with values +``` + +--- + +### 2. Invalid API Key + +**Problem:** Authentication errors when making API calls. + +**Error Message:** +``` +401 Unauthorized: Invalid API key provided +``` + +**Solution:** +1. Verify your API key in Azure Portal: + - Go to Azure Portal → Your Azure OpenAI resource + - Navigate to "Keys and Endpoint" + - Copy either KEY 1 or KEY 2 + - Update `AZURE_OPENAI_API_KEY` in `.env` file + +2. Ensure no extra spaces or quotes in the API key: +```bash +# Correct format +AZURE_OPENAI_API_KEY=abcd1234567890... + +# Incorrect format (no quotes needed) +AZURE_OPENAI_API_KEY="abcd1234567890..." +``` + +--- + +### 3. Wrong Endpoint Format + +**Problem:** Connection errors or invalid endpoint errors. + +**Error Message:** +``` +Invalid URL or endpoint format +``` + +**Solution:** Ensure endpoint follows correct format: + +```bash +# Correct format +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com + +# Incorrect format (missing https://) +AZURE_OPENAI_ENDPOINT=your-resource-name.openai.azure.com + +# Also accepted (a trailing slash is optional) +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/ +``` + +**Verify endpoint:** +```bash +# Test endpoint connectivity +curl -I https://your-resource-name.openai.azure.com +``` + +--- + +### 4. Old API Version + +**Problem:** Responses API not available or unsupported API version. + +**Error Message:** +``` +API version not supported or Responses API not available +``` + +**Solution:** Update to a supported API version: + +```bash +# Minimum version for Responses API (later versions, e.g. 2025-04-01-preview, also work) +AZURE_OPENAI_API_VERSION=2025-03-01-preview + +# Older versions NOT supported for Responses API +AZURE_OPENAI_API_VERSION=2024-10-21 # Too old +AZURE_OPENAI_API_VERSION=2024-08-01-preview # Too old +``` + +**Note:** The Responses API requires API version `2025-03-01-preview` or later. Earlier versions only support the Chat Completions API.
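+
+The version check in `providers/azure_openai.py` is a plain string comparison, which works because these API versions are ISO-date-prefixed. A minimal sketch of the same check:
+
+```python
+# ISO-date-prefixed versions ("YYYY-MM-DD...") sort correctly as strings,
+# so no dedicated version parser is needed.
+api_version = "2024-08-01-preview"  # example of a too-old version
+
+if api_version < "2025-03-01-preview":
+    print(f"API version {api_version} may not support the Responses API")
+```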
+ +--- + +### 5. Deployment Name Mismatch + +**Problem:** Deployment not found or model not available. + +**Error Message:** +``` +404 Not Found: The API deployment for this resource does not exist +``` + +**Solution:** +1. Verify deployment name in Azure Portal: + - Go to Azure Portal → Your Azure OpenAI resource + - Navigate to "Model deployments" + - Copy exact deployment name (case-sensitive) + +2. Update deployment name in `.env`: +```bash +# Use exact deployment name from Azure Portal +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex + +# Common deployment names +# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5 +# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# AZURE_OPENAI_DEPLOYMENT_NAME=my-gpt5-deployment +``` + +**Verify deployment exists:** +```bash +# List deployments using Azure CLI +az cognitiveservices account deployment list \ + --name your-resource-name \ + --resource-group your-resource-group +``` + +--- + +### 6. Temperature Validation Errors + +**Problem:** Invalid temperature value for GPT-5-Codex. + +**Error Message:** +``` +Temperature must be exactly 1.0 for GPT-5-Codex model +Invalid temperature value: must be 1.0 +``` + +**Solution:** The implementation enforces temperature=1.0 for GPT-5-Codex: + +```python +# Temperature is automatically set to 1.0 for GPT-5-Codex +# No configuration needed - handled internally + +# For models that support it (e.g., GPT-5), temperature can vary +# But for GPT-5-Codex: temperature=1.0 is required +``` + +**Note:** This is a GPT-5-Codex requirement enforced by Azure, not a server limitation. + +--- + +### 7. Rate Limiting + +**Problem:** Too many requests or quota exceeded. + +**Error Message:** +``` +429 Too Many Requests: Rate limit exceeded +403 Forbidden: Quota exceeded +``` + +**Solution:** +1. Check your Azure quota: + - Go to Azure Portal → Your Azure OpenAI resource + - Navigate to "Quotas" + - Verify Tokens Per Minute (TPM) limit + +2. Implement retry logic (already built-in): + - The server automatically retries with exponential backoff + - Wait a few moments between requests + +3. Request quota increase: + - Contact Azure support to increase your TPM quota + - Upgrade to higher tier if available + +--- + +## Responses API Specific Issues + +### Understanding Responses API + +**Key Differences from Chat Completions API:** + +1. **Endpoint URL:** + ```bash + # Responses API (what we use) + POST https://{resource}.openai.azure.com/openai/deployments/{deployment}/responses + + # Chat Completions API (NOT used) + POST https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions + ``` + +2. **Content Extraction:** + ```python + # Responses API - two possible formats (illustrative; the SDK returns objects) + # Format 1: output_text field + content = response_data.get("output_text", "") + + # Format 2: output array + output = response_data.get("output", []) + if output and len(output) > 0: + content = output[0].get("content", "") + ``` + +3. **Model requirements:** + - GPT-5-Codex: **Requires** Responses API + - GPT-5: Works with Responses API
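+
+For reference, below is a minimal sketch of calling the Responses API through the official `openai` SDK, mirroring what `providers/azure_openai.py` does internally (the key, endpoint, and deployment name are placeholders):
+
+```python
+from openai import AzureOpenAI
+
+# Placeholders: substitute the values from your .env file.
+client = AzureOpenAI(
+    api_key="your-api-key-here",
+    azure_endpoint="https://your-resource.openai.azure.com",
+    api_version="2025-03-01-preview",
+)
+
+# The Responses API takes `input` (not `messages`) and `max_output_tokens`.
+response = client.responses.create(
+    model="gpt-5-codex",  # your deployment name, not necessarily the model name
+    input=[{"role": "user", "content": "Say hello in one word"}],
+    max_output_tokens=16,
+)
+
+print(response.output_text)
+```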
+ +### Responses API Error Handling + +**Problem:** Empty or missing response content. + +**Solution:** The implementation handles multiple content extraction methods: + +```python +# Check multiple possible response formats +# 1. Try output_text field +# 2. Try output array +# 3. Raise a clear error if neither contains content +``` + +If you see empty responses, check server logs: +```bash +tail -n 100 logs/mcp_server.log | grep "Azure OpenAI" +``` + +--- + +## Configuration Validation + +### Verify Azure Credentials + +**Step 1: Check environment variables** +```bash +# View current configuration +grep "AZURE_OPENAI" .env + +# Expected output: +# AZURE_OPENAI_API_KEY=abcd1234... +# AZURE_OPENAI_ENDPOINT=https://... +# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# AZURE_OPENAI_API_VERSION=2025-03-01-preview +``` + +**Step 2: Test API key validity** +```bash +# Using curl to test authentication (Responses API payloads use `input`, not `messages`) +curl -X POST "https://your-resource.openai.azure.com/openai/deployments/gpt-5-codex/responses?api-version=2025-03-01-preview" \ + -H "api-key: your-api-key-here" \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "test"}], + "temperature": 1.0 + }' +``` + +### Test Endpoint Connectivity + +**Check DNS resolution:** +```bash +# Verify endpoint resolves +nslookup your-resource.openai.azure.com +``` + +**Check network connectivity:** +```bash +# Test HTTPS connection +curl -I https://your-resource.openai.azure.com +``` + +**Expected response:** +``` +HTTP/2 401 +# 401 is expected without API key - confirms endpoint is reachable +``` + +### Verify Deployment Exists + +**Using Azure CLI:** +```bash +# List all deployments +az cognitiveservices account deployment list \ + --name your-resource-name \ + --resource-group your-resource-group \ + --query "[].{name:name, model:properties.model.name}" \ + --output table + +# Expected output: +# Name Model +# ---------------- ------------- +# gpt-5-codex gpt-5-codex +``` + +**Using Azure Portal:** +1. Navigate to your Azure OpenAI resource +2. Click "Model deployments" +3. Verify deployment name and model + +### Verify API Version Support + +**Check supported API versions:** +```bash +# List available API versions for your resource +az cognitiveservices account show \ + --name your-resource-name \ + --resource-group your-resource-group \ + --query "properties.capabilities" +``` + +**Ensure using latest version:** +- API version must be `2025-03-01-preview` or later +- Older versions do not support Responses API
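+
+To check all four settings in one pass, the following small standalone sketch can help (it assumes `python-dotenv` is installed; the placeholder values mirror the checks in `server.py`):
+
+```python
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()  # reads .env from the current directory
+
+REQUIRED = [
+    "AZURE_OPENAI_API_KEY",
+    "AZURE_OPENAI_ENDPOINT",
+    "AZURE_OPENAI_API_VERSION",
+    "AZURE_OPENAI_DEPLOYMENT_NAME",
+]
+PLACEHOLDERS = {
+    "your_azure_openai_key_here",
+    "https://your-resource.openai.azure.com/",
+}
+
+for name in REQUIRED:
+    value = os.getenv(name)
+    if not value:
+        print(f"MISSING: {name}")
+    elif value in PLACEHOLDERS:
+        print(f"PLACEHOLDER: {name} still has the sample value")
+    else:
+        print(f"OK: {name}")
+```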
+ } +} +``` + +**Solutions:** +- This is enforced by Azure for GPT-5-Codex +- The implementation automatically sets temperature=1.0 +- If you see this error, check server logs for configuration issues + +--- + +### Content Extraction Errors + +**Error:** Empty response or missing content + +**Symptoms:** +- Tool returns empty string +- No visible output from model +- Logs show successful API call but no content + +**Solutions:** +1. Check server logs for response format: +```bash +tail -n 200 logs/mcp_server.log | grep "Azure OpenAI response" +``` + +2. Verify Responses API is being used (not Chat Completions): +```bash +grep "responses?" logs/mcp_server.log +``` + +3. Check for multiple content extraction attempts in logs + +--- + +## Testing and Validation + +### Run Integration Tests + +**Test Azure OpenAI provider:** +```bash +# Run integration tests (requires API keys) +./run_integration_tests.sh + +# Run specific Azure OpenAI tests +python -m pytest tests/ -v -k "azure" -m integration +``` + +**Expected output:** +``` +tests/test_azure_openai_integration.py::test_azure_provider_registration PASSED +tests/test_azure_openai_integration.py::test_azure_api_call PASSED +tests/test_azure_openai_integration.py::test_azure_responses_api PASSED +``` + +### Check Server Logs + +**View recent Azure activity:** +```bash +# Filter for Azure OpenAI logs +tail -n 500 logs/mcp_server.log | grep -i "azure" + +# View tool activity +tail -n 100 logs/mcp_activity.log + +# Follow logs in real-time +tail -f logs/mcp_server.log +``` + +**Look for:** +- Provider registration confirmation +- API call attempts +- Response format handling +- Error messages + +### Verify Provider Registration + +**Check provider availability:** +```bash +# Start server and check logs +./run-server.sh + +# Look for registration message +grep "Azure OpenAI provider registered" logs/mcp_server.log +``` + +**Expected log entry:** +``` +INFO: Azure OpenAI provider registered successfully +INFO: Deployment: gpt-5-codex +INFO: API Version: 2025-03-01-preview +``` + +### Manual API Testing + +**Test Responses API directly:** +```bash +# Create test script +cat > test_azure.sh << 'EOF' +#!/bin/bash +source .env + +curl -X POST "${AZURE_OPENAI_ENDPOINT}/openai/deployments/${AZURE_OPENAI_DEPLOYMENT_NAME}/responses?api-version=${AZURE_OPENAI_API_VERSION}" \ + -H "api-key: ${AZURE_OPENAI_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + {"role": "user", "content": "Say hello in one word"} + ], + "temperature": 1.0, + "max_tokens": 10 + }' +EOF + +chmod +x test_azure.sh +./test_azure.sh +``` + +**Expected response:** +```json +{ + "output_text": "Hello", + "usage": { + "prompt_tokens": 12, + "completion_tokens": 1, + "total_tokens": 13 + } +} +``` + +--- + +## Advanced Troubleshooting + +### Enable Debug Logging + +**Increase log verbosity:** +```bash +# Set debug level in environment +export LOG_LEVEL=DEBUG + +# Restart server +./run-server.sh + +# View detailed logs +tail -f logs/mcp_server.log +``` + +### Network Diagnostics + +**Check firewall rules:** +```bash +# Test connectivity to Azure endpoint +telnet your-resource.openai.azure.com 443 + +# Check SSL certificate +openssl s_client -connect your-resource.openai.azure.com:443 +``` + +**Verify DNS:** +```bash +# Check DNS resolution +dig your-resource.openai.azure.com + +# Alternative DNS check +host your-resource.openai.azure.com +``` + +### Analyze Request/Response + +**Enable request logging:** +```python +# In providers/azure_openai_provider.py +# 
+### Analyze Request/Response + +**Enable request logging:** +```python +# In providers/azure_openai.py, temporarily add debug logging inside +# generate_content() around the client.responses.create(...) call: + +logger.debug(f"Deployment: {self.deployment_name}") +logger.debug(f"Request params: {api_params}") + +response = client.responses.create(**api_params) + +logger.debug(f"Response id: {getattr(response, 'id', None)}") +logger.debug(f"Response status: {getattr(response, 'status', None)}") +logger.debug(f"Raw response: {response}") +``` + +(The SDK assembles the HTTP request internally, so the request URL and headers are not built in the provider itself.) + +**Check cassette recordings:** +```bash +# View recorded API interactions +ls -la tests/cassettes/azure_*.yaml +``` + +--- + +## Additional Resources + +### Azure Documentation + +- [Azure OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) +- [Responses API Reference](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#responses-api) +- [GPT-5-Codex Model Details](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models#gpt-5-codex) +- [API Version Support](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/api-version-deprecation) + +### Project Documentation + +- Main README: `README.md` +- Development Guide: `CLAUDE.md` +- Integration Tests: `tests/test_azure_openai_integration.py` +- Provider Implementation: `providers/azure_openai.py` + +### Getting Help + +1. **Check server logs:** `tail -n 500 logs/mcp_server.log` +2. **Run diagnostics:** `./run_integration_tests.sh` +3. **Review Azure Portal:** Verify configuration and quotas +4. **Contact Azure Support:** For Azure-specific issues +5. **GitHub Issues:** Report bugs or request features + +--- + +## Summary Checklist + +Before opening an issue, verify: + +- [ ] All environment variables set correctly in `.env` +- [ ] API key is valid and not expired +- [ ] Endpoint format is correct (https://...) +- [ ] API version is `2025-03-01-preview` or later +- [ ] Deployment name matches Azure Portal exactly +- [ ] Deployment is in "Succeeded" state +- [ ] Quota/rate limits not exceeded +- [ ] Network connectivity to Azure endpoint +- [ ] Server logs checked for specific errors +- [ ] Integration tests run successfully + +--- + +**Last Updated:** 2025-10-04 +**API Version Required:** 2025-03-01-preview or later +**Supported Models:** GPT-5, GPT-5-Codex +**Implementation:** Azure OpenAI Responses API diff --git a/docs/configuration.md b/docs/configuration.md index 12e9d655..b2e6be1b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -30,10 +30,17 @@ OPENAI_API_KEY=your-openai-key GEMINI_API_KEY=your_gemini_api_key_here # Get from: https://makersuite.google.com/app/apikey -# OpenAI API +# OpenAI API OPENAI_API_KEY=your_openai_api_key_here # Get from: https://platform.openai.com/api-keys +# Azure OpenAI API (Responses API - supports GPT-5 and GPT-5-Codex) +AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# Get from: https://portal.azure.com/ (Keys and Endpoint section) + # X.AI GROK API XAI_API_KEY=your_xai_api_key_here # Get from: https://console.x.ai/ @@ -59,6 +66,57 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model - Use standard localhost URLs since the server runs natively - Example: `http://localhost:11434/v1` for Ollama +### Azure OpenAI Configuration + +Azure OpenAI integration uses the **Responses API** exclusively, supporting both GPT-5 and GPT-5-Codex models with enterprise-grade features. + +**Setup Steps:** + +1. **Create Azure OpenAI Resource:** + - Navigate to [Azure Portal](https://portal.azure.com/) + - Create or select an Azure OpenAI resource + - Deploy a GPT-5 or GPT-5-Codex model + +2. **Get Credentials:** + - Go to your Azure OpenAI resource + - Navigate to "Keys and Endpoint" section + - Copy the API key and endpoint URL + +3. **Configure Environment Variables:** + ```env + # Required for Azure OpenAI + AZURE_OPENAI_API_KEY=your_api_key_from_azure + AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ + AZURE_OPENAI_API_VERSION=2025-04-01-preview + AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex + ``` + +**Supported Models:** +- **`gpt-5`** - Intelligence score 16, 400K context window, 128K max output tokens +- **`gpt-5-codex`** - Intelligence score 17, specialized for code generation and analysis + +**Key Features:** +- **Responses API Implementation** - Uses Azure's Responses API (not Chat Completions API) +- **Extended Thinking Support** - Full support for extended reasoning capabilities +- **Deployment-Based Routing** - Routes requests through deployment names rather than model names +- **Large Context Windows** - 400K token context, 128K token output capacity +- **Temperature Constraint** - GPT-5-Codex requires a fixed temperature of 1.0; the server only sends a temperature to models that support it + +**Important Notes:** +- Azure OpenAI requires all 4 environment variables to be configured +- The deployment name must match your Azure deployment (not the model name directly) +- For GPT-5-Codex, temperature is always set to 1.0 and cannot be modified +- Uses deployment-based routing: requests go to your specific deployment endpoint (see the sketch below)
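+
+To make deployment-based routing concrete, here is a short sketch of how the provider resolves model aliases while always routing the request by deployment name (mirroring `providers/azure_openai.py`; the credentials are placeholders):
+
+```python
+from providers.azure_openai import AzureOpenAIProvider
+
+# Placeholder credentials for illustration only.
+provider = AzureOpenAIProvider(
+    api_key="your-api-key-here",
+    azure_endpoint="https://your-resource.openai.azure.com/",
+    api_version="2025-04-01-preview",
+    deployment_name="my-gpt5-codex-deployment",
+)
+
+# Aliases resolve to canonical model names for capability lookups...
+print(provider._resolve_model_name("codex"))  # -> "gpt-5-codex"
+
+# ...but the actual request is sent with your deployment name, so Azure
+# receives "my-gpt5-codex-deployment" as the `model` value.
+```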
+ +**Example Configuration:** +```env +# Example Azure OpenAI setup for GPT-5-Codex +AZURE_OPENAI_API_KEY=abc123def456ghi789jkl012mno345pqr +AZURE_OPENAI_ENDPOINT=https://my-company-openai.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=my-gpt5-codex-deployment +``` + ### Model Configuration **Default Model Selection:** @@ -70,10 +128,12 @@ DEFAULT_MODEL=auto # Claude picks best model for each task (recommended) **Available Models:** - **`auto`**: Claude automatically selects the optimal model - **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis -- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses +- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses - **`o3`**: Strong logical reasoning (200K context) - **`o3-mini`**: Balanced speed/quality (200K context) - **`o4-mini`**: Latest reasoning model, optimized for shorter contexts +- **`gpt-5`**: Azure OpenAI GPT-5 via Responses API (400K context, 128K output) +- **`gpt-5-codex`**: Azure OpenAI GPT-5-Codex specialized for code (400K context, 128K output) - **`grok-3`**: GROK-3 advanced reasoning (131K context) - **`grok-4-latest`**: GROK-4 latest flagship model (256K context) - **Custom models**: via OpenRouter or local APIs @@ -190,6 +250,18 @@ LOG_LEVEL=DEBUG CONVERSATION_TIMEOUT_HOURS=1 ``` +### Azure OpenAI Setup +```env +# Azure OpenAI with GPT-5-Codex +DEFAULT_MODEL=auto +AZURE_OPENAI_API_KEY=your-azure-key +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +LOG_LEVEL=INFO +CONVERSATION_TIMEOUT_HOURS=3 +``` + ### Production Setup ```env # Production with cost controls diff --git a/providers/azure_openai.py b/providers/azure_openai.py new file mode 100644 index 00000000..08d603f8 --- /dev/null +++ b/providers/azure_openai.py @@ -0,0 +1,473 @@ +
+ +IMPORTANT: This implementation uses Azure OpenAI's **Responses API** exclusively, +which works with both **GPT-5** and **GPT-5-Codex** models, as well as O3 reasoning +models and GPT-4.1. The Responses API is required for GPT-5-Codex and provides +consistent behavior across all Azure OpenAI models. + +This provider supports Azure OpenAI deployments using the Responses API format, +which is required for advanced models like gpt-5, gpt-5-codex, gpt-5-mini, +gpt-5-nano, o3-mini, and gpt-4.1. +""" + +import logging +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from tools.models import ToolModelCategory + +from openai import AzureOpenAI + +from .base import ModelProvider +from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint + +logger = logging.getLogger(__name__) + + +class AzureOpenAIProvider(ModelProvider): + """Azure OpenAI provider using Responses API. + + IMPORTANT: This implementation uses Azure OpenAI's **Responses API** exclusively, + which works with both **GPT-5** and **GPT-5-Codex** models, as well as all variants + (gpt-5-mini, gpt-5-nano), O3 reasoning models (o3-mini), and GPT-4.1. The Responses + API is required for GPT-5-Codex and provides consistent behavior across all Azure + OpenAI models. + + This provider connects to Azure OpenAI deployments and uses the Responses API + (client.responses.create) instead of the Chat Completions API. This is required + for certain advanced models like gpt-5-codex and provides extended reasoning + capabilities for gpt-5, gpt-5-mini, and o3-mini. + + Supported Models: + - gpt-5: Advanced reasoning model (400K context, 128K output) + - gpt-5-codex: Elite code generation (400K context, 128K output) + - gpt-5-mini: Faster, cost-effective variant (400K context, 128K output) + - gpt-5-nano: Fastest, most cost-effective (400K context, 128K output) + - o3-mini: Strong reasoning model (200K context, 64K output) + - gpt-4.1: Extended context window (1M context, 32K output) + + Configuration: + - api_key: Azure OpenAI API key + - azure_endpoint: Azure OpenAI endpoint URL + - api_version: API version (must be 2025-03-01-preview or later) + - deployment_name: The deployment name to use (e.g., "gpt-5", "gpt-5-codex") + """ + + # Model configurations using ModelCapabilities objects + MODEL_CAPABILITIES = { + "gpt-5": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5", + friendly_name="Azure OpenAI (GPT-5)", + intelligence_score=16, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=True, # Supports reasoning tokens + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-5 supports vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=True, + temperature_constraint=TemperatureConstraint.create("range"), + description="Azure GPT-5 (400K context, 128K output) - Advanced reasoning model with extended thinking", + aliases=["gpt5", "azure-gpt5", "azure-gpt-5"], + ), + "gpt-5-codex": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5-codex", + friendly_name="Azure OpenAI (GPT-5 Codex)", + intelligence_score=17, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=True, # Codex supports advanced reasoning + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + 
supports_json_mode=True, + supports_images=False, # Codex is code-focused + max_image_size_mb=0.0, + supports_temperature=False, # Requires fixed temperature=1.0 + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure GPT-5 Codex (400K context, 128K output) - Elite code generation with deep reasoning (temperature=1.0 required)", + aliases=["gpt5-codex", "gpt5codex", "codex", "azure-codex", "azure-gpt5-codex"], + ), + "gpt-5-mini": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5-mini", + friendly_name="Azure OpenAI (GPT-5 Mini)", + intelligence_score=14, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=True, # Supports reasoning tokens + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-5 variants support vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=True, + temperature_constraint=TemperatureConstraint.create("range"), + description="Azure GPT-5-Mini - Faster, cost-effective variant", + aliases=["gpt5-mini", "gpt5mini", "mini", "azure-mini"], + ), + "gpt-5-nano": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5-nano", + friendly_name="Azure OpenAI (GPT-5 Nano)", + intelligence_score=12, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=False, # Nano does not support extended thinking + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-5 variants support vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=True, + temperature_constraint=TemperatureConstraint.create("range"), + description="Azure GPT-5-Nano - Fastest, most cost-effective", + aliases=["gpt5-nano", "gpt5nano", "nano", "azure-nano"], + ), + "o3-mini": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="o3-mini", + friendly_name="Azure OpenAI (O3 Mini)", + intelligence_score=15, + context_window=200_000, # 200K tokens + max_output_tokens=64_000, # 64K max output tokens + supports_extended_thinking=True, # O3 supports advanced reasoning + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=False, # O3 is reasoning-focused, not vision + max_image_size_mb=0.0, + supports_temperature=False, # Reasoning model requires fixed temperature=1.0 + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure O3-Mini - Strong reasoning model (temperature=1.0 required)", + aliases=["o3mini", "azure-o3-mini"], + ), + "gpt-4.1": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-4.1", + friendly_name="Azure OpenAI (GPT-4.1)", + intelligence_score=14, + context_window=1_000_000, # 1M tokens + max_output_tokens=32_000, # 32K max output tokens + supports_extended_thinking=False, # GPT-4.1 does not support extended thinking + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-4.1 supports vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=True, + temperature_constraint=TemperatureConstraint.create("range"), + description="Azure GPT-4.1 - Extended context window", + aliases=["gpt4.1", "azure-gpt4.1"], + ), + } + + def 
__init__(self, api_key: str, **kwargs): + """Initialize Azure OpenAI provider. + + Args: + api_key: Azure OpenAI API key + **kwargs: Additional configuration including: + - azure_endpoint: Azure OpenAI endpoint URL (required) + - api_version: API version (required, must be 2025-03-01-preview or later) + - deployment_name: Deployment name (required) + + Raises: + ValueError: If required configuration is missing + """ + super().__init__(api_key, **kwargs) + + # Validate required kwargs + self.azure_endpoint = kwargs.get("azure_endpoint") + self.api_version = kwargs.get("api_version") + self.deployment_name = kwargs.get("deployment_name") + + if not self.azure_endpoint: + raise ValueError("azure_endpoint is required for Azure OpenAI provider") + if not self.api_version: + raise ValueError("api_version is required for Azure OpenAI provider") + if not self.deployment_name: + raise ValueError("deployment_name is required for Azure OpenAI provider") + + # Validate API version supports Responses API + if self.api_version < "2025-03-01-preview": + logger.warning( + f"API version {self.api_version} may not support Responses API. " + "Recommended: 2025-03-01-preview or later" + ) + + # Lazy client initialization + self._client: Optional[AzureOpenAI] = None + + logger.info( + f"Initialized Azure OpenAI provider: endpoint={self.azure_endpoint}, " + f"deployment={self.deployment_name}, api_version={self.api_version}" + ) + + def _get_client(self) -> AzureOpenAI: + """Get or create the Azure OpenAI client (lazy initialization).""" + if self._client is None: + self._client = AzureOpenAI( + api_key=self.api_key, + azure_endpoint=self.azure_endpoint, + api_version=self.api_version, + ) + logger.debug("Created Azure OpenAI client") + return self._client + + def get_provider_type(self) -> ProviderType: + """Get the provider type.""" + return ProviderType.AZURE + + def generate_content( + self, + prompt: str, + model_name: str, + system_prompt: Optional[str] = None, + temperature: float = 0.3, + max_output_tokens: Optional[int] = None, + **kwargs, + ) -> ModelResponse: + """Generate content using Azure OpenAI Responses API. 
+ + Args: + prompt: User prompt/message + model_name: Model name (will be resolved to deployment) + system_prompt: Optional system prompt + temperature: Temperature parameter (default 0.3) + max_output_tokens: Maximum output tokens + **kwargs: Additional parameters + + Returns: + ModelResponse with generated content and usage data + + Raises: + ValueError: If model is not supported + Exception: If API call fails + """ + # Resolve model name and get capabilities + resolved_model = self._resolve_model_name(model_name) + capabilities = self.get_capabilities(resolved_model) + + # Validate parameters + self.validate_parameters(resolved_model, temperature, **kwargs) + + # Build input messages in Responses API format + input_messages = [] + if system_prompt: + input_messages.append({"role": "system", "content": system_prompt}) + input_messages.append({"role": "user", "content": prompt}) + + # Prepare API parameters + api_params = { + "model": self.deployment_name, + "input": input_messages, + } + + # Add max_output_tokens if specified + if max_output_tokens: + api_params["max_output_tokens"] = max_output_tokens + elif capabilities.max_output_tokens: + api_params["max_output_tokens"] = capabilities.max_output_tokens + + # Add temperature if model supports it + if capabilities.supports_temperature: + api_params["temperature"] = temperature + + logger.debug( + f"Azure OpenAI Responses API request: deployment={self.deployment_name}, " + f"model={resolved_model}, max_tokens={api_params.get('max_output_tokens')}" + ) + + try: + # Get client and make API call + client = self._get_client() + response = client.responses.create(**api_params) + + # Extract content from response + content = self._extract_content(response) + + # Extract usage data + usage = self._extract_usage(response) + + # Build ModelResponse + model_response = ModelResponse( + content=content, + usage=usage, + model_name=resolved_model, + friendly_name=capabilities.friendly_name, + provider=ProviderType.AZURE, + metadata={ + "response_id": response.id if hasattr(response, "id") else None, + "status": response.status if hasattr(response, "status") else None, + "deployment_name": self.deployment_name, + }, + ) + + logger.debug( + f"Azure OpenAI response: tokens={usage.get('total_tokens', 0)}, " + f"status={response.status if hasattr(response, 'status') else 'N/A'}" + ) + + return model_response + + except Exception as exc: + logger.error(f"Azure OpenAI API error: {exc}", exc_info=True) + raise + + def _extract_content(self, response) -> str: + """Extract text content from Responses API response. + + The Responses API returns content in different formats: + 1. output_text: Condensed text representation (preferred) + 2. output array: Array of output items (text, reasoning, etc.) 
+ + Args: + response: API response object + + Returns: + Extracted text content + + Raises: + ValueError: If no content can be extracted + """ + # Try output_text first (condensed representation) + if hasattr(response, "output_text") and response.output_text: + logger.debug("Extracted content from output_text") + return response.output_text + + # Parse output array for text items + if hasattr(response, "output") and response.output: + text_parts = [] + + for item in response.output: + item_type = getattr(item, "type", None) + + if item_type == "text" or item_type == "message": + # Text output item + if hasattr(item, "content") and item.content: + if isinstance(item.content, list) and len(item.content) > 0: + # Content is a list of text parts + text_parts.append(item.content[0].text) + elif isinstance(item.content, str): + # Content is a string + text_parts.append(item.content) + elif hasattr(item, "text"): + # Direct text attribute + text_parts.append(item.text) + + elif item_type == "reasoning": + # Reasoning output (optional: include summary) + if hasattr(item, "summary") and item.summary: + logger.debug(f"Reasoning summary: {item.summary}") + # Optionally include reasoning in output + # text_parts.append(f"[Reasoning: {item.summary}]") + + if text_parts: + content = "\n".join(text_parts) + logger.debug(f"Extracted content from output array ({len(text_parts)} parts)") + return content + + # No content found + logger.warning("No content found in response") + raise ValueError("No content available in response") + + def _extract_usage(self, response) -> dict[str, int]: + """Extract token usage from Responses API response. + + Args: + response: API response object + + Returns: + Dictionary with token usage (input_tokens, output_tokens, total_tokens) + """ + usage = {} + + if hasattr(response, "usage") and response.usage: + usage_obj = response.usage + + # Extract input tokens + if hasattr(usage_obj, "input_tokens"): + usage["input_tokens"] = usage_obj.input_tokens + usage["prompt_tokens"] = usage_obj.input_tokens + elif hasattr(usage_obj, "prompt_tokens"): + usage["prompt_tokens"] = usage_obj.prompt_tokens + usage["input_tokens"] = usage_obj.prompt_tokens + + # Extract output tokens + if hasattr(usage_obj, "output_tokens"): + usage["output_tokens"] = usage_obj.output_tokens + usage["completion_tokens"] = usage_obj.output_tokens + elif hasattr(usage_obj, "completion_tokens"): + usage["completion_tokens"] = usage_obj.completion_tokens + usage["output_tokens"] = usage_obj.completion_tokens + + # Extract total tokens + if hasattr(usage_obj, "total_tokens"): + usage["total_tokens"] = usage_obj.total_tokens + else: + # Calculate total if not provided + input_tokens = usage.get("input_tokens", 0) + output_tokens = usage.get("output_tokens", 0) + usage["total_tokens"] = input_tokens + output_tokens + + logger.debug(f"Token usage: {usage}") + + return usage + + def close(self) -> None: + """Clean up resources.""" + if self._client is not None: + # AzureOpenAI client doesn't require explicit cleanup + self._client = None + logger.debug("Closed Azure OpenAI client") + + def get_preferred_model(self, category: "ToolModelCategory", allowed_models: list[str]) -> Optional[str]: + """Get Azure's preferred model for a given category from allowed models. 
+ + Args: + category: The tool category requiring a model + allowed_models: Pre-filtered list of models allowed by restrictions + + Returns: + Preferred model name or None + """ + from tools.models import ToolModelCategory + + if not allowed_models: + return None + + # Helper to find first available from preference list + def find_first(preferences: list[str]) -> Optional[str]: + """Return first available model from preference list.""" + for model in preferences: + if model in allowed_models: + return model + return None + + if category == ToolModelCategory.EXTENDED_REASONING: + # Prefer models with extended thinking support + # Order: gpt-5-codex > o3-mini > gpt-5 > gpt-5-mini + preferred = find_first(["gpt-5-codex", "o3-mini", "gpt-5", "gpt-5-mini"]) + return preferred if preferred else allowed_models[0] + + elif category == ToolModelCategory.FAST_RESPONSE: + # Prefer faster models with good performance + # Order: gpt-5-mini > gpt-5-nano > gpt-5 > gpt-4.1 + preferred = find_first(["gpt-5-mini", "gpt-5-nano", "gpt-5", "gpt-4.1"]) + return preferred if preferred else allowed_models[0] + + else: # BALANCED or default + # Prefer gpt-5-codex for code tasks, then balanced options + # Order: gpt-5-codex > gpt-5 > gpt-5-mini > o3-mini > gpt-4.1 > gpt-5-nano + preferred = find_first(["gpt-5-codex", "gpt-5", "gpt-5-mini", "o3-mini", "gpt-4.1", "gpt-5-nano"]) + return preferred if preferred else allowed_models[0] diff --git a/providers/registry.py b/providers/registry.py index 6f412ff7..4fd2fe1f 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -37,6 +37,7 @@ class ModelProviderRegistry: PROVIDER_PRIORITY_ORDER = [ ProviderType.GOOGLE, # Direct Gemini access ProviderType.OPENAI, # Direct OpenAI access + ProviderType.AZURE, # Azure OpenAI access ProviderType.XAI, # Direct X.AI GROK access ProviderType.DIAL, # DIAL unified API access ProviderType.CUSTOM, # Local/self-hosted models @@ -122,6 +123,24 @@ def get_provider(cls, provider_type: ProviderType, force_new: bool = False) -> O provider_kwargs["base_url"] = gemini_base_url logging.info(f"Initialized Gemini provider with custom endpoint: {gemini_base_url}") provider = provider_class(**provider_kwargs) + elif provider_type == ProviderType.AZURE: + # For Azure OpenAI, check required configuration + if not api_key: + return None + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + azure_version = os.getenv("AZURE_OPENAI_API_VERSION", "2025-04-01-preview") + deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") + + if not azure_endpoint or not deployment_name: + logging.warning("Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT_NAME") + return None + + provider = provider_class( + api_key=api_key, + azure_endpoint=azure_endpoint, + api_version=azure_version, + deployment_name=deployment_name, + ) else: if not api_key: return None @@ -265,6 +284,7 @@ def _get_api_key_for_provider(cls, provider_type: ProviderType) -> Optional[str] ProviderType.OPENROUTER: "OPENROUTER_API_KEY", ProviderType.CUSTOM: "CUSTOM_API_KEY", # Can be empty for providers that don't need auth ProviderType.DIAL: "DIAL_API_KEY", + ProviderType.AZURE: "AZURE_OPENAI_API_KEY", } env_var = key_mapping.get(provider_type) diff --git a/providers/shared/provider_type.py b/providers/shared/provider_type.py index 44153f0a..8db043b3 100644 --- a/providers/shared/provider_type.py +++ b/providers/shared/provider_type.py @@ -12,5 +12,6 @@ class ProviderType(Enum): OPENAI = "openai" XAI = "xai" OPENROUTER = "openrouter" + AZURE = "azure" CUSTOM = 
"custom" DIAL = "dial" diff --git a/pyproject.toml b/pyproject.toml index 74cf6091..e3e9bfd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "zen-mcp-server" -version = "5.21.0" +version = "1.0.0" description = "AI-powered MCP server with multiple model providers" requires-python = ">=3.9" dependencies = [ diff --git a/server.py b/server.py index a8bf47e2..f5aa5461 100644 --- a/server.py +++ b/server.py @@ -412,6 +412,7 @@ def configure_providers(): value = os.getenv(key) logger.debug(f" {key}: {'[PRESENT]' if value else '[MISSING]'}") from providers import ModelProviderRegistry + from providers.azure_openai import AzureOpenAIProvider from providers.custom import CustomProvider from providers.dial import DIALModelProvider from providers.gemini import GeminiModelProvider @@ -453,6 +454,17 @@ def configure_providers(): has_native_apis = True logger.info("X.AI API key found - GROK models available") + # Check for Azure OpenAI API key + azure_key = os.getenv("AZURE_OPENAI_API_KEY") + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") + if azure_key and azure_endpoint and azure_deployment: + if (azure_key != "your_azure_openai_key_here" and + azure_endpoint != "https://your-resource.openai.azure.com/"): + valid_providers.append("Azure OpenAI") + has_native_apis = True + logger.info(f"Azure OpenAI found - deployment: {azure_deployment}") + # Check for DIAL API key dial_key = os.getenv("DIAL_API_KEY") if dial_key and dial_key != "your_dial_api_key_here": @@ -497,6 +509,10 @@ def configure_providers(): ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) if openai_key and openai_key != "your_openai_api_key_here": ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) + if azure_key and azure_endpoint and azure_deployment: + if (azure_key != "your_azure_openai_key_here" and + azure_endpoint != "https://your-resource.openai.azure.com/"): + ModelProviderRegistry.register_provider(ProviderType.AZURE, AzureOpenAIProvider) if xai_key and xai_key != "your_xai_api_key_here": ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider) if dial_key and dial_key != "your_dial_api_key_here": @@ -522,6 +538,7 @@ def custom_provider_factory(api_key=None): "At least one API configuration is required. 
Please set either:\n" "- GEMINI_API_KEY for Gemini models\n" "- OPENAI_API_KEY for OpenAI models\n" + "- AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT_NAME for Azure OpenAI models\n" "- XAI_API_KEY for X.AI GROK models\n" "- DIAL_API_KEY for DIAL models\n" "- OPENROUTER_API_KEY for OpenRouter (multiple models)\n" diff --git a/tests/test_azure_openai_provider.py b/tests/test_azure_openai_provider.py new file mode 100644 index 00000000..cbb254da --- /dev/null +++ b/tests/test_azure_openai_provider.py @@ -0,0 +1,747 @@ +"""Tests for Azure OpenAI provider implementation using Responses API.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from providers.azure_openai import AzureOpenAIProvider +from providers.shared import ProviderType + + +class TestAzureOpenAIProvider: + """Test Azure OpenAI provider functionality.""" + + def setup_method(self): + """Set up clean state before each test.""" + # Clear restriction service cache before each test + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + def teardown_method(self): + """Clean up after each test to avoid singleton issues.""" + # Clear restriction service cache after each test + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + + def test_initialization_success(self): + """Test successful provider initialization with all required parameters.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + assert provider.api_key == "test-key" + assert provider.azure_endpoint == "https://test.openai.azure.com" + assert provider.api_version == "2025-03-01-preview" + assert provider.deployment_name == "gpt-5" + assert provider.get_provider_type() == ProviderType.AZURE + + def test_initialization_missing_azure_endpoint(self): + """Test initialization fails without azure_endpoint.""" + with pytest.raises(ValueError, match="azure_endpoint is required"): + AzureOpenAIProvider( + api_key="test-key", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + def test_initialization_missing_api_version(self): + """Test initialization fails without api_version.""" + with pytest.raises(ValueError, match="api_version is required"): + AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + deployment_name="gpt-5", + ) + + def test_initialization_missing_deployment_name(self): + """Test initialization fails without deployment_name.""" + with pytest.raises(ValueError, match="deployment_name is required"): + AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + ) + + def test_initialization_old_api_version_warning(self): + """Test warning is logged for API versions older than 2025-03-01-preview.""" + with patch("providers.azure_openai.logger") as mock_logger: + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2024-06-01", + deployment_name="gpt-5", + ) + + # Verify provider was created and warning was logged + assert provider is not None + mock_logger.warning.assert_called_once() + warning_message = mock_logger.warning.call_args[0][0] + assert "may not support Responses API" in warning_message + + def test_model_validation_gpt5(self): + """Test model name validation for GPT-5.""" + provider = AzureOpenAIProvider( + 
api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test valid models + assert provider.validate_model_name("gpt-5") is True + assert provider.validate_model_name("gpt-5-codex") is True + + # Test valid aliases + assert provider.validate_model_name("gpt5") is True + assert provider.validate_model_name("azure-gpt5") is True + assert provider.validate_model_name("azure-gpt-5") is True + assert provider.validate_model_name("codex") is True + assert provider.validate_model_name("gpt5-codex") is True + assert provider.validate_model_name("gpt5codex") is True + assert provider.validate_model_name("azure-codex") is True + assert provider.validate_model_name("azure-gpt5-codex") is True + + # Test invalid models + assert provider.validate_model_name("gpt-4") is False + assert provider.validate_model_name("o3") is False + assert provider.validate_model_name("invalid-model") is False + + def test_resolve_model_name(self): + """Test model name resolution for aliases.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test GPT-5 aliases + assert provider._resolve_model_name("gpt5") == "gpt-5" + assert provider._resolve_model_name("azure-gpt5") == "gpt-5" + assert provider._resolve_model_name("azure-gpt-5") == "gpt-5" + + # Test GPT-5 Codex aliases + assert provider._resolve_model_name("gpt5-codex") == "gpt-5-codex" + assert provider._resolve_model_name("gpt5codex") == "gpt-5-codex" + assert provider._resolve_model_name("codex") == "gpt-5-codex" + assert provider._resolve_model_name("azure-codex") == "gpt-5-codex" + assert provider._resolve_model_name("azure-gpt5-codex") == "gpt-5-codex" + + # Test full names pass through unchanged + assert provider._resolve_model_name("gpt-5") == "gpt-5" + assert provider._resolve_model_name("gpt-5-codex") == "gpt-5-codex" + + def test_get_capabilities_gpt5(self): + """Test getting model capabilities for GPT-5.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + capabilities = provider.get_capabilities("gpt-5") + + assert capabilities.model_name == "gpt-5" + assert capabilities.friendly_name == "Azure OpenAI (GPT-5)" + assert capabilities.provider == ProviderType.AZURE + assert capabilities.intelligence_score == 16 + assert capabilities.context_window == 400_000 + assert capabilities.max_output_tokens == 128_000 + assert capabilities.supports_extended_thinking is True + assert capabilities.supports_system_prompts is True + assert capabilities.supports_streaming is True + assert capabilities.supports_function_calling is True + assert capabilities.supports_json_mode is True + assert capabilities.supports_images is True + assert capabilities.max_image_size_mb == 20.0 + assert capabilities.supports_temperature is True + # GPT-5 uses RangeTemperatureConstraint (not fixed) + assert capabilities.temperature_constraint.min_temp == 0.0 + assert capabilities.temperature_constraint.max_temp == 2.0 + + def test_get_capabilities_gpt5_codex(self): + """Test getting model capabilities for GPT-5 Codex.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5-codex", + ) + + capabilities = provider.get_capabilities("gpt-5-codex") + 
+
+    def test_get_capabilities_gpt5(self):
+        """Test getting model capabilities for GPT-5."""
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        capabilities = provider.get_capabilities("gpt-5")
+
+        assert capabilities.model_name == "gpt-5"
+        assert capabilities.friendly_name == "Azure OpenAI (GPT-5)"
+        assert capabilities.provider == ProviderType.AZURE
+        assert capabilities.intelligence_score == 16
+        assert capabilities.context_window == 400_000
+        assert capabilities.max_output_tokens == 128_000
+        assert capabilities.supports_extended_thinking is True
+        assert capabilities.supports_system_prompts is True
+        assert capabilities.supports_streaming is True
+        assert capabilities.supports_function_calling is True
+        assert capabilities.supports_json_mode is True
+        assert capabilities.supports_images is True
+        assert capabilities.max_image_size_mb == 20.0
+        assert capabilities.supports_temperature is True
+        # GPT-5 uses RangeTemperatureConstraint (not fixed)
+        assert capabilities.temperature_constraint.min_temp == 0.0
+        assert capabilities.temperature_constraint.max_temp == 2.0
+
+    def test_get_capabilities_gpt5_codex(self):
+        """Test getting model capabilities for GPT-5 Codex."""
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5-codex",
+        )
+
+        capabilities = provider.get_capabilities("gpt-5-codex")
+
+        assert capabilities.model_name == "gpt-5-codex"
+        assert capabilities.friendly_name == "Azure OpenAI (GPT-5 Codex)"
+        assert capabilities.provider == ProviderType.AZURE
+        assert capabilities.intelligence_score == 17
+        assert capabilities.context_window == 400_000
+        assert capabilities.max_output_tokens == 128_000
+        assert capabilities.supports_extended_thinking is True
+        assert capabilities.supports_system_prompts is True
+        assert capabilities.supports_streaming is True
+        assert capabilities.supports_function_calling is True
+        assert capabilities.supports_json_mode is True
+        assert capabilities.supports_images is False
+        assert capabilities.max_image_size_mb == 0.0
+        # GPT-5-Codex requires fixed temperature=1.0
+        assert capabilities.supports_temperature is False
+        assert capabilities.temperature_constraint.value == 1.0
+
+    def test_get_capabilities_with_alias(self):
+        """Test getting model capabilities with alias resolves correctly."""
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        capabilities = provider.get_capabilities("gpt5")
+        assert capabilities.model_name == "gpt-5"
+        assert capabilities.friendly_name == "Azure OpenAI (GPT-5)"
+
+        capabilities = provider.get_capabilities("codex")
+        assert capabilities.model_name == "gpt-5-codex"
+        assert capabilities.friendly_name == "Azure OpenAI (GPT-5 Codex)"
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_generate_content_basic(self, mock_azure_class):
+        """Test basic content generation using Responses API."""
+        # Set up mock Azure client
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        # Mock the response object
+        mock_response = MagicMock()
+        mock_response.output_text = "This is the response content"
+        mock_response.id = "test-response-id"
+        mock_response.status = "completed"
+        mock_response.usage = MagicMock()
+        mock_response.usage.input_tokens = 100
+        mock_response.usage.output_tokens = 50
+        mock_response.usage.total_tokens = 150
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        # Generate content
+        result = provider.generate_content(
+            prompt="Test prompt",
+            model_name="gpt-5",
+            temperature=1.0,
+        )
+
+        # Verify API was called correctly
+        mock_client.responses.create.assert_called_once()
+        call_kwargs = mock_client.responses.create.call_args[1]
+
+        assert call_kwargs["model"] == "gpt-5"
+        assert call_kwargs["temperature"] == 1.0
+        assert len(call_kwargs["input"]) == 1
+        assert call_kwargs["input"][0]["role"] == "user"
+        assert call_kwargs["input"][0]["content"] == "Test prompt"
+
+        # Verify response
+        assert result.content == "This is the response content"
+        assert result.model_name == "gpt-5"
+        assert result.friendly_name == "Azure OpenAI (GPT-5)"
+        assert result.provider == ProviderType.AZURE
+        assert result.usage["input_tokens"] == 100
+        assert result.usage["output_tokens"] == 50
+        assert result.usage["total_tokens"] == 150
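+
+        # For reference, the request shape asserted above. A minimal sketch of
+        # the Responses API payload (field names taken from the assertions in
+        # this test, not from the provider implementation):
+        #
+        #   client.responses.create(
+        #       model="gpt-5",                                   # deployment/model name
+        #       temperature=1.0,
+        #       input=[{"role": "user", "content": "Test prompt"}],
+        #   )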
"test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 150 + mock_response.usage.output_tokens = 75 + mock_response.usage.total_tokens = 225 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="User message", + model_name="gpt-5", + system_prompt="You are a helpful assistant", + temperature=1.0, + ) + + # Verify messages include system prompt + call_kwargs = mock_client.responses.create.call_args[1] + assert len(call_kwargs["input"]) == 2 + assert call_kwargs["input"][0]["role"] == "system" + assert call_kwargs["input"][0]["content"] == "You are a helpful assistant" + assert call_kwargs["input"][1]["role"] == "user" + assert call_kwargs["input"][1]["content"] == "User message" + + assert result.content == "Response with system prompt" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_extracts_from_output_array(self, mock_azure_class): + """Test content extraction from output array when output_text is not available.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Mock response with output array (no output_text) + mock_response = MagicMock() + mock_response.output_text = None + + # Create mock output items + text_item = MagicMock() + text_item.type = "text" + text_item.content = [MagicMock(text="Text from output array")] + + mock_response.output = [text_item] + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 50 + mock_response.usage.output_tokens = 25 + mock_response.usage.total_tokens = 75 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="Test prompt", + model_name="gpt-5", + temperature=1.0, + ) + + # Verify content extracted from output array + assert result.content == "Text from output array" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_extracts_from_message_type(self, mock_azure_class): + """Test content extraction from output array with message type.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Mock response with output array containing message type + mock_response = MagicMock() + mock_response.output_text = None + + message_item = MagicMock() + message_item.type = "message" + message_item.content = "Direct message content" + + mock_response.output = [message_item] + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 30 + mock_response.usage.output_tokens = 20 + mock_response.usage.total_tokens = 50 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="Test", + model_name="gpt-5", + temperature=1.0, + ) + + assert result.content == "Direct message content" + + @patch("providers.azure_openai.AzureOpenAI") + def 
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_generate_content_no_content_error(self, mock_azure_class):
+        """Test error when no content can be extracted from response."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        # Mock response with no content
+        mock_response = MagicMock()
+        mock_response.output_text = None
+        mock_response.output = []
+        mock_response.usage = MagicMock()
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        with pytest.raises(ValueError, match="No content available in response"):
+            provider.generate_content(
+                prompt="Test",
+                model_name="gpt-5",
+                temperature=1.0,
+            )
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_token_usage_extraction(self, mock_azure_class):
+        """Test token usage extraction from response."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.output_text = "Test response"
+        mock_response.id = "test-id"
+        mock_response.status = "completed"
+
+        # Test with input_tokens and output_tokens format
+        mock_response.usage = MagicMock()
+        mock_response.usage.input_tokens = 200
+        mock_response.usage.output_tokens = 100
+        mock_response.usage.total_tokens = 300
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        result = provider.generate_content(
+            prompt="Test", model_name="gpt-5", temperature=1.0
+        )
+
+        assert result.usage["input_tokens"] == 200
+        assert result.usage["prompt_tokens"] == 200
+        assert result.usage["output_tokens"] == 100
+        assert result.usage["completion_tokens"] == 100
+        assert result.usage["total_tokens"] == 300
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_token_usage_extraction_alternative_format(self, mock_azure_class):
+        """Test token usage extraction with prompt_tokens and completion_tokens format."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.output_text = "Test response"
+        mock_response.id = "test-id"
+        mock_response.status = "completed"
+
+        # Test with prompt_tokens and completion_tokens format.
+        # Create a custom mock class that only has specific attributes
+        class UsageWithLegacyFields:
+            prompt_tokens = 250
+            completion_tokens = 125
+
+        mock_response.usage = UsageWithLegacyFields()
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        result = provider.generate_content(
+            prompt="Test", model_name="gpt-5", temperature=1.0
+        )
+
+        assert result.usage["prompt_tokens"] == 250
+        assert result.usage["input_tokens"] == 250
+        assert result.usage["completion_tokens"] == 125
+        assert result.usage["output_tokens"] == 125
+        assert result.usage["total_tokens"] == 375  # Calculated
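+
+        # The two usage tests above imply a normalization step that mirrors
+        # both naming schemes into one dict (a sketch inferred from the
+        # assertions, not the provider's literal code):
+        #
+        #   usage = {
+        #       "input_tokens": n_in,   "prompt_tokens": n_in,
+        #       "output_tokens": n_out, "completion_tokens": n_out,
+        #       "total_tokens": total if provided else n_in + n_out,
+        #   }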
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_generate_content_with_max_output_tokens(self, mock_azure_class):
+        """Test content generation with explicit max_output_tokens."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.output_text = "Response"
+        mock_response.id = "test-id"
+        mock_response.status = "completed"
+        mock_response.usage = MagicMock()
+        mock_response.usage.input_tokens = 50
+        mock_response.usage.output_tokens = 25
+        mock_response.usage.total_tokens = 75
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        result = provider.generate_content(
+            prompt="Test",
+            model_name="gpt-5",
+            max_output_tokens=4000,
+            temperature=1.0,
+        )
+
+        # Verify max_output_tokens was passed and result is not None
+        assert result is not None
+        call_kwargs = mock_client.responses.create.call_args[1]
+        assert call_kwargs["max_output_tokens"] == 4000
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_generate_content_api_error(self, mock_azure_class):
+        """Test error handling when API call fails."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        # Simulate API error
+        mock_client.responses.create.side_effect = Exception("API Error")
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        with pytest.raises(Exception, match="API Error"):
+            provider.generate_content(
+                prompt="Test",
+                model_name="gpt-5",
+                temperature=1.0,
+            )
+
+    def test_provider_type(self):
+        """Test get_provider_type returns ProviderType.AZURE."""
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        assert provider.get_provider_type() == ProviderType.AZURE
+
+    def test_get_preferred_model_extended_reasoning(self):
+        """Test get_preferred_model for extended reasoning category."""
+        from tools.models import ToolModelCategory
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        # Test with both models available
+        allowed = ["gpt-5", "gpt-5-codex"]
+        preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, allowed)
+        assert preferred == "gpt-5-codex"  # Codex preferred for extended reasoning
+
+        # Test with only gpt-5 available
+        allowed = ["gpt-5"]
+        preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, allowed)
+        assert preferred == "gpt-5"
+
+        # Test with empty list
+        preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, [])
+        assert preferred is None
+
+    def test_get_preferred_model_fast_response(self):
+        """Test get_preferred_model for fast response category."""
+        from tools.models import ToolModelCategory
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        # Test with both models available
+        allowed = ["gpt-5", "gpt-5-codex"]
+        preferred = provider.get_preferred_model(ToolModelCategory.FAST_RESPONSE, allowed)
+        assert preferred == "gpt-5"  # gpt-5 preferred for fast response
+
+        # Test with only codex available
+        allowed = ["gpt-5-codex"]
+        preferred = provider.get_preferred_model(ToolModelCategory.FAST_RESPONSE, allowed)
+        assert preferred == "gpt-5-codex"
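+
+        # The category tests here and in test_get_preferred_model_balanced
+        # below reduce to a simple per-category ranking (an inferred summary,
+        # not the provider's literal code):
+        #
+        #   EXTENDED_REASONING, BALANCED -> prefer "gpt-5-codex", else "gpt-5"
+        #   FAST_RESPONSE                -> prefer "gpt-5", else "gpt-5-codex"
+        #   empty allow-list             -> None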
+
+    def test_get_preferred_model_balanced(self):
+        """Test get_preferred_model for balanced category."""
+        from tools.models import ToolModelCategory
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        # Test with both models available
+        allowed = ["gpt-5", "gpt-5-codex"]
+        preferred = provider.get_preferred_model(ToolModelCategory.BALANCED, allowed)
+        assert preferred == "gpt-5-codex"  # Codex preferred for code tasks
+
+        # Test with only gpt-5 available
+        allowed = ["gpt-5"]
+        preferred = provider.get_preferred_model(ToolModelCategory.BALANCED, allowed)
+        assert preferred == "gpt-5"
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_close_cleanup(self, mock_azure_class):
+        """Test close method properly cleans up resources."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        # Initialize client by calling _get_client
+        provider._get_client()
+        assert provider._client is not None
+
+        # Close should set client to None
+        provider.close()
+        assert provider._client is None
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_lazy_client_initialization(self, mock_azure_class):
+        """Test that Azure client is lazily initialized on first use."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5",
+        )
+
+        # Client should not be initialized yet
+        assert provider._client is None
+        mock_azure_class.assert_not_called()
+
+        # Get client should initialize it
+        client = provider._get_client()
+        assert client is not None
+        assert provider._client is not None
+        mock_azure_class.assert_called_once_with(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+        )
+
+        # Second call should return same client
+        client2 = provider._get_client()
+        assert client2 is client
+        mock_azure_class.assert_called_once()  # Still only called once
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_metadata_in_response(self, mock_azure_class):
+        """Test that response metadata includes deployment and status info."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.output_text = "Test content"
+        mock_response.id = "response-123"
+        mock_response.status = "completed"
+        mock_response.usage = MagicMock()
+        mock_response.usage.input_tokens = 50
+        mock_response.usage.output_tokens = 25
+        mock_response.usage.total_tokens = 75
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="my-gpt5-deployment",
+        )
+
+        result = provider.generate_content(
+            prompt="Test", model_name="gpt-5", temperature=1.0
+        )
+
+        # Verify metadata
+        assert result.metadata["response_id"] == "response-123"
+        assert result.metadata["status"] == "completed"
+        assert result.metadata["deployment_name"] == "my-gpt5-deployment"
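+
+        # Lazy initialization as exercised by test_lazy_client_initialization,
+        # sketched for reference (an assumed shape; the provider's actual code
+        # may differ):
+        #
+        #   def _get_client(self):
+        #       if self._client is None:
+        #           self._client = AzureOpenAI(
+        #               api_key=self.api_key,
+        #               azure_endpoint=self.azure_endpoint,
+        #               api_version=self.api_version,
+        #           )
+        #       return self._client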
+
+    @patch("providers.azure_openai.AzureOpenAI")
+    def test_generate_content_resolves_alias(self, mock_azure_class):
+        """Test that generate_content resolves aliases before making API call."""
+        mock_client = MagicMock()
+        mock_azure_class.return_value = mock_client
+
+        mock_response = MagicMock()
+        mock_response.output_text = "Test response"
+        mock_response.id = "test-id"
+        mock_response.status = "completed"
+        mock_response.usage = MagicMock()
+        mock_response.usage.input_tokens = 50
+        mock_response.usage.output_tokens = 25
+        mock_response.usage.total_tokens = 75
+
+        mock_client.responses.create.return_value = mock_response
+
+        provider = AzureOpenAIProvider(
+            api_key="test-key",
+            azure_endpoint="https://test.openai.azure.com",
+            api_version="2025-03-01-preview",
+            deployment_name="gpt-5-codex",
+        )
+
+        # Use alias "codex"
+        result = provider.generate_content(
+            prompt="Test prompt",
+            model_name="codex",
+            temperature=1.0,
+        )
+
+        # Verify API was called with deployment name (not the alias)
+        call_kwargs = mock_client.responses.create.call_args[1]
+        assert call_kwargs["model"] == "gpt-5-codex"  # Uses deployment name
+
+        # Verify result uses resolved model name
+        assert result.model_name == "gpt-5-codex"
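+
+    # End-to-end flow implied by this suite (an inferred sketch of
+    # generate_content, not the provider's literal implementation; internal
+    # helper names beyond those asserted above are assumptions):
+    #
+    #   def generate_content(self, prompt, model_name, system_prompt=None, **kwargs):
+    #       model = self._resolve_model_name(model_name)   # alias -> canonical
+    #       input_items = []
+    #       if system_prompt:
+    #           input_items.append({"role": "system", "content": system_prompt})
+    #       input_items.append({"role": "user", "content": prompt})
+    #       response = self._get_client().responses.create(
+    #           model=model, input=input_items, ...
+    #       )
+    #       # then: extract content (output_text, else the output array),
+    #       # normalize usage into the dual-key dict, and attach the
+    #       # response_id/status/deployment_name metadata asserted above.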