diff --git a/.claude/settings.json b/.claude/settings.json index 8ee1dfe1..5d673567 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,7 +1,76 @@ { "permissions": { "allow": [ - ], - "deny": [] - } + "Bash(poetry run pytest:*)", + "Bash(poetry install:*)", + "Bash(python -m pytest tests/adapters/filesystem/test_mock_file_system_adapter.py -v)", + "Bash(python:*)", + "Bash(poetry run:*)", + "Bash(rm:*)", + "Bash(mkdir:*)", + "Bash(poetry --version)", + "Bash(poetry:*)", + "Bash(ls:*)", + "Bash(find:*)", + "Bash(grep:*)", + "Bash(where python)", + "Bash(set APP_NAME=Test Agent)", + "Bash(del test_validation.py)", + "Bash(E:agent_smith.venvScriptspython.exe -m pytest tests/integration/ -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ -v --tb=no -q)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_docker_process_integration.py -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_filesystem_integration.py -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_us_001_task_selection.py -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_us_002_isolated_agent_execution.py -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_us_003_automated_e2e_test_workflow.py -v)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ --tb=no -q)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ --ignore=tests/integration/test_di_container_integration.py --ignore=tests/integration/test_git_integration.py --ignore=tests/integration/test_llm_integration.py --ignore=tests/integration/test_observability_integration.py --ignore=tests/integration/test_us_007_proactive_plan_validation.py --ignore=tests/integration/test_us_012_shell_injection_prevention.py --tb=no -q)", + "Bash(where poetry)", + "Bash(\"C:\\Users\\idnot\\AppData\\Roaming\\Python\\Scripts\\poetry.exe\" run pytest tests/integration/ -v)", + "Bash(powershell:*)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ -v --tb=short)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -c \"from opentelemetry.metrics import __all__; print(__all__)\")", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_di_container_integration.py -v --tb=short)", + "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_di_container_integration.py::TestDIContainerIntegration::test_service_dependency_resolution -v --tb=short)", + "Bash(cmd /c \"poetry run python test_orchestrator_basic.py\")", + "Bash(wc:*)", + "mcp__sequential-thinking__sequentialthinking", + "Bash(cmd:*)", + "Bash(echo $0)", + "Bash(cat:*)", + "Bash(export:*)", + "Bash(.venv/Scripts/pytest.exe tests/security/test_context_loader_security.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/core/orchestration/ -v)", + "Bash(.venv/Scripts/pytest.exe tests/mocks/mock_planner.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/adapters/ -v)", + "Bash(.venv/Scripts/pytest.exe --collect-only tests/)", + "Bash(.venv/Scripts/pytest.exe tests/adapters/docker/test_mock_docker_process_adapter.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/adapters/filesystem/test_mock_file_system_adapter.py -v)", + 
"Bash(.venv/Scripts/pytest.exe tests/integration/test_orchestrator_basic_workflow_integration.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/integration/test_us_012_shell_injection_prevention.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/adapters/observability/test_mock_observability_adapter.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/core/domain/test_exceptions.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/adapters/scrubbers/test_regex_scrubber.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/integration/test_context_loader_integration.py -v)", + "Bash(.venv/Scripts/pytest.exe tests/performance/ -v)", + "Bash(.venv/Scripts/pytest.exe tests/core/context/ -v)", + "Bash(.venv/Scripts/pytest.exe tests/core/config/ -v)", + "Bash(.venv/Scripts/pytest.exe tests/integration/test_us_007_proactive_plan_validation.py -v)", + "Bash(for:*)", + "Bash(do echo \"Processing $file\")", + "Bash(head:*)", + "Bash(done)" + ] + }, + "enableAllProjectMcpServers": true, + "enabledMcpjsonServers": [ + "sequential-thinking", + "firecrawl", + "tavily", + "perplexity", + "Ref", + "serena", + "zen" + ] } \ No newline at end of file diff --git a/.env.example b/.env.example index 51cfa1a6..2ed1bd45 100644 --- a/.env.example +++ b/.env.example @@ -35,6 +35,52 @@ DIAL_API_KEY=your_dial_api_key_here # DIAL_API_HOST=https://core.dialx.ai # Optional: Base URL without /openai suffix (auto-appended) # DIAL_API_VERSION=2025-01-01-preview # Optional: API version header for DIAL requests +# Azure OpenAI GPT-5 and GPT-5-Codex Configuration +# ============================================================================== +# CRITICAL: Azure OpenAI GPT-5/GPT-5-Codex models have UNIQUE REQUIREMENTS: +# +# 1. RESPONSES API ONLY - Chat Completions API is NOT implemented for these models +# 2. TEMPERATURE CONSTRAINT - Must be exactly 1.0 (returns 400 error if changed) +# 3. MINIMUM OUTPUT TOKENS - Must be at least 16 (returns 400 error if less) +# 4. API VERSION - Must be 2025-03-01-preview or later for Responses API +# +# How to obtain credentials: +# 1. Log in to Azure Portal (https://portal.azure.com) +# 2. Navigate to your Azure OpenAI resource +# 3. Go to "Keys and Endpoint" section +# 4. Copy the API Key and Endpoint URL +# 5. Deploy a GPT-5 or GPT-5-Codex model and note the deployment name +# +# All 4 variables below are REQUIRED for Azure OpenAI to work: +AZURE_OPENAI_API_KEY=your_azure_openai_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview # Minimum: 2025-03-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5 # Your deployment name (e.g., gpt-5, gpt-5-codex) +# +# Model Capabilities Comparison: +# ┌─────────────┬────────────┬────────────┬──────────┬─────────┬──────────────┐ +# │ Model │ Context │ Max Output │ Vision │ Code │ Intelligence │ +# ├─────────────┼────────────┼────────────┼──────────┼─────────┼──────────────┤ +# │ GPT-5 │ 400K │ 128K │ Yes │ Good │ Score: 16 │ +# │ GPT-5-Codex │ 400K │ 128K │ No │ Elite │ Score: 17 │ +# └─────────────┴────────────┴────────────┴──────────┴─────────┴──────────────┘ +# +# Known Constraints (from Azure OpenAI documentation and community testing): +# - Temperature: Fixed at 1.0, error message: "Unsupported value. 
Only the default (1) value is supported" +# - Max Output Tokens: Minimum 16 tokens required for reasoning models +# - Responses API: Different response format than Chat Completions API +# - No streaming support for reasoning tokens (only final output streams) +# +# Example deployment names (user-defined in Azure Portal): +# - gpt-5 # General purpose reasoning model +# - gpt-5-codex # Code-specialized variant +# - my-gpt5-deployment # Custom name you choose +# - prod-gpt5-codex # Environment-specific naming +# +# IMPORTANT: These are reasoning models with extended thinking capabilities. +# They use additional "reasoning tokens" internally before generating the output. +# ============================================================================== + # Option 2: Use OpenRouter for access to multiple models through one API # Get your OpenRouter API key from: https://openrouter.ai/ # If using OpenRouter, comment out the native API keys above diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 100644 index 00000000..b1a3201d --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' 
+ + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options + # claude_args: '--model claude-opus-4-1-20250805 --allowed-tools Bash(gh pr:*)' + diff --git a/.gitignore b/.gitignore index be60b015..d2dbb87a 100644 --- a/.gitignore +++ b/.gitignore @@ -144,6 +144,9 @@ cython_debug/ # VS Code .vscode/ +# Cursor +.cursor/ + # macOS .DS_Store @@ -161,9 +164,6 @@ htmlcov/ coverage.xml .pytest_cache/ -# Test simulation artifacts (dynamically created during testing) -test_simulation_files/.claude/ - # Temporary test directories test-setup/ @@ -175,8 +175,8 @@ FEATURE_*.md # Local user instructions CLAUDE.local.md -# Claude Code personal settings -.claude/settings.local.json +# Claude Code +.claude/ # Standalone mode files .zen_venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 97840e82..bafbce43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,396 +2,9 @@ -## v7.8.1 (2025-10-07) - -### Bug Fixes - -- Updated model description to fix test - ([`04f7ce5`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/04f7ce5b03804564263f53a765931edba9c320cd)) - -### Chores - -- Sync version to config.py [skip ci] - ([`c27e81d`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/c27e81d6d2f22978816f798a161a869d1ab5f025)) - -### Refactoring - -- Moved registries into a separate module and code cleanup - ([`7c36b92`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/7c36b9255a13007a10af4fadefc21aadfce482b0)) - - -## v7.8.0 (2025-10-07) - -### Chores - -- Sync version to config.py [skip ci] - ([`3e5fa96`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/3e5fa96c981bbd7b844a9887a518ffe266b78e9b)) - -### Documentation - -- Consensus video - ([`2352684`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/23526841922a73c68094e5205e19af04a1f6c8cc)) - -- Formatting - ([`7d7c74b`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/7d7c74b5a38b7d1adf132b8e28034017df7aa852)) - -- Link to videos from main page - ([`e8ef193`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/e8ef193daba393b55a3beaaba49721bb9182378a)) - -- Update README.md - ([`7b13543`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/7b13543824fc0af729daf753ecdddba9ee7d9f1e)) - -### Features - -- All native providers now read from catalog files like OpenRouter / Custom configs. 
Allows for - greater control over the capabilities - ([`2a706d5`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/2a706d5720c0bf97b71c3e0fc95c15f78015bedf)) - -- Provider cleanup - ([`9268dda`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/9268ddad2a07306351765b47098134512739f49f)) - -### Refactoring - -- New base class for model registry / loading - ([`02d13da`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/02d13da897016d7491b4a10a1195983385d66654)) - - -## v7.7.0 (2025-10-07) - -### Chores - -- Sync version to config.py [skip ci] - ([`70ae62a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/70ae62a2cd663c3abcabddd1be1bc6ed9512d7df)) - -### Documentation - -- Video - ([`ed5dda7`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ed5dda7c5a9439c2835cc69d76e6377169ad048a)) - -### Features - -- More aliases - ([`5f0aaf5`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/5f0aaf5f69c9d188d817b5ffbf6738c61da40ec7)) - - -## v7.6.0 (2025-10-07) - -### Chores - -- Sync version to config.py [skip ci] - ([`c1c75ba`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/c1c75ba304c2840329650c46273e87eab9b05906)) - -- Sync version to config.py [skip ci] - ([`0fa9b66`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/0fa9b6658099c8e0d79fda0c7d2347f62d0e6137)) - -### Documentation - -- Info about AI client timeouts - ([`3ddfed5`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/3ddfed5ef09000791e1c94b041c43dc273ed53a8)) - -### Features - -- Add support for openai/gpt-5-pro model - ([`abed075`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/abed075b2eaa99e9618202f47ff921094baae952)) - - -## v7.5.2 (2025-10-06) - -### Bug Fixes - -- Handle 429 response https://github.com/BeehiveInnovations/zen-mcp-server/issues/273 - ([`cbe1d79`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/cbe1d7993276bd014b495cbd2d0ece1f5d7583d9)) - -### Chores - -- Sync version to config.py [skip ci] - ([`74fdd36`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/74fdd36de92d34681fcc5a2f772c3d05634f0a55)) - - -## v7.5.1 (2025-10-06) - -### Chores - -- Sync version to config.py [skip ci] - ([`004e379`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/004e379cf2f1853829dccb15fa72ec18d282f1a4)) - - -## v7.5.0 (2025-10-06) - -### Chores - -- Sync version to config.py [skip ci] - ([`71e7cd5`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/71e7cd55b1f4955a6d718fddc0de419414d133b6)) - -### Documentation - -- Video - ([`775e4d5`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/775e4d50b826858095c5f2a61a07fc01c4a00816)) - -- Videos - ([`bb2066c`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/bb2066c909f6581ba40fc5ddef3870954ae553ab)) - -### Features - -- Support for GPT-5-Pro highest reasoning model - https://github.com/BeehiveInnovations/zen-mcp-server/issues/275 - ([`a65485a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/a65485a1e52fc79739000426295a27d096f4c9d8)) - - -## v7.4.0 (2025-10-06) - -### Chores - -- Sync version to config.py [skip ci] - ([`76bf98e`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/76bf98e5cd972dabd3c79b25fcb9b9a717b23f6d)) - -### Features - -- Improved prompt - ([`b1e9963`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/b1e9963991a41dff082ec1dce5691c318f105e6d)) - - -## v7.3.0 (2025-10-06) - -### Chores - -- Sync version to config.py [skip ci] - 
([`e7920d0`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/e7920d0ed16c0e6de9d1ccaa0b58d3fb5cbd7f2f)) - -### Documentation - -- Fixed typo - ([`3ab0aa8`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/3ab0aa8314ad5992bcb00de549a0fab2e522751d)) - -- Fixed typo - ([`c17ce3c`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/c17ce3cf958d488b97fa7127942542ab514b58bd)) - -- Update apilookup.md - ([`1918679`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/19186794edac4fce5523e671310aecff4cbfdc81)) - -- Update README.md - ([`23c6c78`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/23c6c78bf152ede6e7b5f7b7770b12a8442845a3)) - -### Features - -- Codex supports web-search natively but needs to be turned on, run-server script asks if the user - would like this done - ([`97ba7e4`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/97ba7e44ce7e3fd874759514ed2f0738033fc801)) - - -## v7.2.0 (2025-10-06) - -### Chores - -- Sync version to config.py [skip ci] - ([`1854b1e`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/1854b1e26b705cda0dc3f4d733647f1454aa0352)) - -### Documentation - -- Updated - ([`bb57f71`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/bb57f719666ab6a586d835688ff8086282a5a0dc)) - -### Features - -- New tool to perform apilookup (latest APIs / SDKs / language features etc) - https://github.com/BeehiveInnovations/zen-mcp-server/issues/204 - ([`5bea595`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/5bea59540f58b3c45044828c10f131aed104dd1c)) - -### Refactoring - -- De-duplicate roles to avoid explosion when more CLIs get added - ([`c42e9e9`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/c42e9e9c34d7ae4732e2e4fbed579b681a6d170d)) - - -## v7.1.1 (2025-10-06) - -### Bug Fixes - -- Clink missing in toml - ([`1ff77fa`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/1ff77faa800ad6c2dde49cad98dfa72035fe1c81)) - -### Chores - -- Sync version to config.py [skip ci] - ([`e02e78d`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/e02e78d903b35f4c01b8039f4157e97b38d3ec7b)) - -### Documentation - -- Example for codex cli - ([`344c42b`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/344c42bcbfb543bfd05cbc27fd5b419c76b77954)) - -- Example for codex cli - ([`c3044de`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/c3044de7424e638dde5c8ec49adb6c3c7c5a60b2)) - -- Update README.md - ([`2e719ae`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/2e719ae35e7979f7b83bd910867e79863a7f9ceb)) - - -## v7.1.0 (2025-10-05) - -### Chores - -- Sync version to config.py [skip ci] - ([`d54bfdd`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/d54bfdd49797d076ec9cade44c56292a8089c744)) - -### Features - -- Support for codex as external CLI - ([`561e4aa`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/561e4aaaa8a89eb89c03985b9e7720cc98ef666c)) - - -## v7.0.2 (2025-10-05) - -### Chores - -- Sync version to config.py [skip ci] - ([`f2142a2`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/f2142a22ec50abc54b464eedd6b8239d20c509be)) - - -## v7.0.1 (2025-10-05) - -### Bug Fixes - -- --yolo needed for running shell commands, documentation added - ([`15ae3f2`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/15ae3f24babccf42f43be5028bf8c60c05a6beaf)) - -### Chores - -- Sync version to config.py [skip ci] - 
([`bc4a27b`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/bc4a27b18a4a3f45afb22178e61ea0be4d6a273c)) - -### Documentation - -- Updated intro - ([`fb668c3`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/fb668c39b5f6e3dd37f7027f953f6004f258f2bf)) - - -## v7.0.0 (2025-10-05) - -### Chores - -- Sync version to config.py [skip ci] - ([`0d46976`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/0d46976a8aa85254e4dbe06f5e71161cd3b13938)) - -- Sync version to config.py [skip ci] - ([`8296bf8`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/8296bf871c39597a904c70e7d98c72fcb4dc5a84)) - -### Documentation - -- Instructions for OpenCode - ([`bd66622`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/bd666227c8f7557483f7e24fb8544fc0456600dc)) - -- Updated intro - ([`615873c`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/615873c3db2ecf5ce6475caa3445e1da9a2517bd)) - -### Features - -- Huge update - Link another CLI (such as `gemini` directly from with Claude Code / Codex). - https://github.com/BeehiveInnovations/zen-mcp-server/issues/208 - ([`a2ccb48`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/a2ccb48e9a5080a75dbfd483b5f09fc719c887e5)) - -### Refactoring - -- Fixed test - ([`9c99b9b`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/9c99b9b35219f54db8d7be0958d4390a106631ae)) - -- Include file modification dates too - ([`47973e9`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/47973e945efa2cdbdb8f3404d467d7f1abc62b0a)) - - -## v6.1.0 (2025-10-04) - -### Chores - -- Sync version to config.py [skip ci] - ([`18095d7`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/18095d7d398e4bf3d24c57a52c81ac619acb1b89)) - -### Documentation - -- Updated intro - ([`aa65394`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/aa6539472c4ddf1c3c1bac446fdee03e75e1cb50)) - -### Features - -- Support for Qwen Code - ([`fe9968b`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/fe9968b633d0312b82426e9ebddfe1d6515be3c5)) - - -## v6.0.0 (2025-10-04) - -### Chores - -- Sync version to config.py [skip ci] - ([`ae8749a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ae8749ab37bdaa7e225b5219820adeb74ca9a552)) - -### Documentation - -- Updated - ([`e91ed2a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/e91ed2a924b1702edf9e1417479ac0dee0ca1553)) - -### Features - -- Azure OpenAI / Azure AI Foundry support. Models should be defined in conf/azure_models.json (or a - custom path). See .env.example for environment variables or see readme. 
- https://github.com/BeehiveInnovations/zen-mcp-server/issues/265 - ([`ff9a07a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ff9a07a37adf7a24aa87c63b3ba9db88bdff467b)) - -- Breaking change - OpenRouter models are now read from conf/openrouter_models.json while Custom / - Self-hosted models are read from conf/custom_models.json - ([`ff9a07a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ff9a07a37adf7a24aa87c63b3ba9db88bdff467b)) - -- OpenAI/compatible models (such as Azure OpenAI) can declare if they use the response API instead - via `use_openai_responses_api` - ([`3824d13`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/3824d131618683572e9e8fffa6b25ccfabf4cf50)) - -- OpenRouter / Custom Models / Azure can separately also use custom config paths now (see - .env.example ) - ([`ff9a07a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ff9a07a37adf7a24aa87c63b3ba9db88bdff467b)) - -### Refactoring - -- Breaking change: `is_custom` property has been removed from model_capabilities.py (and thus - custom_models.json) given each models are now read from separate configuration files - ([`ff9a07a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ff9a07a37adf7a24aa87c63b3ba9db88bdff467b)) - -- Model registry class made abstract, OpenRouter / Custom Provider / Azure OpenAI now subclass these - ([`ff9a07a`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/ff9a07a37adf7a24aa87c63b3ba9db88bdff467b)) - - -## v5.22.0 (2025-10-04) - -### Bug Fixes - -- CI test - ([`bc93b53`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/bc93b5343bbd8657b95ab47c00a2cb99a68a009f)) - -- Listmodels to always honor restricted models - ([`4015e91`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/4015e917ed32ae374ec6493b74993fcb34f4a971)) - -### Chores - -- Sync version to config.py [skip ci] - ([`054e34e`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/054e34e31ca5bee5a11c0e3e6537f58e8897c79c)) - -- Sync version to config.py [skip ci] - ([`c0334d7`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/c0334d77922f1b05e3fd755851da112567fb9ae6)) - -### Features - -- Centralized environment handling, ensures ZEN_MCP_FORCE_ENV_OVERRIDE is honored correctly - ([`2c534ac`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/2c534ac06e4c6078b96781dfb55c5759b982afe8)) - -### Refactoring - -- Don't retry on 429 - ([`d184024`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/d18402482087f52b7bd07755c9304ed00ed20592)) - -- Improved retry logic and moved core logic to base class - ([`f955100`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/f955100f3a82973ccd987607e1d8a1bbe07828c8)) - -- Removed subclass override when the base class should be resolving the model name - ([`06d7701`](https://github.com/BeehiveInnovations/zen-mcp-server/commit/06d7701cc3ee09732ab713fa9c7c004199154483)) +## v1.0.0 (2025-10-04) +- Initial Release ## v5.21.0 (2025-10-03) diff --git a/README.md b/README.md index 17a6ada0..e46ccb5a 100644 --- a/README.md +++ b/README.md @@ -1,70 +1,23 @@ # Zen MCP: Many Workflows. One Context. -
- - [Zen in action](https://github.com/user-attachments/assets/0d26061e-5f21-4ab1-b7d0-f883ddc2c3da) - -👉 **[Watch more examples](#-watch-tools-in-action)** - -### Your CLI + Multiple Models = Your AI Dev Team - -**Use the 🤖 CLI you love:** -[Claude Code](https://www.anthropic.com/claude-code) · [Gemini CLI](https://github.com/google-gemini/gemini-cli) · [Codex CLI](https://github.com/openai/codex) · [Qwen Code CLI](https://qwenlm.github.io/qwen-code-docs/) · [Cursor](https://cursor.com) · _and more_ - -**With multiple models within a single prompt:** -Gemini · OpenAI · Anthropic · Grok · Azure · Ollama · OpenRouter · DIAL · On-Device Model +[zen_web.webm](https://github.com/user-attachments/assets/851e3911-7f06-47c0-a4ab-a2601236697c) +
+ 🤖 Claude Code OR Gemini CLI OR Codex CLI + [Gemini / OpenAI / Azure OpenAI / Grok / OpenRouter / DIAL / Ollama / Anthropic / Any Model] = Your Ultimate AI Development Team
---- - -## 🆕 Now with CLI-to-CLI Bridge - -The new **[`clink`](docs/tools/clink.md)** (CLI + Link) tool connects external AI CLIs directly into your workflow: - -- **Connect external CLIs** like [Gemini CLI](https://github.com/google-gemini/gemini-cli) and [Codex CLI](https://github.com/openai/codex) directly into your workflow -- **Codex Subagents** - Launch isolated Codex instances from _within_ Codex itself! Offload heavy tasks (code reviews, bug hunting) to fresh contexts while your main session's context window remains unpolluted. Each subagent returns only final results. -- **Context Isolation** - Run separate investigations without polluting your primary workspace -- **Role Specialization** - Spawn `planner`, `codereviewer`, or custom role agents with specialized system prompts -- **Full CLI Capabilities** - Web search, file inspection, MCP tool access, latest documentation lookups -- **Seamless Continuity** - Sub-CLIs participate as first-class members with full conversation context between tools - -```bash -# Codex spawns Codex subagent for isolated code review in fresh context -clink with codex codereviewer to audit auth module for security issues -# Subagent reviews in isolation, returns final report without cluttering your context as codex reads each file and walks the directory structure - -# Consensus from different AI models → Implementation handoff with full context preservation between tools -Use consensus with gpt-5 and gemini-pro to decide: dark mode or offline support next -Continue with clink gemini - implement the recommended feature -# Gemini receives full debate context and starts coding immediately -``` - -👉 **[Learn more about clink](docs/tools/clink.md)** - ---- - -## Why Zen MCP? - -**Why rely on one AI model when you can orchestrate them all?** - -A Model Context Protocol server that supercharges tools like [Claude Code](https://www.anthropic.com/claude-code), [Codex CLI](https://developers.openai.com/codex/cli), and IDE clients such -as [Cursor](https://cursor.com) or the [Claude Dev VS Code extension](https://marketplace.visualstudio.com/items?itemName=Anthropic.claude-vscode). **Zen MCP connects your favorite AI tool -to multiple AI models** for enhanced code analysis, problem-solving, and collaborative development. +
-### True AI Collaboration with Conversation Continuity +**AI orchestration for Claude Code** - A Model Context Protocol server that gives your CLI of choice (e.g. [Claude Code](https://www.anthropic.com/claude-code)) access to multiple AI models for enhanced code analysis, problem-solving, and collaborative development. Zen +works with Claude Code, Gemini CLI, Codex CLI, and IDE clients like [Cursor](https://cursor.com) and the [Claude Dev extension for VS Code](https://marketplace.visualstudio.com/items?itemName=Anthropic.claude-vscode). -Zen supports **conversation threading** so your CLI can **discuss ideas with multiple AI models, exchange reasoning, get second opinions, and even run collaborative debates between models** to help you reach deeper insights and better solutions. +**True AI collaboration with conversation continuity** - Claude stays in control but gets perspectives from the best AI for each subtask. Context carries forward seamlessly across tools and models, enabling complex workflows like: code reviews with multiple models → automated planning → implementation → pre-commit validation. -Your CLI always stays in control but gets perspectives from the best AI for each subtask. Context carries forward seamlessly across tools and models, enabling complex workflows like: code reviews with multiple models → automated planning → implementation → pre-commit validation. - -> **You're in control.** Your CLI of choice orchestrates the AI team, but you decide the workflow. Craft powerful prompts that bring in Gemini Pro, GPT 5, Flash, or local offline models exactly when needed. +> **You're in control.** Claude orchestrates the AI team, but you decide the workflow. Craft powerful prompts that bring in Gemini Pro, GPT 5, Flash, or local offline models exactly when needed.
Reasons to Use Zen MCP -A typical workflow with Claude Code as an example: - 1. **Multi-Model Orchestration** - Claude coordinates with Gemini Pro, O3, GPT-5, and 50+ other models to get the best analysis for each task 2. **Context Revival Magic** - Even after Claude's context resets, continue conversations seamlessly by having other models "remind" Claude of the discussion @@ -115,26 +68,13 @@ and review into consideration to aid with its final pre-commit review. > **You're** the one who crafts the powerful prompt that makes Claude bring in Gemini, Flash, O3 — or fly solo. > You're the guide. The prompter. The puppeteer. > #### You are the AI - **Actually Intelligent**. -
#### Recommended AI Stack -
-For Claude Code Users - -For best results when using [Claude Code](https://claude.ai/code): +For best results, use Claude Code with: +- **Opus 4.1** - All agentic work and orchestration +- **Gemini 2.5 Pro** - Deep thinking, code reviews, debugging, pre-commit analysis -- **Sonnet 4.5** - All agentic work and orchestration -- **Gemini 2.5 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis -
- -
-For Codex Users - -For best results when using [Codex CLI](https://developers.openai.com/codex/cli): - -- **GPT-5 Codex Medium** - All agentic work and orchestration -- **Gemini 2.5 Pro** OR **GPT-5-Pro** - Deep thinking, additional code reviews, debugging and validations, pre-commit analysis
## Quick Start (5 minutes) @@ -144,12 +84,14 @@ For best results when using [Codex CLI](https://developers.openai.com/codex/cli) **1. Get API Keys** (choose one or more): - **[OpenRouter](https://openrouter.ai/)** - Access multiple models with one API - **[Gemini](https://makersuite.google.com/app/apikey)** - Google's latest models -- **[OpenAI](https://platform.openai.com/api-keys)** - O3, GPT-5 series -- **[Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/)** - Enterprise deployments of GPT-4o, GPT-4.1, GPT-5 family +- **[OpenAI](https://platform.openai.com/api-keys)** - O3, O4 reasoning models +- **[Azure OpenAI](https://portal.azure.com/)** - GPT-5 (vision), GPT-5-Codex (elite code) via Responses API* - **[X.AI](https://console.x.ai/)** - Grok models - **[DIAL](https://dialx.ai/)** - Vendor-agnostic model access - **[Ollama](https://ollama.ai/)** - Local models (free) +*Azure GPT-5 models: 400K context, 128K output, temperature=1.0 only, min 16 output tokens + **2. Install** (choose one): **Option A: Clone and Automatic Setup** (recommended) @@ -158,7 +100,7 @@ git clone https://github.com/BeehiveInnovations/zen-mcp-server.git cd zen-mcp-server # Handles everything: setup, config, API keys from system environment. -# Auto-configures Claude Desktop, Claude Code, Gemini CLI, Codex CLI, Qwen CLI +# Auto-configures Claude Desktop, Claude Code, Gemini CLI, Codex CLI # Enable / disable additional settings in .env ./run-server.sh ``` @@ -188,23 +130,16 @@ cd zen-mcp-server "Use zen to analyze this code for security issues with gemini pro" "Debug this error with o3 and then get flash to suggest optimizations" "Plan the migration strategy with zen, get consensus from multiple models" -"clink with cli_name=\"gemini\" role=\"planner\" to draft a phased rollout plan" ``` -👉 **[Complete Setup Guide](docs/getting-started.md)** with detailed installation, configuration for Gemini / Codex / Qwen, and troubleshooting +👉 **[Complete Setup Guide](docs/getting-started.md)** with detailed installation, configuration for Gemini / Codex, and troubleshooting 👉 **[Cursor & VS Code Setup](docs/getting-started.md#ide-clients)** for IDE integration instructions -📺 **[Watch tools in action](#-watch-tools-in-action)** to see real-world examples - -## Provider Configuration - -Zen activates any provider that has credentials in your `.env`. See `.env.example` for deeper customization. ## Core Tools > **Note:** Each tool comes with its own multi-step workflow, parameters, and descriptions that consume valuable context window space even when not in use. To optimize performance, some tools are disabled by default. See [Tool Configuration](#tool-configuration) below to enable them. **Collaboration & Planning** *(Enabled by default)* -- **[`clink`](docs/tools/clink.md)** - Bridge requests to external AI CLIs (Gemini planner, codereviewer, etc.) - **[`chat`](docs/tools/chat.md)** - Brainstorm ideas, get second opinions, validate approaches - **[`thinkdeep`](docs/tools/thinkdeep.md)** - Extended reasoning, edge case analysis, alternative perspectives - **[`planner`](docs/tools/planner.md)** - Break down complex projects into structured, actionable plans @@ -223,7 +158,6 @@ Zen activates any provider that has credentials in your `.env`. 
See `.env.exampl - **[`docgen`](docs/tools/docgen.md)** - Generate documentation with complexity analysis **Utilities** -- **[`apilookup`](docs/tools/apilookup.md)** - Forces current-year API/SDK documentation lookups in a sub-process (saves tokens within the current context window), prevents outdated training data responses - **[`challenge`](docs/tools/challenge.md)** - Prevent "You're absolutely right!" responses with critical analysis - **[`tracer`](docs/tools/tracer.md)** *(disabled by default - [enable](#tool-configuration))* - Static analysis prompts for call-flow mapping @@ -237,7 +171,6 @@ To optimize context window usage, only essential tools are enabled by default: **Enabled by default:** - `chat`, `thinkdeep`, `planner`, `consensus` - Core collaboration tools - `codereview`, `precommit`, `debug` - Essential code quality tools -- `apilookup` - Rapid API/SDK information lookup - `challenge` - Critical thinking utility **Disabled by default:** @@ -301,74 +234,13 @@ DISABLED_TOOLS= } ``` -**Note:** +**Note:** - Essential tools (`version`, `listmodels`) cannot be disabled - After changing tool configuration, restart your Claude session for changes to take effect - Each tool adds to context window usage, so only enable what you need -## 📺 Watch Tools In Action - -
-Chat Tool - Collaborative decision making and multi-turn conversations - -**Picking Redis vs Memcached:** - -[Chat Redis or Memcached_web.webm](https://github.com/user-attachments/assets/41076cfe-dd49-4dfc-82f5-d7461b34705d) - -**Multi-turn conversation with continuation:** - -[Chat With Gemini_web.webm](https://github.com/user-attachments/assets/37bd57ca-e8a6-42f7-b5fb-11de271e95db) - -
- -
-Consensus Tool - Multi-model debate and decision making - -**Multi-model consensus debate:** - -[Zen Consensus Debate](https://github.com/user-attachments/assets/76a23dd5-887a-4382-9cf0-642f5cf6219e) - -
- -
-PreCommit Tool - Comprehensive change validation - -**Pre-commit validation workflow:** - -
- -
- -
- -
-API Lookup Tool - Current vs outdated API documentation - -**Without Zen - outdated APIs:** - -[API without Zen](https://github.com/user-attachments/assets/01a79dc9-ad16-4264-9ce1-76a56c3580ee) - -**With Zen - current APIs:** - -[API with Zen](https://github.com/user-attachments/assets/5c847326-4b66-41f7-8f30-f380453dce22) - -
- -
-Challenge Tool - Critical thinking vs reflexive agreement - -**Without Zen:** - -![without_zen@2x](https://github.com/user-attachments/assets/64f3c9fb-7ca9-4876-b687-25e847edfd87) - -**With Zen:** - -![with_zen@2x](https://github.com/user-attachments/assets/9d72f444-ba53-4ab1-83e5-250062c6ee70) - -
- ## Key Features **AI Orchestration** @@ -378,10 +250,10 @@ DISABLED_TOOLS= - **[Context revival](docs/context-revival.md)** - Continue conversations even after context resets **Model Support** -- **Multiple providers** - Gemini, OpenAI, Azure, X.AI, OpenRouter, DIAL, Ollama -- **Latest models** - GPT-5, Gemini 2.5 Pro, O3, Grok-4, local Llama +- **Multiple providers** - Gemini, OpenAI, Azure OpenAI, X.AI, OpenRouter, DIAL, Ollama +- **Latest models** - Azure GPT-5/GPT-5-Codex (Responses API), Gemini 2.5 Pro, O3, Grok-4, local Llama - **[Thinking modes](docs/advanced-usage.md#thinking-modes)** - Control reasoning depth vs cost -- **Vision support** - Analyze images, diagrams, screenshots +- **Vision support** - Analyze images with GPT-5, Gemini, and other vision-capable models **Developer Experience** - **Guided workflows** - Systematic investigation prevents rushed analysis @@ -419,7 +291,6 @@ DISABLED_TOOLS= - [Tools Reference](docs/tools/) - All tools with examples - [Advanced Usage](docs/advanced-usage.md) - Power user features - [Configuration](docs/configuration.md) - Environment variables, restrictions -- [Adding Providers](docs/adding_providers.md) - Provider-specific setup (OpenAI, Azure, custom gateways) - [Model Ranking Guide](docs/model_ranking.md) - How intelligence scores drive auto-mode suggestions **🔧 Setup & Support** @@ -435,12 +306,11 @@ Apache 2.0 License - see [LICENSE](LICENSE) file for details. Built with the power of **Multi-Model AI** collaboration 🤝 - **A**ctual **I**ntelligence by real Humans -- [MCP (Model Context Protocol)](https://modelcontextprotocol.com) -- [Codex CLI](https://developers.openai.com/codex/cli) -- [Claude Code](https://claude.ai/code) -- [Gemini](https://ai.google.dev/) -- [OpenAI](https://openai.com/) -- [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/) +- [MCP (Model Context Protocol)](https://modelcontextprotocol.com) by Anthropic +- [Claude Code](https://claude.ai/code) - Your AI coding orchestrator +- [Gemini 2.5 Pro & Flash](https://ai.google.dev/) - Extended thinking & fast analysis +- [OpenAI O3 & O4](https://openai.com/) - Strong reasoning & systematic analysis +- [Azure OpenAI GPT-5](https://azure.microsoft.com/en-us/products/ai-services/openai-service) - GPT-5 (vision) & GPT-5-Codex (elite code) via Responses API ### Star History diff --git a/conf/openai_models.json b/conf/openai_models.json index a7e0674a..fecd1176 100644 --- a/conf/openai_models.json +++ b/conf/openai_models.json @@ -41,7 +41,7 @@ "supports_function_calling": true, "supports_json_mode": true, "supports_images": true, - "supports_temperature": true, + "supports_temperature": false, "max_image_size_mb": 20.0, "temperature_constraint": "fixed" }, @@ -62,7 +62,7 @@ "supports_function_calling": true, "supports_json_mode": true, "supports_images": true, - "supports_temperature": true, + "supports_temperature": false, "max_image_size_mb": 20.0, "use_openai_response_api": true, "default_reasoning_effort": "high", @@ -86,7 +86,7 @@ "supports_function_calling": true, "supports_json_mode": true, "supports_images": true, - "supports_temperature": true, + "supports_temperature": false, "max_image_size_mb": 20.0, "temperature_constraint": "fixed" }, @@ -227,8 +227,9 @@ "supports_function_calling": true, "supports_json_mode": true, "supports_images": true, - "supports_temperature": true, + "supports_temperature": false, "max_image_size_mb": 20.0, + "temperature_constraint": "fixed", "use_openai_response_api": true } ] diff --git a/config.py b/config.py index 
61fcf7c2..1e3db855 100644 --- a/config.py +++ b/config.py @@ -8,15 +8,15 @@ Configuration values can be overridden by environment variables where appropriate. """ -from utils.env import get_env +import os # Version and metadata # These values are used in server responses and for tracking releases # IMPORTANT: This is the single source of truth for version and author info # Semantic versioning: MAJOR.MINOR.PATCH -__version__ = "7.8.1" +__version__ = "1.0.0" # Last update date in ISO format -__updated__ = "2025-10-07" +__updated__ = "2025-10-04" # Primary maintainer __author__ = "Fahad Gilani" @@ -25,13 +25,12 @@ # This should be a stable, high-performance model suitable for code analysis # Can be overridden by setting DEFAULT_MODEL environment variable # Special value "auto" means Claude should pick the best model for each task -DEFAULT_MODEL = get_env("DEFAULT_MODEL", "auto") or "auto" +DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "auto") # Auto mode detection - when DEFAULT_MODEL is "auto", Claude picks the model IS_AUTO_MODE = DEFAULT_MODEL.lower() == "auto" -# Each provider (gemini.py, openai.py, xai.py, dial.py, openrouter.py, custom.py, azure_openai.py) -# defines its own MODEL_CAPABILITIES +# Each provider (gemini.py, openai_provider.py, xai.py) defines its own MODEL_CAPABILITIES # with detailed descriptions. Tools use ModelProviderRegistry.get_available_model_names() # to get models only from enabled providers (those with valid API keys). # @@ -62,7 +61,7 @@ # Thinking Mode Defaults # DEFAULT_THINKING_MODE_THINKDEEP: Default thinking depth for extended reasoning tool # Higher modes use more computational budget but provide deeper analysis -DEFAULT_THINKING_MODE_THINKDEEP = get_env("DEFAULT_THINKING_MODE_THINKDEEP", "high") or "high" +DEFAULT_THINKING_MODE_THINKDEEP = os.getenv("DEFAULT_THINKING_MODE_THINKDEEP", "high") # Consensus Tool Defaults # Consensus timeout and rate limiting settings @@ -118,7 +117,7 @@ def _calculate_mcp_prompt_limit() -> int: Maximum character count for user input prompts """ # Check for Claude's MAX_MCP_OUTPUT_TOKENS environment variable - max_tokens_str = get_env("MAX_MCP_OUTPUT_TOKENS") + max_tokens_str = os.getenv("MAX_MCP_OUTPUT_TOKENS") if max_tokens_str: try: @@ -144,7 +143,7 @@ def _calculate_mcp_prompt_limit() -> int: # Examples: "fr-FR", "en-US", "zh-CN", "zh-TW", "ja-JP", "ko-KR", "es-ES", # "de-DE", "it-IT", "pt-PT" # Leave empty for default language (English) -LOCALE = get_env("LOCALE", "") or "" +LOCALE = os.getenv("LOCALE", "") # Threading configuration # Simple in-memory conversation threading for stateless MCP environment diff --git a/docs/AZURE_OPENAI_TROUBLESHOOTING.md b/docs/AZURE_OPENAI_TROUBLESHOOTING.md new file mode 100644 index 00000000..6de13454 --- /dev/null +++ b/docs/AZURE_OPENAI_TROUBLESHOOTING.md @@ -0,0 +1,700 @@ +# Azure OpenAI Troubleshooting Guide + +This guide provides comprehensive troubleshooting information for Azure OpenAI integration with Zen MCP Server. + +## Implementation Overview + +**IMPORTANT:** This implementation uses Azure OpenAI **Responses API** exclusively. + +### Supported Models +- **GPT-5**: Advanced reasoning with vision support (400K context, 128K output) +- **GPT-5-Codex**: Elite code generation without vision (400K context, 128K output) + +### Critical Constraints (MUST READ) +1. **Responses API Only** - Chat Completions API is NOT implemented +2. **Temperature Fixed at 1.0** - Cannot be changed, returns 400 error if modified +3. **Minimum Output Tokens: 16** - Values below 16 cause 400 errors +4. 
**API Version: 2025-03-01-preview or later** - Required for Responses API support + +### Key Differences from Standard OpenAI +- Uses Responses API endpoint: `/openai/deployments/{deployment}/responses` +- Different content extraction from response (uses `output_text` or `output` array) +- Supports reasoning tokens (internal thinking process before output) +- No temperature control (hardcoded to 1.0 by Azure) + +--- + +## Common Issues and Solutions + +### 1. Missing Environment Variables + +**Problem:** Azure OpenAI provider not available or returns configuration errors. + +**Solution:** Ensure all required environment variables are set in your `.env` file: + +```bash +# Required Azure OpenAI Configuration +AZURE_OPENAI_API_KEY=your-api-key-here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex # or gpt-5 +AZURE_OPENAI_API_VERSION=2025-03-01-preview # Must be 2025-03-01-preview or later +``` + +**Verify configuration:** +```bash +# Check if variables are set +grep "AZURE_OPENAI" .env + +# Expected output should show all four variables with values +``` + +--- + +### 2. Invalid API Key + +**Problem:** Authentication errors when making API calls. + +**Error Message:** +``` +401 Unauthorized: Invalid API key provided +``` + +**Solution:** +1. Verify your API key in Azure Portal: + - Go to Azure Portal → Your Azure OpenAI resource + - Navigate to "Keys and Endpoint" + - Copy either KEY 1 or KEY 2 + - Update `AZURE_OPENAI_API_KEY` in `.env` file + +2. Ensure no extra spaces or quotes in the API key: +```bash +# Correct format +AZURE_OPENAI_API_KEY=abcd1234567890... + +# Incorrect format (no quotes needed) +AZURE_OPENAI_API_KEY="abcd1234567890..." +``` + +--- + +### 3. Wrong Endpoint Format + +**Problem:** Connection errors or invalid endpoint errors. + +**Error Message:** +``` +Invalid URL or endpoint format +``` + +**Solution:** Ensure endpoint follows correct format: + +```bash +# Correct format +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com + +# Incorrect formats (missing https://) +AZURE_OPENAI_ENDPOINT=your-resource-name.openai.azure.com + +# Incorrect formats (trailing slash) +AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com/ +``` + +**Verify endpoint:** +```bash +# Test endpoint connectivity +curl -I https://your-resource-name.openai.azure.com +``` + +--- + +### 4. Old API Version + +**Problem:** Responses API not available or unsupported API version. + +**Error Message:** +``` +API version not supported or Responses API not available +``` + +**Solution:** Update to required API version: + +```bash +# Required version for Responses API +AZURE_OPENAI_API_VERSION=2025-03-01-preview + +# Older versions NOT supported for Responses API +AZURE_OPENAI_API_VERSION=2024-10-21 # Too old +AZURE_OPENAI_API_VERSION=2024-08-01-preview # Too old +``` + +**Note:** The Responses API requires API version `2025-03-01-preview` or later. Earlier versions only support Chat Completions API. + +--- + +### 5. Deployment Name Mismatch + +**Problem:** Deployment not found or model not available. + +**Error Message:** +``` +404 Not Found: The API deployment for this resource does not exist +``` + +**Solution:** +1. Verify deployment name in Azure Portal: + - Go to Azure Portal → Your Azure OpenAI resource + - Navigate to "Model deployments" + - Copy exact deployment name (case-sensitive) + +2. 
Update deployment name in `.env`: +```bash +# Use exact deployment name from Azure Portal +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex + +# Common deployment names +# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5 +# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# AZURE_OPENAI_DEPLOYMENT_NAME=my-gpt5-deployment +``` + +**Verify deployment exists:** +```bash +# List deployments using Azure CLI +az cognitiveservices account deployment list \ + --name your-resource-name \ + --resource-group your-resource-group +``` + +--- + +### 6. Temperature Validation Errors + +**Problem:** Invalid temperature value for GPT-5 or GPT-5-Codex. + +**Error Messages:** +``` +400 Bad Request: "Unsupported value. Only the default (1) value is supported" +Temperature must be exactly 1.0 for GPT-5/GPT-5-Codex models +``` + +**Root Cause:** Azure OpenAI reasoning models (GPT-5, GPT-5-Codex) do not support temperature adjustment. + +**Solution:** The server automatically enforces temperature=1.0: + +```python +# Temperature is automatically set to 1.0 for all GPT-5 models +# This is handled internally by the provider +temperature = 1.0 # Cannot be changed + +# Do NOT attempt to override in prompts or configuration +# The Azure API will reject any temperature value other than 1.0 +``` + +**Community Reports:** +- Multiple users have confirmed this constraint in OpenAI forums +- Error message: "Unsupported value … Only the default (1) value is supported" +- Applies to both GPT-5 and GPT-5-Codex deployments + +**Note:** This is an Azure OpenAI platform requirement for reasoning models, not a server limitation. + +--- + +### 7. Rate Limiting + +**Problem:** Too many requests or quota exceeded. + +**Error Message:** +``` +429 Too Many Requests: Rate limit exceeded +403 Forbidden: Quota exceeded +``` + +**Solution:** +1. Check your Azure quota: + - Go to Azure Portal → Your Azure OpenAI resource + - Navigate to "Quotas" + - Verify Tokens Per Minute (TPM) limit + +2. Implement retry logic (already built-in): + - The server automatically retries with exponential backoff + - Wait a few moments between requests + +3. Request quota increase: + - Contact Azure support to increase your TPM quota + - Upgrade to higher tier if available + +--- + +## Responses API Specific Issues + +### Understanding Responses API + +**Key Differences from Chat Completions API:** + +1. **Endpoint URL:** + ```bash + # Responses API (what we use) + POST https://{resource}.openai.azure.com/openai/deployments/{deployment}/responses + + # Chat Completions API (NOT used) + POST https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions + ``` + +2. **Content Extraction:** + ```python + # Responses API - two possible formats + # Format 1: output_text field + content = response_data.get("output_text", "") + + # Format 2: output array + output = response_data.get("output", []) + if output and len(output) > 0: + content = output[0].get("content", "") + ``` + +3. **Required Models:** + - GPT-5-Codex: **Requires** Responses API + - GPT-5: Works with Responses API + +### Responses API Error Handling + +**Problem:** Empty or missing response content. + +**Solution:** The implementation handles multiple content extraction methods: + +```python +# Check multiple possible response formats +# 1. Try output_text field +# 2. Try output array +# 3. Try choices array (fallback) +# 4. 
Return error if none found +``` + +If you see empty responses, check server logs: +```bash +tail -n 100 logs/mcp_server.log | grep "Azure OpenAI" +``` + +--- + +## Configuration Validation + +### Verify Azure Credentials + +**Step 1: Check environment variables** +```bash +# View current configuration +grep "AZURE_OPENAI" .env + +# Expected output: +# AZURE_OPENAI_API_KEY=sk-... +# AZURE_OPENAI_ENDPOINT=https://... +# AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# AZURE_OPENAI_API_VERSION=2025-03-01-preview +``` + +**Step 2: Test API key validity** +```bash +# Using curl to test authentication +curl -X POST "https://your-resource.openai.azure.com/openai/deployments/gpt-5-codex/responses?api-version=2025-03-01-preview" \ + -H "api-key: your-api-key-here" \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [{"role": "user", "content": "test"}], + "temperature": 1.0 + }' +``` + +### Test Endpoint Connectivity + +**Check DNS resolution:** +```bash +# Verify endpoint resolves +nslookup your-resource.openai.azure.com +``` + +**Check network connectivity:** +```bash +# Test HTTPS connection +curl -I https://your-resource.openai.azure.com +``` + +**Expected response:** +``` +HTTP/2 401 +# 401 is expected without API key - confirms endpoint is reachable +``` + +### Verify Deployment Exists + +**Using Azure CLI:** +```bash +# List all deployments +az cognitiveservices account deployment list \ + --name your-resource-name \ + --resource-group your-resource-group \ + --query "[].{name:name, model:properties.model.name}" \ + --output table + +# Expected output: +# Name Model +# ---------------- ------------- +# gpt-5-codex gpt-5-codex +``` + +**Using Azure Portal:** +1. Navigate to your Azure OpenAI resource +2. Click "Model deployments" +3. Verify deployment name and model + +### Verify API Version Support + +**Check supported API versions:** +```bash +# List available API versions for your resource +az cognitiveservices account show \ + --name your-resource-name \ + --resource-group your-resource-group \ + --query "properties.capabilities" +``` + +**Ensure using latest version:** +- API version must be `2025-03-01-preview` or later +- Older versions do not support Responses API + +### 8. Minimum Output Token Errors + +**Problem:** Request fails with invalid max_output_tokens value. + +**Error Message:** +``` +400 Bad Request: max_output_tokens must be at least 16 +Invalid parameter: max_output_tokens < 16 +``` + +**Root Cause:** GPT-5/GPT-5-Codex reasoning models require minimum 16 output tokens. + +**Solution:** The server enforces this minimum automatically: + +```python +# Server automatically enforces minimum +max_output_tokens = max(16, requested_tokens) + +# If you see this error, check: +# 1. Server logs for the actual value being sent +# 2. Ensure you're not manually setting a lower value +``` + +**Official Documentation Note:** +- OpenAI/Azure docs specify this for reasoning models +- Applies to both Responses API and Chat Completions (where available) +- Parameter name varies: `max_output_tokens` (Responses) vs `max_completion_tokens` (Chat) + +--- + +## Common Error Messages + +### Authentication Errors + +**Error:** `401 Unauthorized` +```json +{ + "error": { + "code": "401", + "message": "Access denied due to invalid subscription key or wrong API endpoint." + } +} +``` + +**Solutions:** +1. Verify `AZURE_OPENAI_API_KEY` is correct +2. Check API key is not expired +3. Ensure using correct endpoint +4. 
Regenerate API key if needed + +--- + +### API Version Errors + +**Error:** `API version not supported` +```json +{ + "error": { + "code": "InvalidApiVersion", + "message": "The requested API version is not supported." + } +} +``` + +**Solutions:** +1. Update `AZURE_OPENAI_API_VERSION=2025-03-01-preview` +2. Verify your resource supports this API version +3. Check Azure region availability + +--- + +### Deployment Not Found Errors + +**Error:** `404 Not Found` +```json +{ + "error": { + "code": "DeploymentNotFound", + "message": "The API deployment for this resource does not exist." + } +} +``` + +**Solutions:** +1. Verify deployment name is correct (case-sensitive) +2. Check deployment exists in Azure Portal +3. Ensure deployment is in "Succeeded" state +4. Verify using correct resource/endpoint + +--- + +### Temperature Constraint Errors + +**Error:** `Invalid temperature value` +```json +{ + "error": { + "code": "InvalidParameter", + "message": "Temperature must be exactly 1.0 for GPT-5-Codex model." + } +} +``` + +**Solutions:** +- This is enforced by Azure for GPT-5-Codex +- The implementation automatically sets temperature=1.0 +- If you see this error, check server logs for configuration issues + +--- + +### Content Extraction Errors + +**Error:** Empty response or missing content + +**Symptoms:** +- Tool returns empty string +- No visible output from model +- Logs show successful API call but no content + +**Solutions:** +1. Check server logs for response format: +```bash +tail -n 200 logs/mcp_server.log | grep "Azure OpenAI response" +``` + +2. Verify Responses API is being used (not Chat Completions): +```bash +grep "responses?" logs/mcp_server.log +``` + +3. Check for multiple content extraction attempts in logs + +--- + +## Testing and Validation + +### Run Integration Tests + +**Test Azure OpenAI provider:** +```bash +# Run integration tests (requires API keys) +./run_integration_tests.sh + +# Run specific Azure OpenAI tests +python -m pytest tests/ -v -k "azure" -m integration +``` + +**Expected output:** +``` +tests/test_azure_openai_integration.py::test_azure_provider_registration PASSED +tests/test_azure_openai_integration.py::test_azure_api_call PASSED +tests/test_azure_openai_integration.py::test_azure_responses_api PASSED +``` + +### Check Server Logs + +**View recent Azure activity:** +```bash +# Filter for Azure OpenAI logs +tail -n 500 logs/mcp_server.log | grep -i "azure" + +# View tool activity +tail -n 100 logs/mcp_activity.log + +# Follow logs in real-time +tail -f logs/mcp_server.log +``` + +**Look for:** +- Provider registration confirmation +- API call attempts +- Response format handling +- Error messages + +### Verify Provider Registration + +**Check provider availability:** +```bash +# Start server and check logs +./run-server.sh + +# Look for registration message +grep "Azure OpenAI provider registered" logs/mcp_server.log +``` + +**Expected log entry:** +``` +INFO: Azure OpenAI provider registered successfully +INFO: Deployment: gpt-5-codex +INFO: API Version: 2025-03-01-preview +``` + +### Manual API Testing + +**Test Responses API directly:** +```bash +# Create test script +cat > test_azure.sh << 'EOF' +#!/bin/bash +source .env + +curl -X POST "${AZURE_OPENAI_ENDPOINT}/openai/deployments/${AZURE_OPENAI_DEPLOYMENT_NAME}/responses?api-version=${AZURE_OPENAI_API_VERSION}" \ + -H "api-key: ${AZURE_OPENAI_API_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + {"role": "user", "content": "Say hello in one word"} + ], + "temperature": 
1.0,
+    "max_output_tokens": 16
+  }'
+EOF
+
+chmod +x test_azure.sh
+./test_azure.sh
+```
+
+Note: `max_output_tokens` (not `max_tokens`) is the Responses API parameter, and it must be at least 16, so the test script requests exactly the minimum.
+
+**Expected response:**
+```json
+{
+  "output_text": "Hello",
+  "usage": {
+    "prompt_tokens": 12,
+    "completion_tokens": 1,
+    "total_tokens": 13
+  }
+}
+```
+
+---
+
+## Advanced Troubleshooting
+
+### Enable Debug Logging
+
+**Increase log verbosity:**
+```bash
+# Set debug level in environment
+export LOG_LEVEL=DEBUG
+
+# Restart server
+./run-server.sh
+
+# View detailed logs
+tail -f logs/mcp_server.log
+```
+
+### Network Diagnostics
+
+**Check firewall rules:**
+```bash
+# Test connectivity to Azure endpoint
+telnet your-resource.openai.azure.com 443
+
+# Check SSL certificate
+openssl s_client -connect your-resource.openai.azure.com:443
+```
+
+**Verify DNS:**
+```bash
+# Check DNS resolution
+dig your-resource.openai.azure.com
+
+# Alternative DNS check
+host your-resource.openai.azure.com
+```
+
+### Analyze Request/Response
+
+**Enable request logging:**
+```python
+# In providers/azure_openai.py (inside generate_content)
+# Temporarily add debug prints to see the full request and response
+import json  # only needed if not already imported at module level
+
+logger.debug(f"Request deployment: {self.deployment_name}")
+logger.debug(f"Request params: {json.dumps(api_params, indent=2, default=str)}")
+logger.debug(f"Response: {response}")
+```
+
+**Check cassette recordings:**
+```bash
+# View recorded API interactions
+ls -la tests/cassettes/azure_*.yaml
+```
+
+---
+
+## Additional Resources
+
+### Azure Documentation
+
+- [Azure OpenAI Service Documentation](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/)
+- [Responses API Reference](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/reference#responses-api)
+- [GPT-5-Codex Model Details](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models#gpt-5-codex)
+- [API Version Support](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/api-version-deprecation)
+
+### Project Documentation
+
+- Main README: `README.md`
+- Development Guide: `CLAUDE.md`
+- Integration Tests: `tests/test_azure_openai_integration.py`
+- Provider Implementation: `providers/azure_openai.py`
+
+### Getting Help
+
+1. **Check server logs:** `tail -n 500 logs/mcp_server.log`
+2. **Run diagnostics:** `./run_integration_tests.sh`
+3. **Review Azure Portal:** Verify configuration and quotas
+4. **Contact Azure Support:** For Azure-specific issues
+5. **GitHub Issues:** Report bugs or request features
+
+---
+
+## Summary Checklist
+
+Before opening an issue, verify:
+
+- [ ] All environment variables set correctly in `.env`
+- [ ] API key is valid and not expired
+- [ ] Endpoint format is correct (https://...)
+- [ ] API version is `2025-03-01-preview` or later +- [ ] Deployment name matches Azure Portal exactly +- [ ] Deployment is in "Succeeded" state +- [ ] Quota/rate limits not exceeded +- [ ] Network connectivity to Azure endpoint +- [ ] Server logs checked for specific errors +- [ ] Integration tests run successfully + +--- + +**Last Updated:** 2025-10-03 +**API Version Required:** 2025-03-01-preview or later +**Supported Models:** GPT-5, GPT-5-Codex +**Implementation:** Azure OpenAI Responses API diff --git a/docs/advanced-usage.md b/docs/advanced-usage.md index 58b99d7a..04d115f4 100644 --- a/docs/advanced-usage.md +++ b/docs/advanced-usage.md @@ -41,7 +41,8 @@ Regardless of your default configuration, you can specify models per request: | **`o3-mini`** | OpenAI | 200K tokens | Balanced speed/quality | Moderate complexity tasks | | **`o4-mini`** | OpenAI | 200K tokens | Latest reasoning model | Optimized for shorter contexts | | **`gpt4.1`** | OpenAI | 1M tokens | Latest GPT-4 with extended context | Large codebase analysis, comprehensive reviews | -| **`gpt5`** (GPT-5) | OpenAI | 400K tokens | Advanced model with reasoning support | Complex problems requiring advanced reasoning | +| **`gpt-5`** | Azure OpenAI | 400K tokens | Advanced reasoning model with vision support (Responses API only, temp=1.0) | Complex analysis, architectural design, image understanding | +| **`gpt-5-codex`** | Azure OpenAI | 400K tokens | Elite code generation, no vision (Responses API only, temp=1.0) | Code generation, refactoring, technical documentation | | **`gpt5-mini`** (GPT-5 Mini) | OpenAI | 400K tokens | Efficient variant with reasoning | Balanced performance and capability | | **`gpt5-nano`** (GPT-5 Nano) | OpenAI | 400K tokens | Fastest, cheapest GPT-5 variant | Summarization and classification tasks | | **`grok-4`** | X.AI | 256K tokens | Latest flagship Grok model with reasoning, vision | Complex analysis, reasoning tasks | @@ -61,8 +62,11 @@ cloud models (expensive/powerful) AND local models (free/private) in the same co - **Flash Lite 2.0**: Text-only lightweight model (no thinking support) - **O3/O4 Models**: Excellent reasoning, systematic analysis, 200K context - **GPT-4.1**: Extended context window (1M tokens), general capabilities -- **GPT-5 Series**: Advanced reasoning models, 400K context - - **GPT-5**: Full-featured with reasoning support and vision +- **Azure GPT-5 Series**: Advanced reasoning models via Responses API, 400K context, 128K output + - **GPT-5**: Full reasoning with vision support (temperature fixed at 1.0, min 16 output tokens) + - **GPT-5-Codex**: Elite code generation without vision (temperature fixed at 1.0, min 16 output tokens) + - Note: Both use Azure's Responses API exclusively, Chat Completions API not implemented +- **GPT-5 Variants** (if available via OpenAI directly): - **GPT-5 Mini**: Balanced efficiency and capability - **GPT-5 Nano**: Optimized for fast, low-cost tasks - **Grok-4**: Extended thinking support, vision capabilities, 256K context diff --git a/docs/azure-gpt5-guide.md b/docs/azure-gpt5-guide.md new file mode 100644 index 00000000..58ad7396 --- /dev/null +++ b/docs/azure-gpt5-guide.md @@ -0,0 +1,372 @@ +# Azure OpenAI GPT-5 and GPT-5-Codex Complete Guide + +This comprehensive guide covers everything you need to know about using Azure OpenAI's GPT-5 and GPT-5-Codex models with the Zen MCP Server. 
+ +## Table of Contents + +- [Overview](#overview) +- [Model Comparison](#model-comparison) +- [Critical Requirements](#critical-requirements) +- [Setup Walkthrough](#setup-walkthrough) +- [Technical Constraints](#technical-constraints) +- [Best Practices](#best-practices) +- [Cost Optimization](#cost-optimization) +- [Integration Patterns](#integration-patterns) +- [Troubleshooting](#troubleshooting) +- [FAQ](#frequently-asked-questions) + +## Overview + +Azure OpenAI provides access to two powerful GPT-5 models through the **Responses API** (not Chat Completions API): + +- **GPT-5**: Advanced reasoning model with vision support for general-purpose tasks +- **GPT-5-Codex**: Elite code generation model specialized for programming tasks + +Both models offer: +- 400K token context window (2x larger than O3's 200K) +- 128K token max output (4x larger than GPT-4.1's 32K) +- Extended reasoning capabilities with internal "reasoning tokens" +- Enterprise-grade security and compliance through Azure + +## Model Comparison + +| Feature | GPT-5 | GPT-5-Codex | +|---------|-------|-------------| +| **Context Window** | 400,000 tokens | 400,000 tokens | +| **Max Output** | 128,000 tokens | 128,000 tokens | +| **Vision Support** | ✅ Yes | ❌ No | +| **Code Specialization** | Good | Elite | +| **Intelligence Score** | 16 | 17 | +| **Temperature** | Fixed at 1.0 | Fixed at 1.0 | +| **Min Output Tokens** | 16 | 16 | +| **API Type** | Responses API only | Responses API only | +| **Best For** | General reasoning, architecture design, image analysis | Code generation, refactoring, technical documentation | + +## Critical Requirements + +### ⚠️ MUST READ - These are hard requirements, not recommendations: + +1. **Responses API Only** + - Chat Completions API is NOT implemented for these models + - Endpoint: `/openai/deployments/{deployment}/responses` + - Different response format than standard OpenAI + +2. **Temperature Constraint** + - MUST be exactly 1.0 + - Cannot be changed via API or configuration + - Error if different: `"Unsupported value. Only the default (1) value is supported"` + +3. **Minimum Output Tokens** + - Must be at least 16 tokens + - Parameter: `max_output_tokens` (not `max_completion_tokens`) + - Error if less: `400 Bad Request: max_output_tokens must be at least 16` + +4. **API Version Requirement** + - Must use `2025-03-01-preview` or later + - Earlier versions don't support Responses API + - Recommended: `2025-04-01-preview` + +## Setup Walkthrough + +### Step 1: Azure Portal Setup + +1. **Create Azure OpenAI Resource** + ``` + Portal → Create Resource → Search "Azure OpenAI" → Create + - Resource Group: Select or create new + - Region: Choose supported region (e.g., East US, West Europe) + - Pricing Tier: Standard S0 + - Name: Your unique resource name + ``` + +2. **Request Model Access** + ``` + Resource → Model deployments → Request access + - Select GPT-5 and/or GPT-5-Codex + - Provide use case justification + - Wait for approval (usually 24-48 hours) + ``` + +3. **Deploy the Model** + ``` + Resource → Model deployments → Create new deployment + - Model: gpt-5 or gpt-5-codex + - Deployment name: Choose a name (e.g., "gpt5-prod") + - Version: Select latest available + - Capacity (TPM): Set based on needs (e.g., 120K) + ``` + +4. **Get Credentials** + ``` + Resource → Keys and Endpoint + - Copy KEY 1 or KEY 2 + - Copy Endpoint URL + - Note the deployment name from step 3 + ``` + +### Step 2: Configure Zen MCP Server + +1. 
**Edit `.env` file:** + ```env + # Azure OpenAI Configuration (all 4 required) + AZURE_OPENAI_API_KEY=your_key_from_step_4 + AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ + AZURE_OPENAI_API_VERSION=2025-04-01-preview + AZURE_OPENAI_DEPLOYMENT_NAME=gpt5-prod # Your deployment name from step 3 + + # Set as default model (optional) + DEFAULT_MODEL=gpt-5 # or gpt-5-codex + ``` + +2. **Verify Configuration:** + ```bash + # Check environment variables + grep "AZURE_OPENAI" .env + + # Start server + ./run-server.sh + + # Check logs for successful registration + tail -f logs/mcp_server.log | grep "Azure" + ``` + +### Step 3: Test the Setup + +In Claude, test with: +``` +"Use gpt-5 to explain the architecture of this codebase" +"Use gpt-5-codex to refactor this function for better performance" +``` + +## Technical Constraints + +### Responses API Format + +The Responses API uses a different format than Chat Completions: + +**Request Structure:** +```json +{ + "messages": [ + {"role": "system", "content": "..."}, + {"role": "user", "content": "..."} + ], + "temperature": 1.0, // Must be exactly 1.0 + "max_output_tokens": 4096 // Minimum 16 +} +``` + +**Response Structure:** +```json +{ + "output_text": "Generated response here...", + "usage": { + "prompt_tokens": 150, + "completion_tokens": 500, + "total_tokens": 650, + "reasoning_tokens": 2000 // Internal thinking tokens (not visible) + } +} +``` + +### Reasoning Tokens + +GPT-5 models use internal "reasoning tokens" that: +- Are not visible in the response +- Contribute to processing time +- Enable advanced problem-solving +- Don't count against output token limits +- Can be substantial (thousands of tokens) + +### Error Handling + +Common errors and their solutions: + +| Error Code | Message | Solution | +|------------|---------|----------| +| 400 | "Unsupported value" for temperature | Server auto-sets to 1.0, check logs | +| 400 | "max_output_tokens must be at least 16" | Increase to minimum 16 | +| 401 | "Unauthorized" | Check API key validity | +| 404 | "Deployment not found" | Verify deployment name | +| 429 | "Rate limit exceeded" | Check TPM quota in Azure | + +## Best Practices + +### When to Use GPT-5 vs GPT-5-Codex + +**Use GPT-5 when you need:** +- General reasoning and analysis +- Architecture design and planning +- Image/diagram understanding +- Business logic and documentation +- Multi-domain problem solving + +**Use GPT-5-Codex when you need:** +- Code generation and completion +- Refactoring and optimization +- Technical documentation +- Bug fixing and debugging +- API design and implementation + +### Optimal Workflows + +1. **Architecture Review with Images:** + ``` + "Use gpt-5 to analyze this architecture diagram and identify bottlenecks" + ``` + +2. **Code Generation Pipeline:** + ``` + "Use gpt-5 to design the API structure, then use gpt-5-codex to implement it" + ``` + +3. 
**Comprehensive Code Review:** + ``` + "Use gpt-5-codex for code quality review, then gpt-5 for architectural implications" + ``` + +### Token Management + +With 400K context and 128K output capacity: + +- **Batch Processing**: Process entire codebases in single requests +- **Detailed Analysis**: Request comprehensive reports without truncation +- **Context Preservation**: Include extensive history and documentation +- **Full Implementations**: Generate complete modules, not just snippets + +## Cost Optimization + +### Understanding Pricing + +Azure OpenAI charges per 1K tokens: +- Input tokens (prompt) +- Output tokens (completion) +- Reasoning tokens (internal, may be charged separately) + +### Optimization Strategies + +1. **Use GPT-5-Codex only for code tasks** + - Higher intelligence score but same cost + - Specialized for programming + +2. **Leverage the large context window** + - Batch multiple questions in one request + - Include all relevant context upfront + +3. **Monitor token usage** + ```bash + # Check logs for token consumption + grep "usage" logs/mcp_server.log | tail -20 + ``` + +4. **Set appropriate TPM limits in Azure** + - Prevents unexpected costs + - Ensures predictable billing + +## Integration Patterns + +### Multi-Model Workflows + +**Pattern 1: Design → Implement → Review** +``` +"Use gpt-5 to design the system architecture, +then gpt-5-codex to implement the core modules, +finally gemini pro to review the implementation" +``` + +**Pattern 2: Vision → Code → Test** +``` +"Use gpt-5 to analyze this UI mockup image, +then gpt-5-codex to generate the React components, +finally o3 to create comprehensive tests" +``` + +### Conversation Threading + +The Zen MCP Server maintains context across model switches: + +```python +# Step 1: GPT-5 analyzes requirements +"Use gpt-5 to analyze these requirements and create a technical spec" + +# Step 2: GPT-5-Codex implements (knows about step 1) +"Now use gpt-5-codex to implement based on the spec" + +# Step 3: Review with another model (knows about steps 1 & 2) +"Use gemini pro to review if the implementation matches the spec" +``` + +## Troubleshooting + +### Quick Diagnostic Checklist + +- [ ] All 4 Azure environment variables set? +- [ ] API version is 2025-03-01-preview or later? +- [ ] Deployment name matches exactly (case-sensitive)? +- [ ] Endpoint includes `https://` prefix? +- [ ] API key is valid and not expired? +- [ ] Model deployment is in "Succeeded" state? +- [ ] Quota/TPM limits not exceeded? + +### Debug Commands + +```bash +# Test Azure connectivity +curl -I https://your-resource.openai.azure.com + +# Check provider registration +grep "Azure OpenAI provider registered" logs/mcp_server.log + +# Monitor API calls +tail -f logs/mcp_server.log | grep -E "(Azure|GPT-5|Responses)" + +# View token usage +grep "reasoning_tokens" logs/mcp_server.log +``` + +### Getting Help + +1. Check [AZURE_OPENAI_TROUBLESHOOTING.md](AZURE_OPENAI_TROUBLESHOOTING.md) +2. Review server logs: `logs/mcp_server.log` +3. Verify Azure Portal settings +4. Contact Azure support for quota/access issues + +## Frequently Asked Questions + +**Q: Can I change the temperature for GPT-5 models?** +A: No, temperature is fixed at 1.0 for all Azure GPT-5 models. This is an Azure platform constraint. + +**Q: Why use Responses API instead of Chat Completions?** +A: GPT-5-Codex requires Responses API. For consistency, both models use the same API. + +**Q: Can I use multiple GPT-5 deployments?** +A: Yes, but only one at a time. 
Change `AZURE_OPENAI_DEPLOYMENT_NAME` to switch. + +**Q: How do reasoning tokens affect billing?** +A: Reasoning tokens are internal and may be billed separately. Check Azure pricing documentation. + +**Q: Can I use GPT-5 for image generation?** +A: No, GPT-5 supports image analysis (input) but not generation (output). + +**Q: What's the difference in response time?** +A: GPT-5-Codex is optimized for code and may be faster for programming tasks. GPT-5 may take longer due to broader reasoning. + +**Q: Can I use streaming with GPT-5?** +A: Final output can stream, but reasoning tokens are processed internally first. + +**Q: Is GPT-5-Codex better than GPT-5 for all code tasks?** +A: Generally yes, but GPT-5 might be better for high-level architecture and design discussions. + +## Summary + +Azure OpenAI's GPT-5 and GPT-5-Codex models offer unprecedented capabilities with their 400K context and 128K output capacity. By understanding their constraints (Responses API only, temperature=1.0, min 16 tokens) and following best practices, you can leverage these powerful models effectively in your workflows. + +Key takeaways: +- Always use Responses API, not Chat Completions +- Temperature is fixed at 1.0 (non-negotiable) +- Minimum 16 output tokens required +- GPT-5 for general + vision, GPT-5-Codex for code +- Leverage the massive context window for comprehensive analysis +- Combine with other models for optimal workflows + +For additional help, refer to the [main documentation](index.md) or [troubleshooting guide](AZURE_OPENAI_TROUBLESHOOTING.md). \ No newline at end of file diff --git a/docs/configuration.md b/docs/configuration.md index a27ce52a..16b8c0dd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -30,10 +30,17 @@ OPENAI_API_KEY=your-openai-key GEMINI_API_KEY=your_gemini_api_key_here # Get from: https://makersuite.google.com/app/apikey -# OpenAI API +# OpenAI API OPENAI_API_KEY=your_openai_api_key_here # Get from: https://platform.openai.com/api-keys +# Azure OpenAI API (Responses API - supports GPT-5 and GPT-5-Codex) +AZURE_OPENAI_API_KEY=your_azure_openai_api_key_here +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +# Get from: https://portal.azure.com/ (Keys and Endpoint section) + # X.AI GROK API XAI_API_KEY=your_xai_api_key_here # Get from: https://console.x.ai/ @@ -59,35 +66,163 @@ CUSTOM_MODEL_NAME=llama3.2 # Default model - Use standard localhost URLs since the server runs natively - Example: `http://localhost:11434/v1` for Ollama -### Model Configuration +### Azure OpenAI GPT-5 Models Configuration + +Azure OpenAI integration uses the **Responses API** exclusively for GPT-5 and GPT-5-Codex models. These models have specific requirements and constraints that differ from standard OpenAI models. + +#### Critical Requirements for GPT-5/GPT-5-Codex + +| Requirement | Details | Error if Violated | +|------------|---------|-------------------| +| **API Type** | Responses API ONLY | Chat Completions API not implemented | +| **Temperature** | Must be exactly 1.0 | 400 Error: "Unsupported value. Only the default (1) value is supported" | +| **Min Output Tokens** | Minimum 16 tokens | 400 Error if max_output_tokens < 16 | +| **API Version** | 2025-03-01-preview or later | Responses API not available in older versions | + +#### Setup Steps + +1. 
**Create Azure OpenAI Resource:** + - Navigate to [Azure Portal](https://portal.azure.com/) + - Create or select an Azure OpenAI resource + - Request access to GPT-5 or GPT-5-Codex models if needed + +2. **Deploy the Model:** + - Go to "Model deployments" in your Azure OpenAI resource + - Click "Create new deployment" + - Select either `gpt-5` or `gpt-5-codex` as the model + - Choose a deployment name (e.g., `gpt-5-production`) + - Set capacity (TPM - Tokens Per Minute) + +3. **Get Credentials:** + - Navigate to "Keys and Endpoint" section + - Copy either KEY 1 or KEY 2 + - Copy the Endpoint URL + +4. **Configure Environment Variables:** + ```env + # All 4 variables are REQUIRED + AZURE_OPENAI_API_KEY=your_api_key_from_azure + AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ + AZURE_OPENAI_API_VERSION=2025-04-01-preview # Minimum version for Responses API + AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5 # Your deployment name from Azure Portal + ``` + +#### Model Comparison + +| Model | Context Window | Max Output | Vision Support | Code Specialization | Intelligence Score | Use Case | +|-------|---------------|------------|----------------|--------------------|--------------------|----------| +| **gpt-5** | 400K tokens | 128K tokens | ✅ Yes | Good | 16 | General purpose reasoning, complex analysis with images | +| **gpt-5-codex** | 400K tokens | 128K tokens | ❌ No | Elite | 17 | Code generation, refactoring, technical documentation | + +#### Key Technical Details + +**Responses API Specifics:** +- Endpoint: `/openai/deployments/{deployment}/responses` +- Different response format than Chat Completions API +- Supports reasoning tokens (internal thinking process) +- Response extraction from `output_text` or `output` array fields + +**Reasoning Tokens:** +- GPT-5 models use internal "reasoning tokens" before generating output +- These tokens are not visible in the response but affect processing time +- Contributes to the model's advanced problem-solving capabilities + +**Constraints and Limitations:** +```python +# Temperature MUST be 1.0 +temperature = 1.0 # Cannot be changed + +# Minimum output tokens +max_output_tokens = max(16, requested_tokens) # Enforced minimum of 16 + +# API Version requirement +api_version = "2025-04-01-preview" # Or later versions +``` -**Default Model Selection:** +#### Example Configurations + +**GPT-5 General Purpose:** ```env -# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', etc. -DEFAULT_MODEL=auto # Claude picks best model for each task (recommended) +AZURE_OPENAI_API_KEY=sk-proj-abc123... +AZURE_OPENAI_ENDPOINT=https://contoso-ai.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-general +DEFAULT_MODEL=gpt-5 # Use GPT-5 as default +``` + +**GPT-5-Codex for Development:** +```env +AZURE_OPENAI_API_KEY=sk-proj-xyz789... 
+AZURE_OPENAI_ENDPOINT=https://dev-team.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=codex-production +DEFAULT_MODEL=gpt-5-codex # Use Codex as default +``` + +**Multi-Model Setup:** +```env +# You can only have ONE deployment active at a time +# To switch models, change the AZURE_OPENAI_DEPLOYMENT_NAME +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5 # or gpt-5-codex ``` -- **Available Models:** The canonical capability data for native providers lives in JSON manifests under `conf/`: - - `conf/openai_models.json` – OpenAI catalogue (can be overridden with `OPENAI_MODELS_CONFIG_PATH`) - - `conf/gemini_models.json` – Gemini catalogue (`GEMINI_MODELS_CONFIG_PATH`) - - `conf/xai_models.json` – X.AI / GROK catalogue (`XAI_MODELS_CONFIG_PATH`) - - `conf/openrouter_models.json` – OpenRouter catalogue (`OPENROUTER_MODELS_CONFIG_PATH`) - - `conf/dial_models.json` – DIAL aggregation catalogue (`DIAL_MODELS_CONFIG_PATH`) - - `conf/custom_models.json` – Custom/OpenAI-compatible endpoints (`CUSTOM_MODELS_CONFIG_PATH`) +#### Common Configuration Errors + +1. **Wrong API Version:** + ```env + # ❌ WRONG - Too old for Responses API + AZURE_OPENAI_API_VERSION=2024-10-01-preview + + # ✅ CORRECT + AZURE_OPENAI_API_VERSION=2025-04-01-preview + ``` + +2. **Incorrect Endpoint Format:** + ```env + # ❌ WRONG - Missing https:// + AZURE_OPENAI_ENDPOINT=your-resource.openai.azure.com/ - Each JSON file documents the allowed fields via its `_README` block and controls model aliases, capability limits, and feature flags. Edit these files (or point the matching `*_MODELS_CONFIG_PATH` variable to your own copy) when you want to adjust context windows, enable JSON mode, or expose additional aliases without touching Python code. + # ✅ CORRECT + AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ + ``` - The shipped defaults cover: +3. **Temperature Modification Attempts:** + ```python + # ❌ This will cause a 400 error + # The server enforces temperature=1.0 automatically + # Do NOT try to override it in your prompts + ``` - | Provider | Canonical Models | Notable Aliases | - |----------|-----------------|-----------------| - | OpenAI | `gpt-5`, `gpt-5-pro`, `gpt-5-mini`, `gpt-5-nano`, `gpt-5-codex`, `gpt-4.1`, `o3`, `o3-mini`, `o3-pro`, `o4-mini` | `gpt5`, `gpt5pro`, `mini`, `nano`, `codex`, `o3mini`, `o3pro`, `o4mini` | - | Gemini | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-2.0-flash`, `gemini-2.0-flash-lite` | `pro`, `gemini-pro`, `flash`, `flash-2.0`, `flashlite` | - | X.AI | `grok-4`, `grok-3`, `grok-3-fast` | `grok`, `grok4`, `grok3`, `grok3fast`, `grokfast` | - | OpenRouter | See `conf/openrouter_models.json` for the continually evolving catalogue | e.g., `opus`, `sonnet`, `flash`, `pro`, `mistral` | - | Custom | User-managed entries such as `llama3.2` | Define your own aliases per entry | +#### Troubleshooting Quick Reference - > **Tip:** Copy the JSON file you need, customise it, and point the corresponding `*_MODELS_CONFIG_PATH` environment variable to your version. This lets you enable or disable capabilities (JSON mode, function calling, temperature support) without editing Python. 
+| Error | Cause | Solution | +|-------|-------|----------| +| 400 "Unsupported value" | Temperature ≠ 1.0 | Server auto-sets to 1.0, check logs | +| 400 "Invalid max_output_tokens" | Value < 16 | Minimum is 16 tokens | +| 404 "Deployment not found" | Wrong deployment name | Verify in Azure Portal | +| 401 "Unauthorized" | Invalid API key | Regenerate key in Azure Portal | +| "Responses API not available" | Old API version | Use 2025-03-01-preview or later | + +### Model Configuration + +**Default Model Selection:** +```env +# Options: 'auto', 'pro', 'flash', 'o3', 'o3-mini', 'o4-mini', etc. +DEFAULT_MODEL=auto # Claude picks best model for each task (recommended) +``` + +**Available Models:** +- **`auto`**: Claude automatically selects the optimal model +- **`pro`** (Gemini 2.5 Pro): Extended thinking, deep analysis +- **`flash`** (Gemini 2.0 Flash): Ultra-fast responses +- **`o3`**: Strong logical reasoning (200K context) +- **`o3-mini`**: Balanced speed/quality (200K context) +- **`o4-mini`**: Latest reasoning model, optimized for shorter contexts +- **`gpt-5`**: Azure OpenAI GPT-5 via Responses API (400K context, 128K output) +- **`gpt-5-codex`**: Azure OpenAI GPT-5-Codex specialized for code (400K context, 128K output) +- **`grok-3`**: GROK-3 advanced reasoning (131K context) +- **`grok-4-latest`**: GROK-4 latest flagship model (256K context) +- **Custom models**: via OpenRouter or local APIs ### Thinking Mode Configuration @@ -119,17 +254,34 @@ OPENAI_ALLOWED_MODELS=o3-mini,o4-mini,mini GOOGLE_ALLOWED_MODELS=flash,pro # X.AI GROK model restrictions -XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4 +XAI_ALLOWED_MODELS=grok-3,grok-3-fast,grok-4-latest # OpenRouter model restrictions (affects models via custom provider) OPENROUTER_ALLOWED_MODELS=opus,sonnet,mistral ``` -**Supported Model Names:** The names/aliases listed in the JSON manifests above are the authoritative source. Keep in mind: - -- Aliases are case-insensitive and defined per entry (for example, `mini` maps to `gpt-5-mini` by default, while `flash` maps to `gemini-2.5-flash`). -- When you override the manifest files you can add or remove aliases as needed; restriction policies (`*_ALLOWED_MODELS`) automatically pick up those changes. -- Models omitted from a manifest fall back to generic capability detection (where supported) and may have limited feature metadata. 
+**Supported Model Names:** + +**OpenAI Models:** +- `o3` (200K context, high reasoning) +- `o3-mini` (200K context, balanced) +- `o4-mini` (200K context, latest balanced) +- `mini` (shorthand for o4-mini) + +**Gemini Models:** +- `gemini-2.5-flash` (1M context, fast) +- `gemini-2.5-pro` (1M context, powerful) +- `flash` (shorthand for Flash model) +- `pro` (shorthand for Pro model) + +**X.AI GROK Models:** +- `grok-4-latest` (256K context, latest flagship model with reasoning, vision, and structured outputs) +- `grok-3` (131K context, advanced reasoning) +- `grok-3-fast` (131K context, higher performance) +- `grok` (shorthand for grok-4-latest) +- `grok4` (shorthand for grok-4-latest) +- `grok3` (shorthand for grok-3) +- `grokfast` (shorthand for grok-3-fast) **Example Configurations:** ```env @@ -148,15 +300,10 @@ XAI_ALLOWED_MODELS=grok,grok-3-fast ### Advanced Configuration -**Custom Model Configuration & Manifest Overrides:** +**Custom Model Configuration:** ```env -# Override default location of built-in catalogues -OPENAI_MODELS_CONFIG_PATH=/path/to/openai_models.json -GEMINI_MODELS_CONFIG_PATH=/path/to/gemini_models.json -XAI_MODELS_CONFIG_PATH=/path/to/xai_models.json -OPENROUTER_MODELS_CONFIG_PATH=/path/to/openrouter_models.json -DIAL_MODELS_CONFIG_PATH=/path/to/dial_models.json -CUSTOM_MODELS_CONFIG_PATH=/path/to/custom_models.json +# Override default location of custom_models.json +CUSTOM_MODELS_CONFIG_PATH=/path/to/your/custom_models.json ``` **Conversation Settings:** @@ -189,6 +336,18 @@ LOG_LEVEL=DEBUG CONVERSATION_TIMEOUT_HOURS=1 ``` +### Azure OpenAI Setup +```env +# Azure OpenAI with GPT-5-Codex +DEFAULT_MODEL=auto +AZURE_OPENAI_API_KEY=your-azure-key +AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/ +AZURE_OPENAI_API_VERSION=2025-04-01-preview +AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5-codex +LOG_LEVEL=INFO +CONVERSATION_TIMEOUT_HOURS=3 +``` + ### Production Setup ```env # Production with cost controls @@ -243,4 +402,4 @@ LOG_LEVEL=INFO - **[Advanced Usage Guide](advanced-usage.md)** - Advanced model usage patterns, thinking modes, and power user workflows - **[Context Revival Guide](context-revival.md)** - Conversation persistence and context revival across sessions -- **[AI-to-AI Collaboration Guide](ai-collaboration.md)** - Multi-model coordination and conversation threading +- **[AI-to-AI Collaboration Guide](ai-collaboration.md)** - Multi-model coordination and conversation threading \ No newline at end of file diff --git a/providers/azure_openai.py b/providers/azure_openai.py index b0ec76f6..93f96c2e 100644 --- a/providers/azure_openai.py +++ b/providers/azure_openai.py @@ -1,342 +1,484 @@ -"""Azure OpenAI provider built on the OpenAI-compatible implementation.""" +"""Azure OpenAI model provider implementation using Responses API. -from __future__ import annotations +IMPORTANT: This implementation uses Azure OpenAI's **Responses API** exclusively, +which works with both **GPT-5** and **GPT-5-Codex** models, as well as O3 reasoning +models and GPT-4.1. The Responses API is required for GPT-5-Codex and provides +consistent behavior across all Azure OpenAI models. + +This provider supports Azure OpenAI deployments using the Responses API format, +which is required for advanced models like gpt-5, gpt-5-codex, gpt-5-mini, +gpt-5-nano, o3-mini, and gpt-4.1. 
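+
+Illustrative usage (a sketch only; assumes the four AZURE_OPENAI_* environment
+variables are set and that the deployment name matches the Azure Portal):
+
+    import os
+
+    provider = AzureOpenAIProvider(
+        api_key=os.environ["AZURE_OPENAI_API_KEY"],
+        azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
+        api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+        deployment_name=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
+    )
+    response = provider.generate_content(
+        prompt="Say hello in one word",
+        model_name="gpt-5-codex",
+        max_output_tokens=256,
+    )
+    print(response.content)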
+""" import logging -from dataclasses import asdict, replace +from typing import TYPE_CHECKING, Optional -try: # pragma: no cover - optional dependency - from openai import AzureOpenAI -except ImportError: # pragma: no cover - AzureOpenAI = None # type: ignore[assignment] +if TYPE_CHECKING: + from tools.models import ToolModelCategory -from utils.env import get_env, suppress_env_vars +from openai import AzureOpenAI -from .openai import OpenAIModelProvider -from .openai_compatible import OpenAICompatibleProvider -from .registries.azure import AzureModelRegistry +from .base import ModelProvider from .shared import ModelCapabilities, ModelResponse, ProviderType, TemperatureConstraint logger = logging.getLogger(__name__) -class AzureOpenAIProvider(OpenAICompatibleProvider): - """Thin Azure wrapper that reuses the OpenAI-compatible request pipeline.""" +class AzureOpenAIProvider(ModelProvider): + """Azure OpenAI provider using Responses API. + + IMPORTANT: This implementation uses Azure OpenAI's **Responses API** exclusively, + which works with both **GPT-5** and **GPT-5-Codex** models, as well as all variants + (gpt-5-mini, gpt-5-nano), O3 reasoning models (o3-mini), and GPT-4.1. The Responses + API is required for GPT-5-Codex and provides consistent behavior across all Azure + OpenAI models. + + This provider connects to Azure OpenAI deployments and uses the Responses API + (client.responses.create) instead of the Chat Completions API. This is required + for certain advanced models like gpt-5-codex and provides extended reasoning + capabilities for gpt-5, gpt-5-mini, and o3-mini. + + Supported Models: + - gpt-5: Advanced reasoning model (400K context, 128K output) + - gpt-5-codex: Elite code generation (400K context, 128K output) + - gpt-5-mini: Faster, cost-effective variant (400K context, 128K output) + - gpt-5-nano: Fastest, most cost-effective (400K context, 128K output) + - o3-mini: Strong reasoning model (200K context, 64K output) + - gpt-4.1: Extended context window (1M context, 32K output) + + Configuration: + - api_key: Azure OpenAI API key + - azure_endpoint: Azure OpenAI endpoint URL + - api_version: API version (must be 2025-03-01-preview or later) + - deployment_name: The deployment name to use (e.g., "gpt-5", "gpt-5-codex") + """ + + # Model configurations using ModelCapabilities objects + MODEL_CAPABILITIES = { + "gpt-5": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5", + friendly_name="Azure OpenAI (GPT-5)", + intelligence_score=16, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=True, # Supports reasoning tokens + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-5 supports vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=False, # Reasoning model: temperature not supported (fixed internally to 1.0) + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure GPT-5 (400K context, 128K output) - Advanced reasoning model with extended thinking", + aliases=["gpt5", "azure-gpt5", "azure-gpt-5"], + ), + "gpt-5-codex": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5-codex", + friendly_name="Azure OpenAI (GPT-5 Codex)", + intelligence_score=17, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=True, # Codex supports advanced reasoning + 
supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=False, # Codex is code-focused + max_image_size_mb=0.0, + supports_temperature=False, # Requires fixed temperature=1.0 + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure GPT-5 Codex (400K context, 128K output) - Elite code generation with deep reasoning (temperature=1.0 required)", + aliases=["gpt5-codex", "gpt5codex", "codex", "azure-codex", "azure-gpt5-codex"], + ), + "gpt-5-mini": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5-mini", + friendly_name="Azure OpenAI (GPT-5 Mini)", + intelligence_score=14, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=True, # Supports reasoning tokens + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-5 variants support vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=False, # Reasoning model: temperature not supported (fixed internally to 1.0) + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure GPT-5-Mini - Faster, cost-effective variant", + aliases=["gpt5-mini", "gpt5mini", "mini", "azure-mini"], + ), + "gpt-5-nano": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-5-nano", + friendly_name="Azure OpenAI (GPT-5 Nano)", + intelligence_score=12, + context_window=400_000, # 400K tokens + max_output_tokens=128_000, # 128K max output tokens + supports_extended_thinking=False, # Nano does not support extended thinking + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-5 variants support vision + max_image_size_mb=20.0, # 20MB per OpenAI docs + supports_temperature=False, # Reasoning model: temperature not supported (fixed internally to 1.0) + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure GPT-5-Nano - Fastest, most cost-effective", + aliases=["gpt5-nano", "gpt5nano", "nano", "azure-nano"], + ), + "o3-mini": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="o3-mini", + friendly_name="Azure OpenAI (O3 Mini)", + intelligence_score=15, + context_window=200_000, # 200K tokens + max_output_tokens=64_000, # 64K max output tokens + supports_extended_thinking=True, # O3 supports advanced reasoning + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=False, # O3 is reasoning-focused, not vision + max_image_size_mb=0.0, + supports_temperature=False, # Reasoning model requires fixed temperature=1.0 + temperature_constraint=TemperatureConstraint.create("fixed"), + description="Azure O3-Mini - Strong reasoning model (temperature=1.0 required)", + aliases=["o3mini", "azure-o3-mini"], + ), + "gpt-4.1": ModelCapabilities( + provider=ProviderType.AZURE, + model_name="gpt-4.1", + friendly_name="Azure OpenAI (GPT-4.1)", + intelligence_score=14, + context_window=1_000_000, # 1M tokens + max_output_tokens=32_000, # 32K max output tokens + supports_extended_thinking=False, # GPT-4.1 does not support extended thinking + supports_system_prompts=True, + supports_streaming=True, + supports_function_calling=True, + supports_json_mode=True, + supports_images=True, # GPT-4.1 supports vision + max_image_size_mb=20.0, # 
20MB per OpenAI docs + supports_temperature=True, + temperature_constraint=TemperatureConstraint.create("range"), + description="Azure GPT-4.1 - Extended context window", + aliases=["gpt4.1", "azure-gpt4.1"], + ), + } + + def __init__(self, api_key: str, **kwargs): + """Initialize Azure OpenAI provider. + + Args: + api_key: Azure OpenAI API key + **kwargs: Additional configuration including: + - azure_endpoint: Azure OpenAI endpoint URL (required) + - api_version: API version (required, must be 2025-03-01-preview or later) + - deployment_name: Deployment name (required) + + Raises: + ValueError: If required configuration is missing + """ + super().__init__(api_key, **kwargs) + + # Validate required kwargs + self.azure_endpoint = kwargs.get("azure_endpoint") + self.api_version = kwargs.get("api_version") + self.deployment_name = kwargs.get("deployment_name") + + if not self.azure_endpoint: + raise ValueError("azure_endpoint is required for Azure OpenAI provider") + if not self.api_version: + raise ValueError("api_version is required for Azure OpenAI provider") + if not self.deployment_name: + raise ValueError("deployment_name is required for Azure OpenAI provider") + + # Validate API version supports Responses API + if self.api_version < "2025-03-01-preview": + logger.warning( + f"API version {self.api_version} may not support Responses API. " + "Recommended: 2025-03-01-preview or later" + ) - FRIENDLY_NAME = "Azure OpenAI" - DEFAULT_API_VERSION = "2024-02-15-preview" + # Lazy client initialization + self._client: Optional[AzureOpenAI] = None - # The OpenAI-compatible base expects subclasses to expose capabilities via - # ``get_all_model_capabilities``. Azure deployments are user-defined, so we - # build the catalogue dynamically from environment configuration instead of - # relying on a static ``MODEL_CAPABILITIES`` map. - MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {} + logger.info( + f"Initialized Azure OpenAI provider: endpoint={self.azure_endpoint}, " + f"deployment={self.deployment_name}, api_version={self.api_version}" + ) - def __init__( - self, - api_key: str, - *, - azure_endpoint: str | None = None, - api_version: str | None = None, - deployments: dict[str, object] | None = None, - **kwargs, - ) -> None: - # Let the OpenAI-compatible base handle shared configuration such as - # timeouts, restriction-aware allowlists, and logging. ``base_url`` maps - # directly onto Azure's endpoint URL. - super().__init__(api_key, base_url=azure_endpoint, **kwargs) - - if not azure_endpoint: - azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT") - if not azure_endpoint: - raise ValueError("Azure OpenAI endpoint is required via parameter or AZURE_OPENAI_ENDPOINT") - - self.azure_endpoint = azure_endpoint.rstrip("/") - self.api_version = api_version or get_env("AZURE_OPENAI_API_VERSION", self.DEFAULT_API_VERSION) - - registry_specs = self._load_registry_entries() - override_specs = self._normalise_deployments(deployments or {}) if deployments else {} - - self._model_specs = self._merge_specs(registry_specs, override_specs) - if not self._model_specs: - raise ValueError( - "Azure OpenAI provider requires at least one configured deployment. " - "Populate conf/azure_models.json or set AZURE_MODELS_CONFIG_PATH." 
+ def _get_client(self) -> AzureOpenAI: + """Get or create the Azure OpenAI client (lazy initialization).""" + if self._client is None: + self._client = AzureOpenAI( + api_key=self.api_key, + azure_endpoint=self.azure_endpoint, + api_version=self.api_version, ) - - self._capabilities = self._build_capabilities_map() - self._deployment_map = {name: spec["deployment"] for name, spec in self._model_specs.items()} - self._deployment_alias_lookup = { - deployment.lower(): canonical for canonical, deployment in self._deployment_map.items() - } - self._canonical_lookup = {name.lower(): name for name in self._model_specs.keys()} - self._invalidate_capability_cache() - - # ------------------------------------------------------------------ - # Capability helpers - # ------------------------------------------------------------------ - def get_all_model_capabilities(self) -> dict[str, ModelCapabilities]: - return dict(self._capabilities) + logger.debug("Created Azure OpenAI client") + return self._client def get_provider_type(self) -> ProviderType: + """Get the provider type.""" return ProviderType.AZURE - def get_capabilities(self, model_name: str) -> ModelCapabilities: # type: ignore[override] - lowered = model_name.lower() - if lowered in self._deployment_alias_lookup: - canonical = self._deployment_alias_lookup[lowered] - return super().get_capabilities(canonical) - canonical = self._canonical_lookup.get(lowered) - if canonical: - return super().get_capabilities(canonical) - return super().get_capabilities(model_name) - - def validate_model_name(self, model_name: str) -> bool: # type: ignore[override] - lowered = model_name.lower() - if lowered in self._deployment_alias_lookup or lowered in self._canonical_lookup: - return True - return super().validate_model_name(model_name) - - def _build_capabilities_map(self) -> dict[str, ModelCapabilities]: - capabilities: dict[str, ModelCapabilities] = {} - - for canonical_name, spec in self._model_specs.items(): - template_capability: ModelCapabilities | None = spec.get("capability") - overrides = spec.get("overrides", {}) - - if template_capability: - cloned = replace(template_capability) - else: - template = OpenAIModelProvider.MODEL_CAPABILITIES.get(canonical_name) - - if template: - friendly = template.friendly_name.replace("OpenAI", "Azure OpenAI", 1) - cloned = replace( - template, - provider=ProviderType.AZURE, - friendly_name=friendly, - aliases=list(template.aliases), - ) - else: - deployment_name = spec.get("deployment", "") - cloned = ModelCapabilities( - provider=ProviderType.AZURE, - model_name=canonical_name, - friendly_name=f"Azure OpenAI ({canonical_name})", - description=f"Azure deployment '{deployment_name}' for {canonical_name}", - aliases=[], - ) - - if overrides: - overrides = dict(overrides) - temp_override = overrides.get("temperature_constraint") - if isinstance(temp_override, str): - overrides["temperature_constraint"] = TemperatureConstraint.create(temp_override) - - aliases_override = overrides.get("aliases") - if isinstance(aliases_override, str): - overrides["aliases"] = [alias.strip() for alias in aliases_override.split(",") if alias.strip()] - provider_override = overrides.get("provider") - if provider_override: - overrides.pop("provider", None) - - try: - cloned = replace(cloned, **overrides) - except TypeError: - base_data = asdict(cloned) - base_data.update(overrides) - base_data["provider"] = ProviderType.AZURE - temp_value = base_data.get("temperature_constraint") - if isinstance(temp_value, str): - 
base_data["temperature_constraint"] = TemperatureConstraint.create(temp_value) - cloned = ModelCapabilities(**base_data) - - if cloned.provider != ProviderType.AZURE: - cloned.provider = ProviderType.AZURE - - capabilities[canonical_name] = cloned - - return capabilities - - def _load_registry_entries(self) -> dict[str, dict]: - try: - registry = AzureModelRegistry() - except Exception as exc: # pragma: no cover - registry failure should not crash provider - logger.warning("Unable to load Azure model registry: %s", exc) - return {} - - entries: dict[str, dict] = {} - for model_name, capability, extra in registry.iter_entries(): - deployment = extra.get("deployment") - if not deployment: - logger.warning("Azure model '%s' missing deployment in registry", model_name) - continue - entries[model_name] = {"deployment": deployment, "capability": capability} - - return entries - - @staticmethod - def _merge_specs( - registry_specs: dict[str, dict], - override_specs: dict[str, dict], - ) -> dict[str, dict]: - specs: dict[str, dict] = {} - - for canonical, entry in registry_specs.items(): - specs[canonical] = { - "deployment": entry.get("deployment"), - "capability": entry.get("capability"), - "overrides": {}, - } - - for canonical, entry in override_specs.items(): - spec = specs.get(canonical, {"deployment": None, "capability": None, "overrides": {}}) - deployment = entry.get("deployment") - if deployment: - spec["deployment"] = deployment - overrides = {k: v for k, v in entry.items() if k not in {"deployment"}} - overrides.pop("capability", None) - if overrides: - spec["overrides"].update(overrides) - specs[canonical] = spec - - return {k: v for k, v in specs.items() if v.get("deployment")} - - @staticmethod - def _normalise_deployments(mapping: dict[str, object]) -> dict[str, dict]: - normalised: dict[str, dict] = {} - for canonical, spec in mapping.items(): - canonical_name = (canonical or "").strip() - if not canonical_name: - continue - - deployment_name: str | None = None - overrides: dict[str, object] = {} - - if isinstance(spec, str): - deployment_name = spec.strip() - elif isinstance(spec, dict): - deployment_name = spec.get("deployment") or spec.get("deployment_name") - overrides = {k: v for k, v in spec.items() if k not in {"deployment", "deployment_name"}} - - if not deployment_name: - continue - - normalised[canonical_name] = {"deployment": deployment_name.strip(), **overrides} - - return normalised - - # ------------------------------------------------------------------ - # Azure-specific configuration - # ------------------------------------------------------------------ - @property - def client(self): # type: ignore[override] - """Instantiate the Azure OpenAI client on first use.""" - - if self._client is None: - if AzureOpenAI is None: - raise ImportError( - "Azure OpenAI support requires the 'openai' package. Install it with `pip install openai`." 
- ) - - import httpx - - proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"] - - with suppress_env_vars(*proxy_env_vars): - try: - timeout_config = self.timeout_config - - http_client = httpx.Client(timeout=timeout_config, follow_redirects=True) - - client_kwargs = { - "api_key": self.api_key, - "azure_endpoint": self.azure_endpoint, - "api_version": self.api_version, - "http_client": http_client, - } - - if self.DEFAULT_HEADERS: - client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() - - logger.debug( - "Initializing Azure OpenAI client endpoint=%s api_version=%s timeouts=%s", - self.azure_endpoint, - self.api_version, - timeout_config, - ) - - self._client = AzureOpenAI(**client_kwargs) - - except Exception as exc: - logger.error("Failed to create Azure OpenAI client: %s", exc) - raise - - return self._client - - # ------------------------------------------------------------------ - # Request delegation - # ------------------------------------------------------------------ def generate_content( self, prompt: str, model_name: str, - system_prompt: str | None = None, + system_prompt: Optional[str] = None, temperature: float = 0.3, - max_output_tokens: int | None = None, - images: list[str] | None = None, + max_output_tokens: Optional[int] = None, **kwargs, ) -> ModelResponse: - canonical_name, deployment_name = self._resolve_canonical_and_deployment(model_name) - - # Delegate to the shared OpenAI-compatible implementation using the - # deployment name – Azure requires the deployment identifier in the - # ``model`` field. The returned ``ModelResponse`` is normalised so - # downstream consumers continue to see the canonical model name. - raw_response = super().generate_content( - prompt=prompt, - model_name=deployment_name, - system_prompt=system_prompt, - temperature=temperature, - max_output_tokens=max_output_tokens, - images=images, - **kwargs, - ) + """Generate content using Azure OpenAI Responses API. 
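+
+        The request is sent with ``client.responses.create`` using the configured
+        deployment name rather than the logical model name. A sketch of the payload
+        this method builds (illustrative; actual values depend on the call):
+
+            {
+                "model": "<deployment_name>",
+                "input": [
+                    {"role": "system", "content": "<system_prompt>"},
+                    {"role": "user", "content": "<prompt>"},
+                ],
+                "max_output_tokens": 4096,
+            }
+
+        ``temperature`` is only included for models whose capabilities report
+        temperature support; reasoning models such as gpt-5 and gpt-5-codex omit it.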
+ + Args: + prompt: User prompt/message + model_name: Model name (will be resolved to deployment) + system_prompt: Optional system prompt + temperature: Temperature parameter (default 0.3) + max_output_tokens: Maximum output tokens + **kwargs: Additional parameters + + Returns: + ModelResponse with generated content and usage data + + Raises: + ValueError: If model is not supported + Exception: If API call fails + """ + # Resolve model name and get capabilities + resolved_model = self._resolve_model_name(model_name) + capabilities = self.get_capabilities(resolved_model) + + # Validate parameters + # For reasoning models (no temperature support), skip temperature validation + effective_temperature = temperature + try: + if not capabilities.supports_temperature: + effective_temperature = None + else: + # Coerce into allowed range if needed + effective_temperature = capabilities.temperature_constraint.get_corrected_value(temperature) + self.validate_parameters(resolved_model, effective_temperature, **kwargs) + except Exception: + # If validation fails unexpectedly, fall back to omitting temperature + effective_temperature = None + + # Build input messages in Responses API format + input_messages = [] + if system_prompt: + input_messages.append({"role": "system", "content": system_prompt}) + input_messages.append({"role": "user", "content": prompt}) + + # Prepare API parameters + api_params = { + "model": self.deployment_name, + "input": input_messages, + } - capabilities = self._capabilities.get(canonical_name) - friendly_name = capabilities.friendly_name if capabilities else self.FRIENDLY_NAME + # Add max_output_tokens if specified + if max_output_tokens: + api_params["max_output_tokens"] = max_output_tokens + elif capabilities.max_output_tokens: + api_params["max_output_tokens"] = capabilities.max_output_tokens - return ModelResponse( - content=raw_response.content, - usage=raw_response.usage, - model_name=canonical_name, - friendly_name=friendly_name, - provider=ProviderType.AZURE, - metadata={**raw_response.metadata, "deployment": deployment_name}, + # Add temperature only when supported + if capabilities.supports_temperature and effective_temperature is not None: + api_params["temperature"] = effective_temperature + + logger.debug( + f"Azure OpenAI Responses API request: deployment={self.deployment_name}, " + f"model={resolved_model}, max_tokens={api_params.get('max_output_tokens')}" ) - def _resolve_canonical_and_deployment(self, model_name: str) -> tuple[str, str]: - resolved_canonical = self._resolve_model_name(model_name) - - if resolved_canonical not in self._deployment_map: - # The base resolver may hand back the deployment alias. Try to map it - # back to a canonical entry. - for canonical, deployment in self._deployment_map.items(): - if deployment.lower() == resolved_canonical.lower(): - return canonical, deployment - raise ValueError(f"Model '{model_name}' is not configured for Azure OpenAI") - - return resolved_canonical, self._deployment_map[resolved_canonical] - - def _parse_allowed_models(self) -> set[str] | None: # type: ignore[override] - # Support both AZURE_ALLOWED_MODELS (inherited behaviour) and the - # clearer AZURE_OPENAI_ALLOWED_MODELS alias. 
- explicit = get_env("AZURE_OPENAI_ALLOWED_MODELS") - if explicit: - models = {m.strip().lower() for m in explicit.split(",") if m.strip()} - if models: - logger.info("Configured allowed models for Azure OpenAI: %s", sorted(models)) - self._allowed_alias_cache = {} - return models - - return super()._parse_allowed_models() + try: + # Get client and make API call + client = self._get_client() + response = client.responses.create(**api_params) + + # Extract content from response + content = self._extract_content(response) + + # Extract usage data + usage = self._extract_usage(response) + + # Build ModelResponse + model_response = ModelResponse( + content=content, + usage=usage, + model_name=resolved_model, + friendly_name=capabilities.friendly_name, + provider=ProviderType.AZURE, + metadata={ + "response_id": response.id if hasattr(response, "id") else None, + "status": response.status if hasattr(response, "status") else None, + "deployment_name": self.deployment_name, + }, + ) + + logger.debug( + f"Azure OpenAI response: tokens={usage.get('total_tokens', 0)}, " + f"status={response.status if hasattr(response, 'status') else 'N/A'}" + ) + + return model_response + + except Exception as exc: + logger.error(f"Azure OpenAI API error: {exc}", exc_info=True) + raise + + def _extract_content(self, response) -> str: + """Extract text content from Responses API response. + + The Responses API returns content in different formats: + 1. output_text: Condensed text representation (preferred) + 2. output array: Array of output items (text, reasoning, etc.) + + Args: + response: API response object + + Returns: + Extracted text content + + Raises: + ValueError: If no content can be extracted + """ + # Try output_text first (condensed representation) + if hasattr(response, "output_text") and response.output_text: + logger.debug("Extracted content from output_text") + return response.output_text + + # Parse output array for text items + if hasattr(response, "output") and response.output: + text_parts = [] + + for item in response.output: + item_type = getattr(item, "type", None) + + if item_type == "text" or item_type == "message": + # Text output item + if hasattr(item, "content") and item.content: + if isinstance(item.content, list) and len(item.content) > 0: + # Content is a list of text parts + text_parts.append(item.content[0].text) + elif isinstance(item.content, str): + # Content is a string + text_parts.append(item.content) + elif hasattr(item, "text"): + # Direct text attribute + text_parts.append(item.text) + + elif item_type == "reasoning": + # Reasoning output (optional: include summary) + if hasattr(item, "summary") and item.summary: + logger.debug(f"Reasoning summary: {item.summary}") + # Optionally include reasoning in output + # text_parts.append(f"[Reasoning: {item.summary}]") + + if text_parts: + content = "\n".join(text_parts) + logger.debug(f"Extracted content from output array ({len(text_parts)} parts)") + return content + + # No content found + logger.warning("No content found in response") + raise ValueError("No content available in response") + + def _extract_usage(self, response) -> dict[str, int]: + """Extract token usage from Responses API response. 
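+
+        Both the Responses API field names (``input_tokens``/``output_tokens``) and
+        the Chat Completions style names (``prompt_tokens``/``completion_tokens``)
+        are populated so callers can use either convention. Illustrative return
+        value (token counts are examples only):
+
+            {
+                "input_tokens": 150,
+                "prompt_tokens": 150,
+                "output_tokens": 500,
+                "completion_tokens": 500,
+                "total_tokens": 650,
+            }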
+ + Args: + response: API response object + + Returns: + Dictionary with token usage (input_tokens, output_tokens, total_tokens) + """ + usage = {} + + if hasattr(response, "usage") and response.usage: + usage_obj = response.usage + + # Extract input tokens + if hasattr(usage_obj, "input_tokens"): + usage["input_tokens"] = usage_obj.input_tokens + usage["prompt_tokens"] = usage_obj.input_tokens + elif hasattr(usage_obj, "prompt_tokens"): + usage["prompt_tokens"] = usage_obj.prompt_tokens + usage["input_tokens"] = usage_obj.prompt_tokens + + # Extract output tokens + if hasattr(usage_obj, "output_tokens"): + usage["output_tokens"] = usage_obj.output_tokens + usage["completion_tokens"] = usage_obj.output_tokens + elif hasattr(usage_obj, "completion_tokens"): + usage["completion_tokens"] = usage_obj.completion_tokens + usage["output_tokens"] = usage_obj.completion_tokens + + # Extract total tokens + if hasattr(usage_obj, "total_tokens"): + usage["total_tokens"] = usage_obj.total_tokens + else: + # Calculate total if not provided + input_tokens = usage.get("input_tokens", 0) + output_tokens = usage.get("output_tokens", 0) + usage["total_tokens"] = input_tokens + output_tokens + + logger.debug(f"Token usage: {usage}") + + return usage + + def close(self) -> None: + """Clean up resources.""" + if self._client is not None: + # AzureOpenAI client doesn't require explicit cleanup + self._client = None + logger.debug("Closed Azure OpenAI client") + + def get_preferred_model(self, category: "ToolModelCategory", allowed_models: list[str]) -> Optional[str]: + """Get Azure's preferred model for a given category from allowed models. + + Args: + category: The tool category requiring a model + allowed_models: Pre-filtered list of models allowed by restrictions + + Returns: + Preferred model name or None + """ + from tools.models import ToolModelCategory + + if not allowed_models: + return None + + # Helper to find first available from preference list + def find_first(preferences: list[str]) -> Optional[str]: + """Return first available model from preference list.""" + for model in preferences: + if model in allowed_models: + return model + return None + + if category == ToolModelCategory.EXTENDED_REASONING: + # Prefer models with extended thinking support + # Order: gpt-5-codex > o3-mini > gpt-5 > gpt-5-mini + preferred = find_first(["gpt-5-codex", "o3-mini", "gpt-5", "gpt-5-mini"]) + return preferred if preferred else allowed_models[0] + + elif category == ToolModelCategory.FAST_RESPONSE: + # Prefer faster models with good performance + # Order: gpt-5-mini > gpt-5-nano > gpt-5 > gpt-4.1 + preferred = find_first(["gpt-5-mini", "gpt-5-nano", "gpt-5", "gpt-4.1"]) + return preferred if preferred else allowed_models[0] + + else: # BALANCED or default + # Prefer gpt-5-codex for code tasks, then balanced options + # Order: gpt-5-codex > gpt-5 > gpt-5-mini > o3-mini > gpt-4.1 > gpt-5-nano + preferred = find_first(["gpt-5-codex", "gpt-5", "gpt-5-mini", "o3-mini", "gpt-4.1", "gpt-5-nano"]) + return preferred if preferred else allowed_models[0] diff --git a/providers/openai_compatible.py b/providers/openai_compatible.py index 4b514d79..84141654 100644 --- a/providers/openai_compatible.py +++ b/providers/openai_compatible.py @@ -3,12 +3,12 @@ import copy import ipaddress import logging +import os from typing import Optional from urllib.parse import urlparse from openai import OpenAI -from utils.env import get_env, suppress_env_vars from utils.image_utils import validate_image from .base import ModelProvider @@ -39,7 
+39,6 @@ def __init__(self, api_key: str, base_url: str = None, **kwargs): base_url: Base URL for the API endpoint **kwargs: Additional configuration options including timeout """ - self._allowed_alias_cache: dict[str, str] = {} super().__init__(api_key, **kwargs) self._client = None self.base_url = base_url @@ -75,33 +74,9 @@ def _ensure_model_allowed( canonical = canonical_name.lower() if requested not in self.allowed_models and canonical not in self.allowed_models: - allowed = False - for allowed_entry in list(self.allowed_models): - normalized_resolved = self._allowed_alias_cache.get(allowed_entry) - if normalized_resolved is None: - try: - resolved_name = self._resolve_model_name(allowed_entry) - except Exception: - continue - - if not resolved_name: - continue - - normalized_resolved = resolved_name.lower() - self._allowed_alias_cache[allowed_entry] = normalized_resolved - - if normalized_resolved == canonical: - # Canonical match discovered via alias resolution – mark as allowed and - # memoise the canonical entry for future lookups. - allowed = True - self._allowed_alias_cache[canonical] = canonical - self.allowed_models.add(canonical) - break - - if not allowed: - raise ValueError( - f"Model '{requested_name}' is not allowed by restriction policy. Allowed models: {sorted(self.allowed_models)}" - ) + raise ValueError( + f"Model '{requested_name}' is not allowed by restriction policy. Allowed models: {sorted(self.allowed_models)}" + ) def _parse_allowed_models(self) -> Optional[set[str]]: """Parse allowed models from environment variable. @@ -112,14 +87,13 @@ def _parse_allowed_models(self) -> Optional[set[str]]: # Get provider-specific allowed models provider_type = self.get_provider_type().value.upper() env_var = f"{provider_type}_ALLOWED_MODELS" - models_str = get_env(env_var, "") or "" + models_str = os.getenv(env_var, "") if models_str: # Parse and normalize to lowercase for case-insensitive comparison models = {m.strip().lower() for m in models_str.split(",") if m.strip()} if models: logging.info(f"Configured allowed models for {self.FRIENDLY_NAME}: {sorted(models)}") - self._allowed_alias_cache = {} return models # Log info if no allow-list configured for proxy providers @@ -165,25 +139,10 @@ def _configure_timeouts(self, **kwargs): logging.info(f"Using extended timeouts for custom endpoint: {self.base_url}") # Allow override via kwargs or environment variables in future, for now... 
- connect_timeout = kwargs.get("connect_timeout") - if connect_timeout is None: - connect_timeout_raw = get_env("CUSTOM_CONNECT_TIMEOUT") - connect_timeout = float(connect_timeout_raw) if connect_timeout_raw is not None else float(default_connect) - - read_timeout = kwargs.get("read_timeout") - if read_timeout is None: - read_timeout_raw = get_env("CUSTOM_READ_TIMEOUT") - read_timeout = float(read_timeout_raw) if read_timeout_raw is not None else float(default_read) - - write_timeout = kwargs.get("write_timeout") - if write_timeout is None: - write_timeout_raw = get_env("CUSTOM_WRITE_TIMEOUT") - write_timeout = float(write_timeout_raw) if write_timeout_raw is not None else float(default_write) - - pool_timeout = kwargs.get("pool_timeout") - if pool_timeout is None: - pool_timeout_raw = get_env("CUSTOM_POOL_TIMEOUT") - pool_timeout = float(pool_timeout_raw) if pool_timeout_raw is not None else float(default_pool) + connect_timeout = kwargs.get("connect_timeout", float(os.getenv("CUSTOM_CONNECT_TIMEOUT", default_connect))) + read_timeout = kwargs.get("read_timeout", float(os.getenv("CUSTOM_READ_TIMEOUT", default_read))) + write_timeout = kwargs.get("write_timeout", float(os.getenv("CUSTOM_WRITE_TIMEOUT", default_write))) + pool_timeout = kwargs.get("pool_timeout", float(os.getenv("CUSTOM_POOL_TIMEOUT", default_pool))) timeout = httpx.Timeout(connect=connect_timeout, read=read_timeout, write=write_timeout, pool=pool_timeout) @@ -257,74 +216,80 @@ def _validate_base_url(self) -> None: def client(self): """Lazy initialization of OpenAI client with security checks and timeout configuration.""" if self._client is None: + import os + import httpx + # Temporarily disable proxy environment variables to prevent httpx from detecting them + original_env = {} proxy_env_vars = ["HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy"] - with suppress_env_vars(*proxy_env_vars): - try: - # Create a custom httpx client that explicitly avoids proxy parameters - timeout_config = ( - self.timeout_config - if hasattr(self, "timeout_config") and self.timeout_config - else httpx.Timeout(30.0) + for var in proxy_env_vars: + if var in os.environ: + original_env[var] = os.environ[var] + del os.environ[var] + + try: + # Create a custom httpx client that explicitly avoids proxy parameters + timeout_config = ( + self.timeout_config + if hasattr(self, "timeout_config") and self.timeout_config + else httpx.Timeout(30.0) + ) + + # Create httpx client with minimal config to avoid proxy conflicts + # Note: proxies parameter was removed in httpx 0.28.0 + # Check for test transport injection + if hasattr(self, "_test_transport"): + # Use custom transport for testing (HTTP recording/replay) + http_client = httpx.Client( + transport=self._test_transport, + timeout=timeout_config, + follow_redirects=True, + ) + else: + # Normal production client + http_client = httpx.Client( + timeout=timeout_config, + follow_redirects=True, ) - # Create httpx client with minimal config to avoid proxy conflicts - # Note: proxies parameter was removed in httpx 0.28.0 - # Check for test transport injection - if hasattr(self, "_test_transport"): - # Use custom transport for testing (HTTP recording/replay) - http_client = httpx.Client( - transport=self._test_transport, - timeout=timeout_config, - follow_redirects=True, - ) - else: - # Normal production client - http_client = httpx.Client( - timeout=timeout_config, - follow_redirects=True, - ) - - # Keep client initialization minimal to avoid proxy parameter conflicts - 
client_kwargs = { - "api_key": self.api_key, - "http_client": http_client, - } + # Keep client initialization minimal to avoid proxy parameter conflicts + client_kwargs = { + "api_key": self.api_key, + "http_client": http_client, + } - if self.base_url: - client_kwargs["base_url"] = self.base_url + if self.base_url: + client_kwargs["base_url"] = self.base_url - if self.organization: - client_kwargs["organization"] = self.organization + if self.organization: + client_kwargs["organization"] = self.organization - # Add default headers if any - if self.DEFAULT_HEADERS: - client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() + # Add default headers if any + if self.DEFAULT_HEADERS: + client_kwargs["default_headers"] = self.DEFAULT_HEADERS.copy() - logging.debug( - "OpenAI client initialized with custom httpx client and timeout: %s", - timeout_config, - ) + logging.debug(f"OpenAI client initialized with custom httpx client and timeout: {timeout_config}") - # Create OpenAI client with custom httpx client - self._client = OpenAI(**client_kwargs) + # Create OpenAI client with custom httpx client + self._client = OpenAI(**client_kwargs) - except Exception as e: - # If all else fails, try absolute minimal client without custom httpx - logging.warning( - "Failed to create client with custom httpx, falling back to minimal config: %s", - e, - ) - try: - minimal_kwargs = {"api_key": self.api_key} - if self.base_url: - minimal_kwargs["base_url"] = self.base_url - self._client = OpenAI(**minimal_kwargs) - except Exception as fallback_error: - logging.error("Even minimal OpenAI client creation failed: %s", fallback_error) - raise + except Exception as e: + # If all else fails, try absolute minimal client without custom httpx + logging.warning(f"Failed to create client with custom httpx, falling back to minimal config: {e}") + try: + minimal_kwargs = {"api_key": self.api_key} + if self.base_url: + minimal_kwargs["base_url"] = self.base_url + self._client = OpenAI(**minimal_kwargs) + except Exception as fallback_error: + logging.error(f"Even minimal OpenAI client creation failed: {fallback_error}") + raise + finally: + # Restore original proxy environment variables + for var, value in original_env.items(): + os.environ[var] = value return self._client @@ -391,10 +356,9 @@ def _generate_with_responses_endpoint( messages: list, temperature: float, max_output_tokens: Optional[int] = None, - capabilities: Optional[ModelCapabilities] = None, **kwargs, ) -> ModelResponse: - """Generate content using the /v1/responses endpoint for reasoning models.""" + """Generate content using the /v1/responses endpoint for o3-pro via OpenAI library.""" # Convert messages to the correct format for responses endpoint input_messages = [] @@ -413,14 +377,10 @@ def _generate_with_responses_endpoint( # Prepare completion parameters for responses endpoint # Based on OpenAI documentation, use nested reasoning object for responses endpoint - effort = "medium" - if capabilities and capabilities.default_reasoning_effort: - effort = capabilities.default_reasoning_effort - completion_params = { "model": model_name, "input": input_messages, - "reasoning": {"effort": effort}, + "reasoning": {"effort": "medium"}, # Use nested object for responses endpoint "store": True, } @@ -428,6 +388,10 @@ def _generate_with_responses_endpoint( if max_output_tokens: completion_params["max_completion_tokens"] = max_output_tokens + # Do NOT send temperature for GPT-5 family or other reasoning models. 
+ # The Responses API rejects sampling params on these models; omit entirely. + # (If future non-reasoning models are routed here, they may accept temperature.) + # For responses endpoint, we only add parameters that are explicitly supported # Remove unsupported chat completion parameters that may cause API errors @@ -480,11 +444,11 @@ def _attempt() -> ModelResponse: operation=_attempt, max_attempts=max_retries, delays=retry_delays, - log_prefix="responses endpoint", + log_prefix="o3-pro responses endpoint", ) except Exception as exc: attempts = max(attempt_counter["value"], 1) - error_msg = f"responses endpoint error after {attempts} attempt{'s' if attempts > 1 else ''}: {exc}" + error_msg = f"o3-pro responses endpoint error after {attempts} attempt{'s' if attempts > 1 else ''}: {exc}" logging.error(error_msg) raise RuntimeError(error_msg) from exc @@ -502,11 +466,10 @@ def generate_content( Args: prompt: User prompt to send to the model - model_name: Canonical model name or its alias + model_name: Name of the model to use system_prompt: Optional system prompt for model behavior temperature: Sampling temperature max_output_tokens: Maximum tokens to generate - images: Optional list of image paths or data URLs to include with the prompt (for vision models) **kwargs: Additional provider-specific parameters Returns: @@ -538,9 +501,6 @@ def generate_content( # Validate parameters with the effective temperature self.validate_parameters(model_name, effective_temperature) - # Resolve to canonical model name - resolved_model = self._resolve_model_name(model_name) - # Prepare messages messages = [] if system_prompt: @@ -562,7 +522,7 @@ def generate_content( # Continue with other images and text continue elif images and (not capabilities or not capabilities.supports_images): - logging.warning(f"Model {resolved_model} does not support images, ignoring {len(images)} image(s)") + logging.warning(f"Model {model_name} does not support images, ignoring {len(images)} image(s)") # Add user message if len(user_content) == 1: @@ -572,13 +532,13 @@ def generate_content( # Text + images, use content array format messages.append({"role": "user", "content": user_content}) - # Prepare completion parameters - # Always disable streaming for OpenRouter - # MCP doesn't use streaming, and this avoids issues with O3 model access + # Resolve alias -> canonical early and use consistently + resolved_model = self._resolve_model_name(model_name) + + # Prepare completion parameters with canonical model completion_params = { "model": resolved_model, "messages": messages, - "stream": False, } # Use the effective temperature we calculated earlier @@ -597,21 +557,13 @@ def generate_content( for key, value in kwargs.items(): if key in ["top_p", "frequency_penalty", "presence_penalty", "seed", "stop", "stream"]: # Reasoning models (those that don't support temperature) also don't support these parameters - if not supports_sampling and key in ["top_p", "frequency_penalty", "presence_penalty", "stream"]: + if not supports_sampling and key in ["top_p", "frequency_penalty", "presence_penalty"]: continue # Skip unsupported parameters for reasoning models completion_params[key] = value # Check if this model needs the Responses API endpoint - # Prefer capability metadata; fall back to static map when capabilities unavailable - use_responses_api = False - if capabilities is not None: - use_responses_api = getattr(capabilities, "use_openai_response_api", False) - else: - static_capabilities = 
self.get_all_model_capabilities().get(resolved_model) - if static_capabilities is not None: - use_responses_api = getattr(static_capabilities, "use_openai_response_api", False) - - if use_responses_api: + # Reasoning and GPT-5 family use the new Responses API + if resolved_model in ["o3-pro", "gpt-5-codex", "gpt-5", "gpt-5-mini", "gpt-5-nano"]: # These models require the /v1/responses endpoint for stateful context # If it fails, we should not fall back to chat/completions return self._generate_with_responses_endpoint( @@ -619,7 +571,6 @@ def generate_content( messages=messages, temperature=temperature, max_output_tokens=max_output_tokens, - capabilities=capabilities, **kwargs, ) @@ -654,12 +605,12 @@ def _attempt() -> ModelResponse: operation=_attempt, max_attempts=max_retries, delays=retry_delays, - log_prefix=f"{self.FRIENDLY_NAME} API ({resolved_model})", + log_prefix=f"{self.FRIENDLY_NAME} API ({model_name})", ) except Exception as exc: attempts = max(attempt_counter["value"], 1) error_msg = ( - f"{self.FRIENDLY_NAME} API error for model {resolved_model} after {attempts} attempt" + f"{self.FRIENDLY_NAME} API error for model {model_name} after {attempts} attempt" f"{'s' if attempts > 1 else ''}: {exc}" ) logging.error(error_msg) @@ -671,7 +622,7 @@ def validate_parameters(self, model_name: str, temperature: float, **kwargs) -> For proxy providers, this may use generic capabilities. Args: - model_name: Canonical model name or its alias + model_name: Model to validate for temperature: Temperature to validate **kwargs: Additional parameters to validate """ diff --git a/providers/registry.py b/providers/registry.py index cd28c426..4fd2fe1f 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -1,10 +1,9 @@ """Model provider registry for managing available providers.""" import logging +import os from typing import TYPE_CHECKING, Optional -from utils.env import get_env - from .base import ModelProvider from .shared import ProviderType @@ -38,7 +37,7 @@ class ModelProviderRegistry: PROVIDER_PRIORITY_ORDER = [ ProviderType.GOOGLE, # Direct Gemini access ProviderType.OPENAI, # Direct OpenAI access - ProviderType.AZURE, # Azure-hosted OpenAI deployments + ProviderType.AZURE, # Azure OpenAI access ProviderType.XAI, # Direct X.AI GROK access ProviderType.DIAL, # DIAL unified API access ProviderType.CUSTOM, # Local/self-hosted models @@ -104,7 +103,7 @@ def get_provider(cls, provider_type: ProviderType, force_new: bool = False) -> O provider = provider_class(api_key=api_key) else: # Regular class - need to handle URL requirement - custom_url = get_env("CUSTOM_API_URL", "") or "" + custom_url = os.getenv("CUSTOM_API_URL", "") if not custom_url: if api_key: # Key is set but URL is missing logging.warning("CUSTOM_API_KEY set but CUSTOM_API_URL missing – skipping Custom provider") @@ -118,26 +117,29 @@ def get_provider(cls, provider_type: ProviderType, force_new: bool = False) -> O # For Gemini, check if custom base URL is configured if not api_key: return None - gemini_base_url = get_env("GEMINI_BASE_URL") + gemini_base_url = os.getenv("GEMINI_BASE_URL") provider_kwargs = {"api_key": api_key} if gemini_base_url: provider_kwargs["base_url"] = gemini_base_url logging.info(f"Initialized Gemini provider with custom endpoint: {gemini_base_url}") provider = provider_class(**provider_kwargs) elif provider_type == ProviderType.AZURE: + # For Azure OpenAI, check required configuration if not api_key: return None + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + azure_version = 
os.getenv("AZURE_OPENAI_API_VERSION", "2025-04-01-preview") + deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") - azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT") - if not azure_endpoint: - logging.warning("AZURE_OPENAI_ENDPOINT missing – skipping Azure OpenAI provider") + if not azure_endpoint or not deployment_name: + logging.warning("Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT_NAME") return None - azure_version = get_env("AZURE_OPENAI_API_VERSION") provider = provider_class( api_key=api_key, azure_endpoint=azure_endpoint, api_version=azure_version, + deployment_name=deployment_name, ) else: if not api_key: @@ -222,18 +224,6 @@ def get_available_models(cls, respect_restrictions: bool = True) -> dict[str, Pr logging.warning("Provider %s does not implement list_models", provider_type) continue - if restriction_service and restriction_service.has_restrictions(provider_type): - restricted_display = cls._collect_restricted_display_names( - provider, - provider_type, - available, - restriction_service, - ) - if restricted_display: - for model_name in restricted_display: - models[model_name] = provider_type - continue - for model_name in available: # ===================================================================================== # CRITICAL: Prevent double restriction filtering (Fixed Issue #98) @@ -256,50 +246,6 @@ def get_available_models(cls, respect_restrictions: bool = True) -> dict[str, Pr return models - @classmethod - def _collect_restricted_display_names( - cls, - provider: ModelProvider, - provider_type: ProviderType, - available: list[str], - restriction_service, - ) -> list[str] | None: - """Derive the human-facing model list when restrictions are active.""" - - allowed_models = restriction_service.get_allowed_models(provider_type) - if not allowed_models: - return None - - allowed_details: list[tuple[str, int]] = [] - - for model_name in sorted(allowed_models): - try: - capabilities = provider.get_capabilities(model_name) - except (AttributeError, ValueError): - continue - - try: - rank = capabilities.get_effective_capability_rank() - rank_value = float(rank) - except (AttributeError, TypeError, ValueError): - rank_value = 0.0 - - allowed_details.append((model_name, rank_value)) - - if allowed_details: - allowed_details.sort(key=lambda item: (-item[1], item[0])) - return [name for name, _ in allowed_details] - - # Fallback: intersect the allowlist with the provider-advertised names. - available_lookup = {name.lower(): name for name in available} - display_names: list[str] = [] - for model_name in sorted(allowed_models): - lowered = model_name.lower() - if lowered in available_lookup: - display_names.append(available_lookup[lowered]) - - return display_names - @classmethod def get_available_model_names(cls, provider_type: Optional[ProviderType] = None) -> list[str]: """Get list of available model names, optionally filtered by provider. 
@@ -334,18 +280,18 @@ def _get_api_key_for_provider(cls, provider_type: ProviderType) -> Optional[str] key_mapping = { ProviderType.GOOGLE: "GEMINI_API_KEY", ProviderType.OPENAI: "OPENAI_API_KEY", - ProviderType.AZURE: "AZURE_OPENAI_API_KEY", ProviderType.XAI: "XAI_API_KEY", ProviderType.OPENROUTER: "OPENROUTER_API_KEY", ProviderType.CUSTOM: "CUSTOM_API_KEY", # Can be empty for providers that don't need auth ProviderType.DIAL: "DIAL_API_KEY", + ProviderType.AZURE: "AZURE_OPENAI_API_KEY", } env_var = key_mapping.get(provider_type) if not env_var: return None - return get_env(env_var) + return os.getenv(env_var) @classmethod def _get_allowed_models_for_provider(cls, provider: ModelProvider, provider_type: ProviderType) -> list[str]: diff --git a/pyproject.toml b/pyproject.toml index ff7b1f22..e3e9bfd3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "zen-mcp-server" -version = "7.8.1" +version = "1.0.0" description = "AI-powered MCP server with multiple model providers" requires-python = ">=3.9" dependencies = [ @@ -12,7 +12,7 @@ dependencies = [ ] [tool.setuptools.packages.find] -include = ["tools*", "providers*", "systemprompts*", "utils*", "conf*", "clink*"] +include = ["tools*", "providers*", "systemprompts*", "utils*", "conf*"] [tool.setuptools] py-modules = ["server", "config"] @@ -21,15 +21,7 @@ py-modules = ["server", "config"] "*" = ["conf/*.json"] [tool.setuptools.data-files] -"conf" = [ - "conf/custom_models.json", - "conf/openrouter_models.json", - "conf/azure_models.json", - "conf/openai_models.json", - "conf/gemini_models.json", - "conf/xai_models.json", - "conf/dial_models.json", -] +"conf" = ["conf/custom_models.json"] [project.scripts] zen-mcp-server = "server:run" diff --git a/server.py b/server.py index 5e4e5177..eeaba6bf 100644 --- a/server.py +++ b/server.py @@ -28,6 +28,35 @@ from pathlib import Path from typing import Any, Optional +# Try to load environment variables from .env file if dotenv is available +# This is optional - environment variables can still be passed directly +try: + from dotenv import dotenv_values, load_dotenv + + # Load environment variables from .env file in the script's directory + # This ensures .env is loaded regardless of the current working directory + script_dir = Path(__file__).parent + env_file = script_dir / ".env" + + # First load only to read ZEN_MCP_FORCE_ENV_OVERRIDE, then reload with proper override setting + # Use a temporary environment to read just this configuration variable + temp_env = {} + if env_file.exists(): + temp_env = dotenv_values(env_file) + + # Check if we should force override based on .env file content (not system env) + force_override = temp_env.get("ZEN_MCP_FORCE_ENV_OVERRIDE", "false").lower() == "true" + + # Load .env file with appropriate override setting + load_dotenv(dotenv_path=env_file, override=force_override) + + # Store override setting for logging after logger is configured + _zen_mcp_force_override = force_override +except ImportError: + # dotenv not available - this is fine, environment variables can still be passed directly + # This commonly happens when running via uvx or in minimal environments + pass + from mcp.server import Server # noqa: E402 from mcp.server.models import InitializationOptions # noqa: E402 from mcp.server.stdio import stdio_server # noqa: E402 @@ -51,13 +80,11 @@ AnalyzeTool, ChallengeTool, ChatTool, - CLinkTool, CodeReviewTool, ConsensusTool, DebugIssueTool, DocgenTool, ListModelsTool, - LookupTool, PlannerTool, PrecommitTool, RefactorTool, @@ 
-68,11 +95,10 @@ VersionTool, ) from tools.models import ToolOutput # noqa: E402 -from utils.env import env_override_enabled, get_env # noqa: E402 # Configure logging for server operations # Can be controlled via LOG_LEVEL environment variable (DEBUG, INFO, WARNING, ERROR) -log_level = (get_env("LOG_LEVEL", "DEBUG") or "DEBUG").upper() +log_level = os.getenv("LOG_LEVEL", "DEBUG").upper() # Create timezone-aware formatter @@ -151,12 +177,19 @@ def formatTime(self, record, datefmt=None): logger = logging.getLogger(__name__) -# Log ZEN_MCP_FORCE_ENV_OVERRIDE configuration for transparency -if env_override_enabled(): - logger.info("ZEN_MCP_FORCE_ENV_OVERRIDE enabled - .env file values will override system environment variables") - logger.debug("Environment override prevents conflicts between different AI tools passing cached API keys") -else: - logger.debug("ZEN_MCP_FORCE_ENV_OVERRIDE disabled - system environment variables take precedence") +# Log ZEN_MCP_FORCE_ENV_OVERRIDE configuration if it was set during dotenv loading +try: + if "_zen_mcp_force_override" in globals(): + if _zen_mcp_force_override: + logger.info( + "ZEN_MCP_FORCE_ENV_OVERRIDE enabled - .env file values will override system environment variables" + ) + logger.debug("Environment override prevents conflicts between different AI tools passing cached API keys") + else: + logger.debug("ZEN_MCP_FORCE_ENV_OVERRIDE disabled - system environment variables take precedence") +except NameError: + # _zen_mcp_force_override not defined, which means dotenv wasn't available or no .env file + pass # Create the MCP server instance with a unique name identifier @@ -175,7 +208,7 @@ def parse_disabled_tools_env() -> set[str]: Returns: Set of lowercase tool names to disable, empty set if none specified """ - disabled_tools_env = (get_env("DISABLED_TOOLS", "") or "").strip() + disabled_tools_env = os.getenv("DISABLED_TOOLS", "").strip() if not disabled_tools_env: return set() return {t.strip().lower() for t in disabled_tools_env.split(",") if t.strip()} @@ -259,7 +292,6 @@ def filter_disabled_tools(all_tools: dict[str, Any]) -> dict[str, Any]: # Tools are instantiated once and reused across requests (stateless design) TOOLS = { "chat": ChatTool(), # Interactive development chat and brainstorming - "clink": CLinkTool(), # Bridge requests to configured AI CLIs "thinkdeep": ThinkDeepTool(), # Step-by-step deep thinking workflow with expert analysis "planner": PlannerTool(), # Interactive sequential planner using workflow architecture "consensus": ConsensusTool(), # Step-by-step consensus workflow with multi-model analysis @@ -273,7 +305,6 @@ def filter_disabled_tools(all_tools: dict[str, Any]) -> dict[str, Any]: "tracer": TracerTool(), # Static call path prediction and control flow analysis "testgen": TestGenTool(), # Step-by-step test generation workflow with expert validation "challenge": ChallengeTool(), # Critical challenge prompt wrapper to avoid automatic agreement - "apilookup": LookupTool(), # Quick web/API lookup instructions "listmodels": ListModelsTool(), # List all available AI models by provider "version": VersionTool(), # Display server version and system information } @@ -286,11 +317,6 @@ def filter_disabled_tools(all_tools: dict[str, Any]) -> dict[str, Any]: "description": "Chat and brainstorm ideas", "template": "Chat with {model} about this", }, - "clink": { - "name": "clink", - "description": "Forward a request to a configured AI CLI (e.g., Gemini)", - "template": "Use clink with cli_name= to run this prompt", - }, "thinkdeep": { 
"name": "thinkdeeper", "description": "Step-by-step deep thinking workflow with expert analysis", @@ -356,11 +382,6 @@ def filter_disabled_tools(all_tools: dict[str, Any]) -> dict[str, Any]: "description": "Challenge a statement critically without automatic agreement", "template": "Challenge this statement critically", }, - "apilookup": { - "name": "apilookup", - "description": "Look up the latest API or SDK information", - "template": "Lookup latest API docs for {model}", - }, "listmodels": { "name": "listmodels", "description": "List available AI models", @@ -388,7 +409,7 @@ def configure_providers(): logger.debug("Checking environment variables for API keys...") api_keys_to_check = ["OPENAI_API_KEY", "OPENROUTER_API_KEY", "GEMINI_API_KEY", "XAI_API_KEY", "CUSTOM_API_URL"] for key in api_keys_to_check: - value = get_env(key) + value = os.getenv(key) logger.debug(f" {key}: {'[PRESENT]' if value else '[MISSING]'}") from providers import ModelProviderRegistry from providers.azure_openai import AzureOpenAIProvider @@ -407,14 +428,14 @@ def configure_providers(): has_custom = False # Check for Gemini API key - gemini_key = get_env("GEMINI_API_KEY") + gemini_key = os.getenv("GEMINI_API_KEY") if gemini_key and gemini_key != "your_gemini_api_key_here": valid_providers.append("Gemini") has_native_apis = True logger.info("Gemini API key found - Gemini models available") # Check for OpenAI API key - openai_key = get_env("OPENAI_API_KEY") + openai_key = os.getenv("OPENAI_API_KEY") logger.debug(f"OpenAI key check: key={'[PRESENT]' if openai_key else '[MISSING]'}") if openai_key and openai_key != "your_openai_api_key_here": valid_providers.append("OpenAI") @@ -426,43 +447,32 @@ def configure_providers(): else: logger.debug("OpenAI API key is placeholder value") - # Check for Azure OpenAI configuration - azure_key = get_env("AZURE_OPENAI_API_KEY") - azure_endpoint = get_env("AZURE_OPENAI_ENDPOINT") - azure_models_available = False - if azure_key and azure_key != "your_azure_openai_key_here" and azure_endpoint: - try: - from providers.registries.azure import AzureModelRegistry - - azure_registry = AzureModelRegistry() - if azure_registry.list_models(): - valid_providers.append("Azure OpenAI") - has_native_apis = True - azure_models_available = True - logger.info("Azure OpenAI configuration detected") - else: - logger.warning( - "Azure OpenAI models configuration is empty. Populate conf/azure_models.json or set AZURE_MODELS_CONFIG_PATH." 
- ) - except Exception as exc: - logger.warning(f"Failed to load Azure OpenAI models: {exc}") - # Check for X.AI API key - xai_key = get_env("XAI_API_KEY") + xai_key = os.getenv("XAI_API_KEY") if xai_key and xai_key != "your_xai_api_key_here": valid_providers.append("X.AI (GROK)") has_native_apis = True logger.info("X.AI API key found - GROK models available") + # Check for Azure OpenAI API key + azure_key = os.getenv("AZURE_OPENAI_API_KEY") + azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") + if azure_key and azure_endpoint and azure_deployment: + if azure_key != "your_azure_openai_key_here" and azure_endpoint != "https://your-resource.openai.azure.com/": + valid_providers.append("Azure OpenAI") + has_native_apis = True + logger.info(f"Azure OpenAI found - deployment: {azure_deployment}") + # Check for DIAL API key - dial_key = get_env("DIAL_API_KEY") + dial_key = os.getenv("DIAL_API_KEY") if dial_key and dial_key != "your_dial_api_key_here": valid_providers.append("DIAL") has_native_apis = True logger.info("DIAL API key found - DIAL models available") # Check for OpenRouter API key - openrouter_key = get_env("OPENROUTER_API_KEY") + openrouter_key = os.getenv("OPENROUTER_API_KEY") logger.debug(f"OpenRouter key check: key={'[PRESENT]' if openrouter_key else '[MISSING]'}") if openrouter_key and openrouter_key != "your_openrouter_api_key_here": valid_providers.append("OpenRouter") @@ -475,14 +485,14 @@ def configure_providers(): logger.debug("OpenRouter API key is placeholder value") # Check for custom API endpoint (Ollama, vLLM, etc.) - custom_url = get_env("CUSTOM_API_URL") + custom_url = os.getenv("CUSTOM_API_URL") if custom_url: # IMPORTANT: Always read CUSTOM_API_KEY even if empty # - Some providers (vLLM, LM Studio, enterprise APIs) require authentication # - Others (Ollama) work without authentication (empty key) # - DO NOT remove this variable - it's needed for provider factory function - custom_key = get_env("CUSTOM_API_KEY", "") or "" # Default to empty (Ollama doesn't need auth) - custom_model = get_env("CUSTOM_MODEL_NAME", "llama3.2") or "llama3.2" + custom_key = os.getenv("CUSTOM_API_KEY", "") # Default to empty (Ollama doesn't need auth) + custom_model = os.getenv("CUSTOM_MODEL_NAME", "llama3.2") valid_providers.append(f"Custom API ({custom_url})") has_custom = True logger.info(f"Custom API endpoint found: {custom_url} with model {custom_model}") @@ -493,51 +503,35 @@ def configure_providers(): # Register providers in priority order: # 1. 
Native APIs first (most direct and efficient) - registered_providers = [] - if has_native_apis: if gemini_key and gemini_key != "your_gemini_api_key_here": ModelProviderRegistry.register_provider(ProviderType.GOOGLE, GeminiModelProvider) - registered_providers.append(ProviderType.GOOGLE.value) - logger.debug(f"Registered provider: {ProviderType.GOOGLE.value}") if openai_key and openai_key != "your_openai_api_key_here": ModelProviderRegistry.register_provider(ProviderType.OPENAI, OpenAIModelProvider) - registered_providers.append(ProviderType.OPENAI.value) - logger.debug(f"Registered provider: {ProviderType.OPENAI.value}") - if azure_models_available: - ModelProviderRegistry.register_provider(ProviderType.AZURE, AzureOpenAIProvider) - registered_providers.append(ProviderType.AZURE.value) - logger.debug(f"Registered provider: {ProviderType.AZURE.value}") + if azure_key and azure_endpoint and azure_deployment: + if ( + azure_key != "your_azure_openai_key_here" + and azure_endpoint != "https://your-resource.openai.azure.com/" + ): + ModelProviderRegistry.register_provider(ProviderType.AZURE, AzureOpenAIProvider) if xai_key and xai_key != "your_xai_api_key_here": ModelProviderRegistry.register_provider(ProviderType.XAI, XAIModelProvider) - registered_providers.append(ProviderType.XAI.value) - logger.debug(f"Registered provider: {ProviderType.XAI.value}") if dial_key and dial_key != "your_dial_api_key_here": ModelProviderRegistry.register_provider(ProviderType.DIAL, DIALModelProvider) - registered_providers.append(ProviderType.DIAL.value) - logger.debug(f"Registered provider: {ProviderType.DIAL.value}") # 2. Custom provider second (for local/private models) if has_custom: # Factory function that creates CustomProvider with proper parameters def custom_provider_factory(api_key=None): # api_key is CUSTOM_API_KEY (can be empty for Ollama), base_url from CUSTOM_API_URL - base_url = get_env("CUSTOM_API_URL", "") or "" + base_url = os.getenv("CUSTOM_API_URL", "") return CustomProvider(api_key=api_key or "", base_url=base_url) # Use provided API key or empty string ModelProviderRegistry.register_provider(ProviderType.CUSTOM, custom_provider_factory) - registered_providers.append(ProviderType.CUSTOM.value) - logger.debug(f"Registered provider: {ProviderType.CUSTOM.value}") # 3. OpenRouter last (catch-all for everything else) if has_openrouter: ModelProviderRegistry.register_provider(ProviderType.OPENROUTER, OpenRouterProvider) - registered_providers.append(ProviderType.OPENROUTER.value) - logger.debug(f"Registered provider: {ProviderType.OPENROUTER.value}") - - # Log all registered providers - if registered_providers: - logger.info(f"Registered providers: {', '.join(registered_providers)}") # Require at least one valid provider if not valid_providers: @@ -545,6 +539,7 @@ def custom_provider_factory(api_key=None): "At least one API configuration is required. 
Please set either:\n" "- GEMINI_API_KEY for Gemini models\n" "- OPENAI_API_KEY for OpenAI models\n" + "- AZURE_OPENAI_API_KEY + AZURE_OPENAI_ENDPOINT + AZURE_OPENAI_DEPLOYMENT_NAME for Azure OpenAI models\n" "- XAI_API_KEY for X.AI GROK models\n" "- DIAL_API_KEY for DIAL models\n" "- OPENROUTER_API_KEY for OpenRouter (multiple models)\n" @@ -679,8 +674,7 @@ async def handle_list_tools() -> list[Tool]: ) # Log cache efficiency info - openrouter_key_for_cache = get_env("OPENROUTER_API_KEY") - if openrouter_key_for_cache and openrouter_key_for_cache != "your_openrouter_api_key_here": + if os.getenv("OPENROUTER_API_KEY") and os.getenv("OPENROUTER_API_KEY") != "your_openrouter_api_key_here": logger.debug("OpenRouter registry cache used efficiently across all tool schemas") logger.debug(f"Returning {len(tools)} tools to MCP client") @@ -1092,12 +1086,9 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any # Create model context early to use for history building from utils.model_context import ModelContext - tool = TOOLS.get(context.tool_name) - requires_model = tool.requires_model() if tool else True - # Check if we should use the model from the previous conversation turn model_from_args = arguments.get("model") - if requires_model and not model_from_args and context.turns: + if not model_from_args and context.turns: # Find the last assistant turn to get the model used for turn in reversed(context.turns): if turn.role == "assistant" and turn.model_name: @@ -1105,99 +1096,48 @@ async def reconstruct_thread_context(arguments: dict[str, Any]) -> dict[str, Any logger.debug(f"[CONVERSATION_DEBUG] Using model from previous turn: {turn.model_name}") break - # Resolve an effective model for context reconstruction when DEFAULT_MODEL=auto - model_context = arguments.get("_model_context") + # Build or repair model context with graceful fallback when needed + try: + model_context = ModelContext.from_arguments(arguments) + except ValueError as exc: + # Try to find a reasonable fallback model + from providers.registry import ModelProviderRegistry - if requires_model: - if model_context is None: + fallback_model = None + # Try to get a category-appropriate fallback if we know the tool + if context.tool_name: try: - model_context = ModelContext.from_arguments(arguments) - arguments.setdefault("_resolved_model_name", model_context.model_name) - except ValueError as exc: - from providers.registry import ModelProviderRegistry - - fallback_model = None - if tool is not None: - try: - fallback_model = ModelProviderRegistry.get_preferred_fallback_model(tool.get_model_category()) - except Exception as fallback_exc: # pragma: no cover - defensive log - logger.debug( - f"[CONVERSATION_DEBUG] Unable to resolve fallback model for {context.tool_name}: {fallback_exc}" - ) - - if fallback_model is None: - available_models = ModelProviderRegistry.get_available_model_names() - if available_models: - fallback_model = available_models[0] - - if fallback_model is None: - raise + # Import tool registry to get tool instance + from tools import get_tool_by_name + tool_instance = get_tool_by_name(context.tool_name) + if tool_instance and hasattr(tool_instance, "get_model_category"): + fallback_model = ModelProviderRegistry.get_preferred_fallback_model( + tool_instance.get_model_category() + ) + except Exception as fallback_exc: # pragma: no cover - defensive log logger.debug( - f"[CONVERSATION_DEBUG] Falling back to model '{fallback_model}' for context reconstruction after error: {exc}" + 
f"[CONVERSATION_DEBUG] Unable to resolve fallback model for {context.tool_name}: {fallback_exc}" ) - model_context = ModelContext(fallback_model) - arguments["_model_context"] = model_context - arguments["_resolved_model_name"] = fallback_model - from providers.registry import ModelProviderRegistry - - provider = ModelProviderRegistry.get_provider_for_model(model_context.model_name) - if provider is None: - fallback_model = None - if tool is not None: - try: - fallback_model = ModelProviderRegistry.get_preferred_fallback_model(tool.get_model_category()) - except Exception as fallback_exc: # pragma: no cover - defensive log - logger.debug( - f"[CONVERSATION_DEBUG] Unable to resolve fallback model for {context.tool_name}: {fallback_exc}" - ) - - if fallback_model is None: - available_models = ModelProviderRegistry.get_available_model_names() - if available_models: - fallback_model = available_models[0] - - if fallback_model is None: - raise ValueError( - f"Conversation continuation failed: model '{model_context.model_name}' is not available with current API keys." - ) + if fallback_model is None: + available_models = ModelProviderRegistry.get_available_model_names() + if available_models: + fallback_model = available_models[0] - logger.debug( - f"[CONVERSATION_DEBUG] Model '{model_context.model_name}' unavailable; swapping to '{fallback_model}' for context reconstruction" + if fallback_model is None: + # Propagate with helpful error + raise ValueError( + f"Conversation continuation failed: model context could not be created for arguments; " + f"no available fallback models detected. Original error: {exc}" ) - model_context = ModelContext(fallback_model) - arguments["_model_context"] = model_context - arguments["_resolved_model_name"] = fallback_model - else: - if model_context is None: - from providers.registry import ModelProviderRegistry - - fallback_model = None - if tool is not None: - try: - fallback_model = ModelProviderRegistry.get_preferred_fallback_model(tool.get_model_category()) - except Exception as fallback_exc: # pragma: no cover - defensive log - logger.debug( - f"[CONVERSATION_DEBUG] Unable to resolve fallback model for {context.tool_name}: {fallback_exc}" - ) - - if fallback_model is None: - available_models = ModelProviderRegistry.get_available_model_names() - if available_models: - fallback_model = available_models[0] - - if fallback_model is None: - raise ValueError( - "Conversation continuation failed: no available models detected for context reconstruction." 
- ) - logger.debug( - f"[CONVERSATION_DEBUG] Using fallback model '{fallback_model}' for context reconstruction of tool without model requirement" - ) - model_context = ModelContext(fallback_model) - arguments["_model_context"] = model_context - arguments["_resolved_model_name"] = fallback_model + logger.debug( + f"[CONVERSATION_DEBUG] Falling back to model '{fallback_model}' for context reconstruction after error: {exc}" + ) + model_context = ModelContext(fallback_model) + arguments["_model_context"] = model_context + arguments["_resolved_model_name"] = fallback_model # Build conversation history with model-specific limits logger.debug(f"[CONVERSATION_DEBUG] Building conversation history for thread {continuation_id}") diff --git a/tests/test_azure_openai_provider.py b/tests/test_azure_openai_provider.py index 1d154e0e..048e486c 100644 --- a/tests/test_azure_openai_provider.py +++ b/tests/test_azure_openai_provider.py @@ -1,145 +1,742 @@ -import sys -import types +"""Tests for Azure OpenAI provider implementation using Responses API.""" -import pytest +from unittest.mock import MagicMock, patch -if "openai" not in sys.modules: # pragma: no cover - test shim for optional dependency - stub = types.ModuleType("openai") - stub.AzureOpenAI = object # Replaced with a mock inside tests - sys.modules["openai"] = stub +import pytest from providers.azure_openai import AzureOpenAIProvider -from providers.shared import ModelCapabilities, ProviderType +from providers.shared import ProviderType + + +class TestAzureOpenAIProvider: + """Test Azure OpenAI provider functionality.""" + + def setup_method(self): + """Set up clean state before each test.""" + # Clear restriction service cache before each test + import utils.model_restrictions + + utils.model_restrictions._restriction_service = None + def teardown_method(self): + """Clean up after each test to avoid singleton issues.""" + # Clear restriction service cache after each test + import utils.model_restrictions -class _DummyResponse: - def __init__(self): - self.choices = [ - types.SimpleNamespace( - message=types.SimpleNamespace(content="hello"), - finish_reason="stop", + utils.model_restrictions._restriction_service = None + + def test_initialization_success(self): + """Test successful provider initialization with all required parameters.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + assert provider.api_key == "test-key" + assert provider.azure_endpoint == "https://test.openai.azure.com" + assert provider.api_version == "2025-03-01-preview" + assert provider.deployment_name == "gpt-5" + assert provider.get_provider_type() == ProviderType.AZURE + + def test_initialization_missing_azure_endpoint(self): + """Test initialization fails without azure_endpoint.""" + with pytest.raises(ValueError, match="azure_endpoint is required"): + AzureOpenAIProvider( + api_key="test-key", + api_version="2025-03-01-preview", + deployment_name="gpt-5", ) - ] - self.model = "prod-gpt4o" - self.id = "resp-123" - self.created = 0 - self.usage = types.SimpleNamespace( - prompt_tokens=5, - completion_tokens=3, - total_tokens=8, - ) - -@pytest.fixture -def dummy_azure_client(monkeypatch): - captured = {} - - class _DummyAzureClient: - def __init__(self, **kwargs): - captured["client_kwargs"] = kwargs - self.chat = types.SimpleNamespace(completions=types.SimpleNamespace(create=self._create_completion)) - self.responses = 
types.SimpleNamespace(create=self._create_response) - - def _create_completion(self, **kwargs): - captured["request_kwargs"] = kwargs - return _DummyResponse() - - def _create_response(self, **kwargs): - captured["responses_kwargs"] = kwargs - return _DummyResponse() - - monkeypatch.delenv("AZURE_OPENAI_ALLOWED_MODELS", raising=False) - monkeypatch.setattr("providers.azure_openai.AzureOpenAI", _DummyAzureClient) - return captured - - -def test_generate_content_uses_deployment_mapping(dummy_azure_client): - provider = AzureOpenAIProvider( - api_key="key", - azure_endpoint="https://example.openai.azure.com/", - deployments={"gpt-4o": "prod-gpt4o"}, - ) - - result = provider.generate_content("hello", "gpt-4o") - - assert dummy_azure_client["request_kwargs"]["model"] == "prod-gpt4o" - assert result.model_name == "gpt-4o" - assert result.provider == ProviderType.AZURE - assert provider.validate_model_name("prod-gpt4o") - - -def test_generate_content_accepts_deployment_alias(dummy_azure_client): - provider = AzureOpenAIProvider( - api_key="key", - azure_endpoint="https://example.openai.azure.com/", - deployments={"gpt-4o-mini": "mini-deployment"}, - ) - - # Calling with the deployment alias should still resolve properly. - result = provider.generate_content("hi", "mini-deployment") - - assert dummy_azure_client["request_kwargs"]["model"] == "mini-deployment" - assert result.model_name == "gpt-4o-mini" - - -def test_client_initialization_uses_endpoint_and_version(dummy_azure_client): - provider = AzureOpenAIProvider( - api_key="key", - azure_endpoint="https://example.openai.azure.com/", - api_version="2024-03-15-preview", - deployments={"gpt-4o": "prod"}, - ) - - _ = provider.client - - assert dummy_azure_client["client_kwargs"]["azure_endpoint"] == "https://example.openai.azure.com" - assert dummy_azure_client["client_kwargs"]["api_version"] == "2024-03-15-preview" - - -def test_deployment_overrides_capabilities(dummy_azure_client): - provider = AzureOpenAIProvider( - api_key="key", - azure_endpoint="https://example.openai.azure.com/", - deployments={ - "gpt-4o": { - "deployment": "prod-gpt4o", - "friendly_name": "Azure GPT-4o EU", - "intelligence_score": 19, - "supports_temperature": False, - "temperature_constraint": "fixed", - } - }, - ) - - caps = provider.get_capabilities("gpt-4o") - assert caps.friendly_name == "Azure GPT-4o EU" - assert caps.intelligence_score == 19 - assert not caps.supports_temperature - - -def test_registry_configuration_merges_capabilities(dummy_azure_client, monkeypatch): - def fake_registry_entries(self): - capability = ModelCapabilities( - provider=ProviderType.AZURE, - model_name="gpt-4o", - friendly_name="Azure GPT-4o Registry", - context_window=500_000, - max_output_tokens=128_000, + def test_initialization_missing_api_version(self): + """Test initialization fails without api_version.""" + with pytest.raises(ValueError, match="api_version is required"): + AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + deployment_name="gpt-5", + ) + + def test_initialization_missing_deployment_name(self): + """Test initialization fails without deployment_name.""" + with pytest.raises(ValueError, match="deployment_name is required"): + AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + ) + + def test_initialization_old_api_version_warning(self): + """Test warning is logged for API versions older than 2025-03-01-preview.""" + with 
patch("providers.azure_openai.logger") as mock_logger: + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2024-06-01", + deployment_name="gpt-5", + ) + + # Verify provider was created and warning was logged + assert provider is not None + mock_logger.warning.assert_called_once() + warning_message = mock_logger.warning.call_args[0][0] + assert "may not support Responses API" in warning_message + + def test_model_validation_gpt5(self): + """Test model name validation for GPT-5.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test valid models + assert provider.validate_model_name("gpt-5") is True + assert provider.validate_model_name("gpt-5-codex") is True + + # Test valid aliases + assert provider.validate_model_name("gpt5") is True + assert provider.validate_model_name("azure-gpt5") is True + assert provider.validate_model_name("azure-gpt-5") is True + assert provider.validate_model_name("codex") is True + assert provider.validate_model_name("gpt5-codex") is True + assert provider.validate_model_name("gpt5codex") is True + assert provider.validate_model_name("azure-codex") is True + assert provider.validate_model_name("azure-gpt5-codex") is True + + # Test invalid models + assert provider.validate_model_name("gpt-4") is False + assert provider.validate_model_name("o3") is False + assert provider.validate_model_name("invalid-model") is False + + def test_resolve_model_name(self): + """Test model name resolution for aliases.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test GPT-5 aliases + assert provider._resolve_model_name("gpt5") == "gpt-5" + assert provider._resolve_model_name("azure-gpt5") == "gpt-5" + assert provider._resolve_model_name("azure-gpt-5") == "gpt-5" + + # Test GPT-5 Codex aliases + assert provider._resolve_model_name("gpt5-codex") == "gpt-5-codex" + assert provider._resolve_model_name("gpt5codex") == "gpt-5-codex" + assert provider._resolve_model_name("codex") == "gpt-5-codex" + assert provider._resolve_model_name("azure-codex") == "gpt-5-codex" + assert provider._resolve_model_name("azure-gpt5-codex") == "gpt-5-codex" + + # Test full names pass through unchanged + assert provider._resolve_model_name("gpt-5") == "gpt-5" + assert provider._resolve_model_name("gpt-5-codex") == "gpt-5-codex" + + def test_get_capabilities_gpt5(self): + """Test getting model capabilities for GPT-5.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + capabilities = provider.get_capabilities("gpt-5") + + assert capabilities.model_name == "gpt-5" + assert capabilities.friendly_name == "Azure OpenAI (GPT-5)" + assert capabilities.provider == ProviderType.AZURE + assert capabilities.intelligence_score == 16 + assert capabilities.context_window == 400_000 + assert capabilities.max_output_tokens == 128_000 + assert capabilities.supports_extended_thinking is True + assert capabilities.supports_system_prompts is True + assert capabilities.supports_streaming is True + assert capabilities.supports_function_calling is True + assert capabilities.supports_json_mode is True + assert capabilities.supports_images is True + assert 
capabilities.max_image_size_mb == 20.0 + # Azure Responses API enforces fixed temperature behavior for reasoning + # models in this provider. Temperature is not user-tunable. + assert capabilities.supports_temperature is False + assert getattr(capabilities.temperature_constraint, "value", None) == 1.0 + + def test_get_capabilities_gpt5_codex(self): + """Test getting model capabilities for GPT-5 Codex.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5-codex", + ) + + capabilities = provider.get_capabilities("gpt-5-codex") + + assert capabilities.model_name == "gpt-5-codex" + assert capabilities.friendly_name == "Azure OpenAI (GPT-5 Codex)" + assert capabilities.provider == ProviderType.AZURE + assert capabilities.intelligence_score == 17 + assert capabilities.context_window == 400_000 + assert capabilities.max_output_tokens == 128_000 + assert capabilities.supports_extended_thinking is True + assert capabilities.supports_system_prompts is True + assert capabilities.supports_streaming is True + assert capabilities.supports_function_calling is True + assert capabilities.supports_json_mode is True + assert capabilities.supports_images is False + assert capabilities.max_image_size_mb == 0.0 + # GPT-5-Codex requires fixed temperature=1.0 + assert capabilities.supports_temperature is False + assert capabilities.temperature_constraint.value == 1.0 + + def test_get_capabilities_with_alias(self): + """Test getting model capabilities with alias resolves correctly.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + capabilities = provider.get_capabilities("gpt5") + assert capabilities.model_name == "gpt-5" + assert capabilities.friendly_name == "Azure OpenAI (GPT-5)" + + capabilities = provider.get_capabilities("codex") + assert capabilities.model_name == "gpt-5-codex" + assert capabilities.friendly_name == "Azure OpenAI (GPT-5 Codex)" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_basic(self, mock_azure_class): + """Test basic content generation using Responses API.""" + # Set up mock Azure client + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Mock the response object + mock_response = MagicMock() + mock_response.output_text = "This is the response content" + mock_response.id = "test-response-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 100 + mock_response.usage.output_tokens = 50 + mock_response.usage.total_tokens = 150 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Generate content + result = provider.generate_content( + prompt="Test prompt", + model_name="gpt-5", + temperature=1.0, + ) + + # Verify API was called correctly + mock_client.responses.create.assert_called_once() + call_kwargs = mock_client.responses.create.call_args[1] + + assert call_kwargs["model"] == "gpt-5" + # For codex/reasoning models, temperature is omitted (fixed internally) + assert "temperature" not in call_kwargs + assert len(call_kwargs["input"]) == 1 + assert call_kwargs["input"][0]["role"] == "user" + assert call_kwargs["input"][0]["content"] == "Test 
prompt" + + # Verify response + assert result.content == "This is the response content" + assert result.model_name == "gpt-5" + assert result.friendly_name == "Azure OpenAI (GPT-5)" + assert result.provider == ProviderType.AZURE + assert result.usage["input_tokens"] == 100 + assert result.usage["output_tokens"] == 50 + assert result.usage["total_tokens"] == 150 + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_with_system_prompt(self, mock_azure_class): + """Test content generation with system prompt.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.output_text = "Response with system prompt" + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 150 + mock_response.usage.output_tokens = 75 + mock_response.usage.total_tokens = 225 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="User message", + model_name="gpt-5", + system_prompt="You are a helpful assistant", + temperature=1.0, + ) + + # Verify messages include system prompt + call_kwargs = mock_client.responses.create.call_args[1] + assert len(call_kwargs["input"]) == 2 + assert call_kwargs["input"][0]["role"] == "system" + assert call_kwargs["input"][0]["content"] == "You are a helpful assistant" + assert call_kwargs["input"][1]["role"] == "user" + assert call_kwargs["input"][1]["content"] == "User message" + + assert result.content == "Response with system prompt" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_extracts_from_output_array(self, mock_azure_class): + """Test content extraction from output array when output_text is not available.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Mock response with output array (no output_text) + mock_response = MagicMock() + mock_response.output_text = None + + # Create mock output items + text_item = MagicMock() + text_item.type = "text" + text_item.content = [MagicMock(text="Text from output array")] + + mock_response.output = [text_item] + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 50 + mock_response.usage.output_tokens = 25 + mock_response.usage.total_tokens = 75 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="Test prompt", + model_name="gpt-5", + temperature=1.0, + ) + + # Verify content extracted from output array + assert result.content == "Text from output array" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_extracts_from_message_type(self, mock_azure_class): + """Test content extraction from output array with message type.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Mock response with output array containing message type + mock_response = MagicMock() + mock_response.output_text = None + + message_item = MagicMock() + message_item.type = "message" + message_item.content = "Direct message content" + + 
mock_response.output = [message_item] + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 30 + mock_response.usage.output_tokens = 20 + mock_response.usage.total_tokens = 50 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="Test", + model_name="gpt-5", + temperature=1.0, + ) + + assert result.content == "Direct message content" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_no_content_error(self, mock_azure_class): + """Test error when no content can be extracted from response.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Mock response with no content + mock_response = MagicMock() + mock_response.output_text = None + mock_response.output = [] + mock_response.usage = MagicMock() + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", ) - return {"gpt-4o": {"deployment": "registry-deployment", "capability": capability}} - monkeypatch.setattr(AzureOpenAIProvider, "_load_registry_entries", fake_registry_entries) - - provider = AzureOpenAIProvider( - api_key="key", - azure_endpoint="https://example.openai.azure.com/", - ) - - # Capability should come from registry - caps = provider.get_capabilities("gpt-4o") - assert caps.friendly_name == "Azure GPT-4o Registry" - assert caps.context_window == 500_000 - - # API call should use deployment defined in registry - provider.generate_content("hello", "gpt-4o") - assert dummy_azure_client["request_kwargs"]["model"] == "registry-deployment" + with pytest.raises(ValueError, match="No content available in response"): + provider.generate_content( + prompt="Test", + model_name="gpt-5", + temperature=1.0, + ) + + @patch("providers.azure_openai.AzureOpenAI") + def test_token_usage_extraction(self, mock_azure_class): + """Test token usage extraction from response.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.output_text = "Test response" + mock_response.id = "test-id" + mock_response.status = "completed" + + # Test with input_tokens and output_tokens format + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 200 + mock_response.usage.output_tokens = 100 + mock_response.usage.total_tokens = 300 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content(prompt="Test", model_name="gpt-5", temperature=1.0) + + assert result.usage["input_tokens"] == 200 + assert result.usage["prompt_tokens"] == 200 + assert result.usage["output_tokens"] == 100 + assert result.usage["completion_tokens"] == 100 + assert result.usage["total_tokens"] == 300 + + @patch("providers.azure_openai.AzureOpenAI") + def test_token_usage_extraction_alternative_format(self, mock_azure_class): + """Test token usage extraction with prompt_tokens and completion_tokens format.""" + mock_client = MagicMock() + 
mock_azure_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.output_text = "Test response" + mock_response.id = "test-id" + mock_response.status = "completed" + + # Test with prompt_tokens and completion_tokens format + # Create a custom mock class that only has specific attributes + class UsageWithLegacyFields: + prompt_tokens = 250 + completion_tokens = 125 + + mock_response.usage = UsageWithLegacyFields() + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content(prompt="Test", model_name="gpt-5", temperature=1.0) + + assert result.usage["prompt_tokens"] == 250 + assert result.usage["input_tokens"] == 250 + assert result.usage["completion_tokens"] == 125 + assert result.usage["output_tokens"] == 125 + assert result.usage["total_tokens"] == 375 # Calculated + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_with_max_output_tokens(self, mock_azure_class): + """Test content generation with explicit max_output_tokens.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.output_text = "Response" + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 50 + mock_response.usage.output_tokens = 25 + mock_response.usage.total_tokens = 75 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + result = provider.generate_content( + prompt="Test", + model_name="gpt-5", + max_output_tokens=4000, + temperature=1.0, + ) + + # Verify max_output_tokens was passed and result is not None + assert result is not None + call_kwargs = mock_client.responses.create.call_args[1] + assert call_kwargs["max_output_tokens"] == 4000 + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_api_error(self, mock_azure_class): + """Test error handling when API call fails.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + # Simulate API error + mock_client.responses.create.side_effect = Exception("API Error") + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + with pytest.raises(Exception, match="API Error"): + provider.generate_content( + prompt="Test", + model_name="gpt-5", + temperature=1.0, + ) + + def test_provider_type(self): + """Test get_provider_type returns ProviderType.AZURE.""" + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + assert provider.get_provider_type() == ProviderType.AZURE + + def test_get_preferred_model_extended_reasoning(self): + """Test get_preferred_model for extended reasoning category.""" + from tools.models import ToolModelCategory + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test with both models available + allowed = ["gpt-5", "gpt-5-codex"] + preferred 
= provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, allowed) + assert preferred == "gpt-5-codex" # Codex preferred for extended reasoning + + # Test with only gpt-5 available + allowed = ["gpt-5"] + preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, allowed) + assert preferred == "gpt-5" + + # Test with empty list + preferred = provider.get_preferred_model(ToolModelCategory.EXTENDED_REASONING, []) + assert preferred is None + + def test_get_preferred_model_fast_response(self): + """Test get_preferred_model for fast response category.""" + from tools.models import ToolModelCategory + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test with both models available + allowed = ["gpt-5", "gpt-5-codex"] + preferred = provider.get_preferred_model(ToolModelCategory.FAST_RESPONSE, allowed) + assert preferred == "gpt-5" # gpt-5 preferred for fast response + + # Test with only codex available + allowed = ["gpt-5-codex"] + preferred = provider.get_preferred_model(ToolModelCategory.FAST_RESPONSE, allowed) + assert preferred == "gpt-5-codex" + + def test_get_preferred_model_balanced(self): + """Test get_preferred_model for balanced category.""" + from tools.models import ToolModelCategory + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Test with both models available + allowed = ["gpt-5", "gpt-5-codex"] + preferred = provider.get_preferred_model(ToolModelCategory.BALANCED, allowed) + assert preferred == "gpt-5-codex" # Codex preferred for code tasks + + # Test with only gpt-5 available + allowed = ["gpt-5"] + preferred = provider.get_preferred_model(ToolModelCategory.BALANCED, allowed) + assert preferred == "gpt-5" + + @patch("providers.azure_openai.AzureOpenAI") + def test_close_cleanup(self, mock_azure_class): + """Test close method properly cleans up resources.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Initialize client by calling _get_client + provider._get_client() + assert provider._client is not None + + # Close should set client to None + provider.close() + assert provider._client is None + + @patch("providers.azure_openai.AzureOpenAI") + def test_lazy_client_initialization(self, mock_azure_class): + """Test that Azure client is lazily initialized on first use.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5", + ) + + # Client should not be initialized yet + assert provider._client is None + mock_azure_class.assert_not_called() + + # Get client should initialize it + client = provider._get_client() + assert client is not None + assert provider._client is not None + mock_azure_class.assert_called_once_with( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + ) + + # Second call should return same client + client2 = provider._get_client() + assert client2 is client + mock_azure_class.assert_called_once() # Still only called once + 
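Reviewer note (not part of the patch): the lifecycle these tests exercise — lazy client creation on first use, Responses API call, explicit cleanup — can be summarized in a minimal sketch. It assumes only the AzureOpenAIProvider surface shown in the tests above (constructor arguments, generate_content, close); any other usage is hypothetical.

    # Sketch of the provider lifecycle covered by test_lazy_client_initialization,
    # test_generate_content_basic, and test_close_cleanup. Not part of the diff.
    provider = AzureOpenAIProvider(
        api_key="test-key",
        azure_endpoint="https://test.openai.azure.com",
        api_version="2025-03-01-preview",
        deployment_name="gpt-5",
    )
    # The underlying AzureOpenAI client is created lazily on the first call.
    result = provider.generate_content(prompt="Hello", model_name="gpt-5", temperature=1.0)
    print(result.content, result.usage["total_tokens"])
    provider.close()  # per test_close_cleanup, this resets provider._client to None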
+ @patch("providers.azure_openai.AzureOpenAI") + def test_metadata_in_response(self, mock_azure_class): + """Test that response metadata includes deployment and status info.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.output_text = "Test content" + mock_response.id = "response-123" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 50 + mock_response.usage.output_tokens = 25 + mock_response.usage.total_tokens = 75 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="my-gpt5-deployment", + ) + + result = provider.generate_content(prompt="Test", model_name="gpt-5", temperature=1.0) + + # Verify metadata + assert result.metadata["response_id"] == "response-123" + assert result.metadata["status"] == "completed" + assert result.metadata["deployment_name"] == "my-gpt5-deployment" + + @patch("providers.azure_openai.AzureOpenAI") + def test_generate_content_resolves_alias(self, mock_azure_class): + """Test that generate_content resolves aliases before making API call.""" + mock_client = MagicMock() + mock_azure_class.return_value = mock_client + + mock_response = MagicMock() + mock_response.output_text = "Test response" + mock_response.id = "test-id" + mock_response.status = "completed" + mock_response.usage = MagicMock() + mock_response.usage.input_tokens = 50 + mock_response.usage.output_tokens = 25 + mock_response.usage.total_tokens = 75 + + mock_client.responses.create.return_value = mock_response + + provider = AzureOpenAIProvider( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2025-03-01-preview", + deployment_name="gpt-5-codex", + ) + + # Use alias "codex" + result = provider.generate_content( + prompt="Test prompt", + model_name="codex", + temperature=1.0, + ) + + # Verify API was called with deployment name (not the alias) + call_kwargs = mock_client.responses.create.call_args[1] + assert call_kwargs["model"] == "gpt-5-codex" # Uses deployment name + + # Verify result uses resolved model name + assert result.model_name == "gpt-5-codex" diff --git a/tests/test_deploy_scripts.py b/tests/test_deploy_scripts.py index d6d4ab22..1ec5da81 100644 --- a/tests/test_deploy_scripts.py +++ b/tests/test_deploy_scripts.py @@ -34,7 +34,7 @@ def test_bash_scripts_executable(self): script_path = self.scripts_dir / script if script_path.exists(): # Check for shebang - content = script_path.read_text() + content = script_path.read_text(encoding="utf-8") assert content.startswith("#!/"), f"Script {script} must have shebang" def test_powershell_scripts_format(self): @@ -44,7 +44,7 @@ def test_powershell_scripts_format(self): for script in ps_scripts: script_path = self.scripts_dir / script if script_path.exists(): - content = script_path.read_text() + content = script_path.read_text(encoding="utf-8") # Check for PowerShell indicators ps_indicators = [ @@ -77,7 +77,7 @@ def test_build_script_functionality(self): build_script = self.scripts_dir / "build.sh" if build_script.exists(): - content = build_script.read_text() + content = build_script.read_text(encoding="utf-8") # Should contain Docker build commands assert ( @@ -91,7 +91,7 @@ def test_deploy_script_health_check_integration(self): for script_name in deploy_scripts: script_path = self.scripts_dir / 
script_name if script_path.exists(): - content = script_path.read_text() + content = script_path.read_text(encoding="utf-8") # Look for health check related content health_check_indicators = ["health", "healthcheck", "docker inspect", "container status"] @@ -108,7 +108,7 @@ def test_script_error_handling(self): for script_name in scripts: script_path = self.scripts_dir / script_name if script_path.exists(): - content = script_path.read_text() + content = script_path.read_text(encoding="utf-8") # Check for error handling patterns error_patterns = [ @@ -145,7 +145,7 @@ def test_script_parameter_handling(self): deploy_ps1 = self.scripts_dir / "deploy.ps1" if deploy_ps1.exists(): - content = deploy_ps1.read_text() + content = deploy_ps1.read_text(encoding="utf-8") # PowerShell scripts should handle parameters param_indicators = ["param(", "[Parameter(", "$SkipHealthCheck", "$HealthCheckTimeout"] @@ -161,7 +161,7 @@ def test_environment_preparation(self): for script_name in scripts_to_check: script_path = self.scripts_dir / script_name if script_path.exists(): - content = script_path.read_text() + content = script_path.read_text(encoding="utf-8") # Check for environment preparation env_prep_patterns = [".env", "environment", "API_KEY", "mkdir", "logs"] @@ -199,7 +199,7 @@ def test_healthcheck_functions_exist(self): if not self.healthcheck_script.exists(): pytest.skip("healthcheck.py not found") - content = self.healthcheck_script.read_text() + content = self.healthcheck_script.read_text(encoding="utf-8") # Expected functions expected_functions = ["def check_process", "def check_python_imports", "def check_log_directory"] @@ -235,7 +235,7 @@ def test_healthcheck_exit_codes(self): if not self.healthcheck_script.exists(): pytest.skip("healthcheck.py not found") - content = self.healthcheck_script.read_text() + content = self.healthcheck_script.read_text(encoding="utf-8") # Should have proper exit code handling exit_patterns = [ @@ -263,7 +263,7 @@ def test_scripts_work_with_compose_file(self): deploy_script = project_root / "docker" / "scripts" / "deploy.sh" if deploy_script.exists(): - content = deploy_script.read_text() + content = deploy_script.read_text(encoding="utf-8") # Should work with compose file compose_refs = ["docker-compose", "compose.yml", "compose.yaml"] @@ -285,8 +285,8 @@ def test_cross_platform_compatibility(self): # If both exist, they should have similar functionality if unix_deploy.exists() and windows_deploy.exists(): - unix_content = unix_deploy.read_text() - windows_content = windows_deploy.read_text() + unix_content = unix_deploy.read_text(encoding="utf-8") + windows_content = windows_deploy.read_text(encoding="utf-8") # Both should reference Docker assert "docker" in unix_content.lower() @@ -300,7 +300,7 @@ def test_script_logging_integration(self): for script_name in scripts: script_path = scripts_dir / script_name if script_path.exists(): - content = script_path.read_text() + content = script_path.read_text(encoding="utf-8") # Check for logging/output logging_patterns = ["echo", "Write-Host", "Write-Output", "print", "logger"] diff --git a/tests/test_docker_config_complete.py b/tests/test_docker_config_complete.py index 08e69a08..3d327830 100644 --- a/tests/test_docker_config_complete.py +++ b/tests/test_docker_config_complete.py @@ -20,7 +20,7 @@ def test_dockerfile_configuration(self): if not dockerfile.exists(): pytest.skip("Dockerfile not found") - content = dockerfile.read_text() + content = dockerfile.read_text(encoding="utf-8") # Essential checks assert "FROM 
python:" in content @@ -43,7 +43,7 @@ def test_environment_file_template(self): env_example = project_root / ".env.example" if env_example.exists(): - content = env_example.read_text() + content = env_example.read_text(encoding="utf-8") # Essential variables essential_vars = ["GEMINI_API_KEY", "OPENAI_API_KEY", "LOG_LEVEL"] diff --git a/tests/test_docker_healthcheck.py b/tests/test_docker_healthcheck.py index 69383803..60bf43c4 100644 --- a/tests/test_docker_healthcheck.py +++ b/tests/test_docker_healthcheck.py @@ -29,7 +29,7 @@ def test_healthcheck_script_executable(self): pytest.skip("healthcheck.py not found") # Check if script has Python shebang - content = self.healthcheck_script.read_text() + content = self.healthcheck_script.read_text(encoding="utf-8") assert content.startswith("#!/usr/bin/env python"), "Health check script must have Python shebang" @patch("subprocess.run") @@ -101,7 +101,7 @@ def test_health_check_docker_configuration(self): compose_file = self.project_root / "docker-compose.yml" if compose_file.exists(): - content = compose_file.read_text() + content = compose_file.read_text(encoding="utf-8") # Check for health check configuration assert "healthcheck:" in content, "Health check must be configured" @@ -119,7 +119,7 @@ def test_dockerfile_health_check_setup(self): dockerfile = project_root / "Dockerfile" if dockerfile.exists(): - content = dockerfile.read_text() + content = dockerfile.read_text(encoding="utf-8") # Check that health check script is copied script_copied = ("COPY" in content and "healthcheck.py" in content) or "COPY . ." in content diff --git a/tests/test_docker_implementation.py b/tests/test_docker_implementation.py index 7bf19bf5..9ece8fac 100644 --- a/tests/test_docker_implementation.py +++ b/tests/test_docker_implementation.py @@ -37,7 +37,7 @@ def test_dockerfile_exists(self): assert self.dockerfile_path.exists(), "Dockerfile must exist" # Check Dockerfile content - content = self.dockerfile_path.read_text() + content = self.dockerfile_path.read_text(encoding="utf-8") assert "FROM python:" in content, "Dockerfile must have a Python base" # Dockerfile uses COPY . . to copy all code assert "COPY . ." 
in content or "COPY --chown=" in content, "Dockerfile must copy source code" @@ -49,7 +49,7 @@ def test_docker_compose_configuration(self): assert self.docker_compose_path.exists(), "docker-compose.yml must exist" # Basic YAML syntax check - content = self.docker_compose_path.read_text() + content = self.docker_compose_path.read_text(encoding="utf-8") assert "services:" in content, "docker-compose.yml must have services" assert "zen-mcp" in content, "Service zen-mcp must be defined" assert "build:" in content, "Build configuration must be present" @@ -59,7 +59,7 @@ def test_environment_file_template(self): env_example_path = self.project_root / ".env.example" if env_example_path.exists(): - content = env_example_path.read_text() + content = env_example_path.read_text(encoding="utf-8") assert "GEMINI_API_KEY=" in content, "Template must contain GEMINI_API_KEY" assert "OPENAI_API_KEY=" in content, "Template must contain OPENAI_API_KEY" assert "LOG_LEVEL=" in content, "Template must contain LOG_LEVEL" @@ -236,7 +236,7 @@ def test_non_root_user_configuration(self): dockerfile_path = Path(__file__).parent.parent / "Dockerfile" if dockerfile_path.exists(): - content = dockerfile_path.read_text() + content = dockerfile_path.read_text(encoding="utf-8") # Check that a non-root user is configured assert "USER " in content or "useradd" in content, "Dockerfile should configure a non-root user" @@ -246,7 +246,7 @@ def test_readonly_filesystem_configuration(self): docker_compose_path = Path(__file__).parent.parent / "docker-compose.yml" if docker_compose_path.exists(): - content = docker_compose_path.read_text() + content = docker_compose_path.read_text(encoding="utf-8") # Look for security configurations security_indicators = ["read_only", "tmpfs", "security_opt", "cap_drop"] @@ -260,7 +260,7 @@ def test_environment_variable_security(self): dockerfile_path = Path(__file__).parent.parent / "Dockerfile" if dockerfile_path.exists(): - content = dockerfile_path.read_text() + content = dockerfile_path.read_text(encoding="utf-8") # Check that no API keys are hardcoded sensitive_patterns = ["API_KEY=sk-", "API_KEY=gsk_", "API_KEY=xai-"] @@ -309,7 +309,7 @@ def temp_project_dir(): (temp_path / "logs").mkdir() # Create base files - (temp_path / "server.py").write_text("# Mock server.py") + (temp_path / "server.py").write_text("# Mock server.py", encoding="utf-8") (temp_path / "Dockerfile").write_text( """ FROM python:3.11-slim @@ -331,7 +331,7 @@ def test_complete_docker_setup_validation(self, temp_project_dir): GEMINI_API_KEY=test_key LOG_LEVEL=INFO """ - (temp_project_dir / ".env").write_text(env_content) + (temp_project_dir / ".env").write_text(env_content, encoding="utf-8") # Validate that everything is in place assert (temp_project_dir / ".env").exists() diff --git a/tests/test_docker_mcp_validation.py b/tests/test_docker_mcp_validation.py index c28642d8..1fa8b769 100644 --- a/tests/test_docker_mcp_validation.py +++ b/tests/test_docker_mcp_validation.py @@ -29,7 +29,7 @@ def test_dockerfile_exists_and_valid(self): """Test Dockerfile existence and validity""" assert self.dockerfile_path.exists(), "Missing Dockerfile" - content = self.dockerfile_path.read_text() + content = self.dockerfile_path.read_text(encoding="utf-8") assert "FROM python:" in content, "Python base required" assert "server.py" in content, "server.py must be copied" @@ -63,7 +63,7 @@ def test_docker_security_configuration(self): if not self.dockerfile_path.exists(): pytest.skip("Dockerfile not found") - content = 
self.dockerfile_path.read_text() + content = self.dockerfile_path.read_text(encoding="utf-8") # Check non-root user has_user_config = "USER " in content or "useradd" in content or "adduser" in content diff --git a/tests/test_docker_security.py b/tests/test_docker_security.py index 0614903b..1fee325e 100644 --- a/tests/test_docker_security.py +++ b/tests/test_docker_security.py @@ -24,7 +24,7 @@ def test_non_root_user_configuration(self): if not self.dockerfile_path.exists(): pytest.skip("Dockerfile not found") - content = self.dockerfile_path.read_text() + content = self.dockerfile_path.read_text(encoding="utf-8") # Check for user creation or switching user_indicators = ["USER " in content, "useradd" in content, "adduser" in content, "RUN addgroup" in content] @@ -36,7 +36,7 @@ def test_no_unnecessary_privileges(self): if not self.compose_path.exists(): pytest.skip("docker-compose.yml not found") - content = self.compose_path.read_text() + content = self.compose_path.read_text(encoding="utf-8") # Check that dangerous options are not used dangerous_options = ["privileged: true", "--privileged", "cap_add:", "SYS_ADMIN"] @@ -49,7 +49,7 @@ def test_read_only_filesystem(self): if not self.compose_path.exists(): pytest.skip("docker-compose.yml not found") - content = self.compose_path.read_text() + content = self.compose_path.read_text(encoding="utf-8") # Check for read-only configurations if "read_only:" in content: @@ -64,7 +64,7 @@ def test_environment_variable_security(self): if not file_path.exists(): continue - content = file_path.read_text().lower() + content = file_path.read_text(encoding="utf-8").lower() # Check that we don't have hardcoded secrets for pattern in sensitive_patterns: @@ -83,7 +83,7 @@ def test_network_security(self): if not self.compose_path.exists(): pytest.skip("docker-compose.yml not found") - content = self.compose_path.read_text() + content = self.compose_path.read_text(encoding="utf-8") # Check for custom network (better than default bridge) if "networks:" in content: @@ -96,7 +96,7 @@ def test_volume_security(self): if not self.compose_path.exists(): pytest.skip("docker-compose.yml not found") - content = self.compose_path.read_text() + content = self.compose_path.read_text(encoding="utf-8") # Check that sensitive host paths are not mounted dangerous_mounts = ["/:/", "/var/run/docker.sock:", "/etc/passwd:", "/etc/shadow:", "/root:"] @@ -108,7 +108,7 @@ def test_secret_management(self): """Test that secrets are properly managed""" # Check for Docker secrets usage in compose file if self.compose_path.exists(): - content = self.compose_path.read_text() + content = self.compose_path.read_text(encoding="utf-8") # If secrets are used, they should be properly configured if "secrets:" in content: @@ -119,7 +119,7 @@ def test_container_capabilities(self): if not self.compose_path.exists(): pytest.skip("docker-compose.yml not found") - content = self.compose_path.read_text() + content = self.compose_path.read_text(encoding="utf-8") # Check for capability restrictions if "cap_drop:" in content: @@ -141,7 +141,7 @@ def test_env_file_not_in_image(self): dockerfile = project_root / "Dockerfile" if dockerfile.exists(): - content = dockerfile.read_text() + content = dockerfile.read_text(encoding="utf-8") # .env files should not be copied assert "COPY .env" not in content, ".env file should not be copied into image" @@ -152,7 +152,7 @@ def test_dockerignore_for_sensitive_files(self): dockerignore = project_root / ".dockerignore" if dockerignore.exists(): - content = 
dockerignore.read_text() + content = dockerignore.read_text(encoding="utf-8") sensitive_files = [".env", "*.key", "*.pem", ".git"] @@ -200,7 +200,7 @@ def test_dockerfile_best_practices(self): if not dockerfile.exists(): pytest.skip("Dockerfile not found") - content = dockerfile.read_text() + content = dockerfile.read_text(encoding="utf-8") # Check for multi-stage builds (reduces attack surface) if "FROM" in content: @@ -221,7 +221,7 @@ def test_container_security_context(self): compose_file = project_root / "docker-compose.yml" if compose_file.exists(): - content = compose_file.read_text() + content = compose_file.read_text(encoding="utf-8") # Check for security context if configured security_options = ["security_opt:", "no-new-privileges:", "read_only:"] diff --git a/tests/test_docker_volume_persistence.py b/tests/test_docker_volume_persistence.py index c7a52169..bd6354d0 100644 --- a/tests/test_docker_volume_persistence.py +++ b/tests/test_docker_volume_persistence.py @@ -25,7 +25,7 @@ def test_docker_compose_volumes_configuration(self): if not self.docker_compose_path.exists(): pytest.skip("docker-compose.yml not found") - content = self.docker_compose_path.read_text() + content = self.docker_compose_path.read_text(encoding="utf-8") # Check for named volume definition assert "zen-mcp-config:" in content, "zen-mcp-config volume must be defined" @@ -73,7 +73,7 @@ def test_log_persistence_configuration(self): log_mount = "./logs:/app/logs" if self.docker_compose_path.exists(): - content = self.docker_compose_path.read_text() + content = self.docker_compose_path.read_text(encoding="utf-8") assert log_mount in content, f"Log mount {log_mount} must be configured" def test_volume_backup_restore_capability(self): @@ -113,7 +113,7 @@ def test_volume_permissions(self): # Test creating a temporary file test_file = logs_dir / "test_write_permission.tmp" try: - test_file.write_text("test") + test_file.write_text("test", encoding="utf-8") assert test_file.exists() finally: if test_file.exists(): diff --git a/tests/test_file_protection.py b/tests/test_file_protection.py index 067eb0a6..1cab03be 100644 --- a/tests/test_file_protection.py +++ b/tests/test_file_protection.py @@ -5,9 +5,12 @@ 3. 
Excluded directories """ +import sys from pathlib import Path from unittest.mock import patch +import pytest + from utils.file_utils import ( expand_paths, get_user_home_directory, @@ -116,6 +119,7 @@ def test_allow_home_subdirectories(self): assert is_home_directory_root(Path("/Users/testuser/projects")) is False assert is_home_directory_root(Path("/Users/testuser/Documents/code")) is False + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-specific test") def test_detect_home_patterns_macos(self): """Test detection of macOS home directory patterns.""" # Test various macOS home patterns @@ -124,6 +128,7 @@ def test_detect_home_patterns_macos(self): # But subdirectories should be allowed assert is_home_directory_root(Path("/Users/john/projects")) is False + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-specific test") def test_detect_home_patterns_linux(self): """Test detection of Linux home directory patterns.""" assert is_home_directory_root(Path("/home/ubuntu")) is True @@ -285,10 +290,10 @@ def mock_is_mcp(path): file_paths = [str(f) for f in files] - # User files should be included - assert any("my-awesome-project/README.md" in p for p in file_paths) - assert any("my-awesome-project/main.py" in p for p in file_paths) - assert any("src/app.py" in p for p in file_paths) + # User files should be included (check for components separately for cross-platform) + assert any("my-awesome-project" in p and "README.md" in p for p in file_paths) + assert any("my-awesome-project" in p and "main.py" in p for p in file_paths) + assert any("src" in p and "app.py" in p for p in file_paths) # MCP files should NOT be included assert not any("gemini-mcp-server" in p for p in file_paths) diff --git a/tests/test_openai_provider.py b/tests/test_openai_provider.py index 6ffb5d22..520b2728 100644 --- a/tests/test_openai_provider.py +++ b/tests/test_openai_provider.py @@ -128,7 +128,7 @@ def test_get_capabilities_gpt5(self): assert capabilities.supports_system_prompts is True assert capabilities.supports_streaming is False assert capabilities.supports_function_calling is True - assert capabilities.supports_temperature is True + assert capabilities.supports_temperature is False def test_get_capabilities_gpt5_mini(self): """Test getting model capabilities for GPT-5-mini.""" @@ -144,7 +144,7 @@ def test_get_capabilities_gpt5_mini(self): assert capabilities.supports_system_prompts is True assert capabilities.supports_streaming is False assert capabilities.supports_function_calling is True - assert capabilities.supports_temperature is True + assert capabilities.supports_temperature is False @patch("providers.openai_compatible.OpenAI") def test_generate_content_resolves_alias_before_api_call(self, mock_openai_class): diff --git a/tests/test_pip_detection_fix.py b/tests/test_pip_detection_fix.py index 3e8ec2e7..e771eae7 100644 --- a/tests/test_pip_detection_fix.py +++ b/tests/test_pip_detection_fix.py @@ -5,6 +5,7 @@ """ import subprocess +import sys import tempfile from pathlib import Path @@ -14,6 +15,7 @@ class TestPipDetectionFix: """Test cases for issue #188: PIP is available but not recognized.""" + @pytest.mark.skipif(sys.platform == "win32", reason="Requires bash") def test_run_server_script_syntax_valid(self): """Test that run-server.sh has valid bash syntax.""" result = subprocess.run(["bash", "-n", "./run-server.sh"], capture_output=True, text=True) @@ -21,24 +23,25 @@ def test_run_server_script_syntax_valid(self): def test_run_server_has_proper_shebang(self): """Test that run-server.sh 
starts with proper shebang.""" - content = Path("./run-server.sh").read_text() + content = Path("./run-server.sh").read_text(encoding="utf-8") assert content.startswith("#!/bin/bash"), "Script missing proper bash shebang" def test_critical_functions_exist(self): """Test that all critical functions are defined in the script.""" - content = Path("./run-server.sh").read_text() + content = Path("./run-server.sh").read_text(encoding="utf-8") critical_functions = ["find_python", "setup_environment", "setup_venv", "install_dependencies", "bootstrap_pip"] for func in critical_functions: assert f"{func}()" in content, f"Critical function {func}() not found in script" + @pytest.mark.skipif(sys.platform == "win32", reason="Requires bash") def test_pip_detection_consistency_issue(self): """Test the specific issue: pip works in setup_venv but fails in install_dependencies. This test verifies that our fix ensures consistent Python executable paths. """ # Test that the get_venv_python_path function now returns absolute paths - content = Path("./run-server.sh").read_text() + content = Path("./run-server.sh").read_text(encoding="utf-8") # Check that get_venv_python_path includes our absolute path conversion logic assert "abs_venv_path" in content, "get_venv_python_path should use absolute paths" @@ -63,12 +66,12 @@ def test_pip_detection_with_non_interactive_shell(self): # Create mock python executable python_exe = bin_path / "python" - python_exe.write_text("#!/bin/bash\necho 'Python 3.12.3'\n") + python_exe.write_text("#!/bin/bash\necho 'Python 3.12.3'\n", encoding="utf-8") python_exe.chmod(0o755) # Create mock pip executable pip_exe = bin_path / "pip" - pip_exe.write_text("#!/bin/bash\necho 'pip 23.0.1'\n") + pip_exe.write_text("#!/bin/bash\necho 'pip 23.0.1'\n", encoding="utf-8") pip_exe.chmod(0o755) # Test that we can detect pip using explicit paths (not PATH) @@ -82,7 +85,7 @@ def test_enhanced_diagnostic_messages_included(self): Verify that the script contains the enhanced error diagnostics we added. 
""" - content = Path("./run-server.sh").read_text() + content = Path("./run-server.sh").read_text(encoding="utf-8") # Check that enhanced diagnostic information is present in the script expected_diagnostic_patterns = [ diff --git a/tests/test_utils.py b/tests/test_utils.py index f3d1f92a..c0102734 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,6 +2,10 @@ Tests for utility functions """ +import sys + +import pytest + from utils import check_token_limit, estimate_tokens, read_file_content, read_files @@ -29,6 +33,7 @@ def test_read_file_content_not_found(self, project_path): assert "Error: File does not exist" in content assert tokens > 0 + @pytest.mark.skipif(sys.platform == "win32", reason="Unix-specific test") def test_read_file_content_safe_files_allowed(self): """Test that safe files outside the original project root are now allowed""" # In the new security model, safe files like /etc/passwd diff --git a/tools/simple/base.py b/tools/simple/base.py index 2b37f2c3..c185777c 100644 --- a/tools/simple/base.py +++ b/tools/simple/base.py @@ -427,14 +427,23 @@ async def execute(self, arguments: dict[str, Any]) -> list: supports_thinking = capabilities.supports_extended_thinking # Generate content with provider abstraction - model_response = provider.generate_content( - prompt=prompt, - model_name=self._current_model_name, - system_prompt=system_prompt, - temperature=temperature, - thinking_mode=thinking_mode if supports_thinking else None, - images=images if images else None, - ) + if capabilities.supports_temperature: + model_response = provider.generate_content( + prompt=prompt, + model_name=self._current_model_name, + system_prompt=system_prompt, + temperature=temperature, + thinking_mode=thinking_mode if supports_thinking else None, + images=images if images else None, + ) + else: + model_response = provider.generate_content( + prompt=prompt, + model_name=self._current_model_name, + system_prompt=system_prompt, + thinking_mode=thinking_mode if supports_thinking else None, + images=images if images else None, + ) logger.info(f"Received response from {provider.get_provider_type().value} API for {self.get_name()}") @@ -484,14 +493,23 @@ async def execute(self, arguments: dict[str, Any]) -> list: retry_prompt = f"{original_prompt}\n\nIMPORTANT: Please provide a substantive response. If you cannot respond to the above request, please explain why and suggest alternatives." try: - retry_response = provider.generate_content( - prompt=retry_prompt, - model_name=self._current_model_name, - system_prompt=system_prompt, - temperature=temperature, - thinking_mode=thinking_mode if supports_thinking else None, - images=images if images else None, - ) + if capabilities.supports_temperature: + retry_response = provider.generate_content( + prompt=retry_prompt, + model_name=self._current_model_name, + system_prompt=system_prompt, + temperature=temperature, + thinking_mode=thinking_mode if supports_thinking else None, + images=images if images else None, + ) + else: + retry_response = provider.generate_content( + prompt=retry_prompt, + model_name=self._current_model_name, + system_prompt=system_prompt, + thinking_mode=thinking_mode if supports_thinking else None, + images=images if images else None, + ) if retry_response.content: # Successful retry - use the retry response