BeehiveInnovations · sangjin-kim-eyk · Oct 3, 2025 · Oct 4, 2025 · Oct 4, 2025 · Oct 4, 2025
diff --git a/.claude/settings.json b/.claude/settings.json
@@ -1,7 +1,76 @@
 {
   "permissions": {
     "allow": [
-    ],
-    "deny": []
-  }
+      "Bash(poetry run pytest:*)",
+      "Bash(poetry install:*)",
+      "Bash(python -m pytest tests/adapters/filesystem/test_mock_file_system_adapter.py -v)",
+      "Bash(python:*)",
+      "Bash(poetry run:*)",
+      "Bash(rm:*)",
+      "Bash(mkdir:*)",
+      "Bash(poetry --version)",
+      "Bash(poetry:*)",
+      "Bash(ls:*)",
+      "Bash(find:*)",
+      "Bash(grep:*)",
+      "Bash(where python)",
+      "Bash(set APP_NAME=Test Agent)",
+      "Bash(del test_validation.py)",
+      "Bash(E:/agent_smith/.venv/Scripts/python.exe -m pytest tests/integration/ -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ -v --tb=no -q)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_docker_process_integration.py -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_filesystem_integration.py -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_us_001_task_selection.py -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_us_002_isolated_agent_execution.py -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_us_003_automated_e2e_test_workflow.py -v)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ --tb=no -q)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ --ignore=tests/integration/test_di_container_integration.py --ignore=tests/integration/test_git_integration.py --ignore=tests/integration/test_llm_integration.py --ignore=tests/integration/test_observability_integration.py --ignore=tests/integration/test_us_007_proactive_plan_validation.py --ignore=tests/integration/test_us_012_shell_injection_prevention.py --tb=no -q)",
+      "Bash(where poetry)",
+      "Bash(\"C:\\Users\\idnot\\AppData\\Roaming\\Python\\Scripts\\poetry.exe\" run pytest tests/integration/ -v)",
+      "Bash(powershell:*)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/ -v --tb=short)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -c \"from opentelemetry.metrics import __all__; print(__all__)\")",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_di_container_integration.py -v --tb=short)",
+      "Bash(\"E:\\agent_smith\\.venv\\Scripts\\python.exe\" -m pytest tests/integration/test_di_container_integration.py::TestDIContainerIntegration::test_service_dependency_resolution -v --tb=short)",
+      "Bash(cmd /c \"poetry run python test_orchestrator_basic.py\")",
+      "Bash(wc:*)",
+      "mcp__sequential-thinking__sequentialthinking",
+      "Bash(cmd:*)",
+      "Bash(echo $0)",
+      "Bash(cat:*)",
+      "Bash(export:*)",
+      "Bash(.venv/Scripts/pytest.exe tests/security/test_context_loader_security.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/core/orchestration/ -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/mocks/mock_planner.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/adapters/ -v)",
+      "Bash(.venv/Scripts/pytest.exe --collect-only tests/)",
+      "Bash(.venv/Scripts/pytest.exe tests/adapters/docker/test_mock_docker_process_adapter.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/adapters/filesystem/test_mock_file_system_adapter.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/integration/test_orchestrator_basic_workflow_integration.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/integration/test_us_012_shell_injection_prevention.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/adapters/observability/test_mock_observability_adapter.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/core/domain/test_exceptions.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/adapters/scrubbers/test_regex_scrubber.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/integration/test_context_loader_integration.py -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/performance/ -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/core/context/ -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/core/config/ -v)",
+      "Bash(.venv/Scripts/pytest.exe tests/integration/test_us_007_proactive_plan_validation.py -v)",
+      "Bash(for:*)",
+      "Bash(do echo \"Processing $file\")",
+      "Bash(head:*)",
+      "Bash(done)"
+    ]
+  },
+  "enableAllProjectMcpServers": true,
+  "enabledMcpjsonServers": [
+    "sequential-thinking",
+    "firecrawl",
+    "tavily",
+    "perplexity",
+    "Ref",
+    "serena",
+    "zen"
+  ]
 }
diff --git a/.env.example b/.env.example
@@ -35,6 +35,52 @@ DIAL_API_KEY=your_dial_api_key_here
 # DIAL_API_HOST=https://core.dialx.ai        # Optional: Base URL without /openai suffix (auto-appended)
 # DIAL_API_VERSION=2025-01-01-preview        # Optional: API version header for DIAL requests
 
+# Azure OpenAI GPT-5 and GPT-5-Codex Configuration
+# ==============================================================================
+# CRITICAL: Azure OpenAI GPT-5/GPT-5-Codex models have UNIQUE REQUIREMENTS:
+#
+# 1. RESPONSES API ONLY - Chat Completions API is NOT implemented for these models
+# 2. TEMPERATURE CONSTRAINT - Must be exactly 1.0 (returns 400 error if changed)
+# 3. MINIMUM OUTPUT TOKENS - Must be at least 16 (returns 400 error if less)
+# 4. API VERSION - Must be 2025-03-01-preview or later for Responses API
+#
+# How to obtain credentials:
+# 1. Log in to Azure Portal (https://portal.azure.com)
+# 2. Navigate to your Azure OpenAI resource
+# 3. Go to "Keys and Endpoint" section
+# 4. Copy the API Key and Endpoint URL
+# 5. Deploy a GPT-5 or GPT-5-Codex model and note the deployment name
+#
+# All 4 variables below are REQUIRED for Azure OpenAI to work.
+# API version minimum: 2025-03-01-preview. Deployment name is your own (e.g., gpt-5, gpt-5-codex).
+AZURE_OPENAI_API_KEY=your_azure_openai_key_here
+AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com/
+AZURE_OPENAI_API_VERSION=2025-04-01-preview
+AZURE_OPENAI_DEPLOYMENT_NAME=gpt-5
+# Model Capabilities Comparison:
+# ┌─────────────┬────────────┬────────────┬──────────┬─────────┬──────────────┐
+# │ Model       │ Context    │ Max Output │ Vision   │ Code    │ Intelligence │
+# ├─────────────┼────────────┼────────────┼──────────┼─────────┼──────────────┤
+# │ GPT-5       │ 400K       │ 128K       │ Yes      │ Good    │ Score: 16    │
+# │ GPT-5-Codex │ 400K       │ 128K       │ No       │ Elite   │ Score: 17    │
+# └─────────────┴────────────┴────────────┴──────────┴─────────┴──────────────┘
+#
+# Known Constraints (from Azure OpenAI documentation and community testing):
+# - Temperature: Fixed at 1.0, error message: "Unsupported value. Only the default (1) value is supported"
+# - Max Output Tokens: Minimum 16 tokens required for reasoning models
+# - Responses API: Different response format than Chat Completions API
+# - No streaming support for reasoning tokens (only final output streams)
+#
+# Example deployment names (user-defined in Azure Portal):
+# - gpt-5                 # General purpose reasoning model
+# - gpt-5-codex           # Code-specialized variant
+# - my-gpt5-deployment    # Custom name you choose
+# - prod-gpt5-codex       # Environment-specific naming
+#
+# IMPORTANT: These are reasoning models with extended thinking capabilities.
+# They use additional "reasoning tokens" internally before generating the output.
+# ==============================================================================
+
 # Option 2: Use OpenRouter for access to multiple models through one API
 # Get your OpenRouter API key from: https://openrouter.ai/
 # If using OpenRouter, comment out the native API keys above

diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+      actions: read # Required for Claude to read CI results on PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+          # This is an optional setting that allows Claude to read CI results on PRs
+          additional_permissions: |
+            actions: read
+
+          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+          # prompt: 'Update the pull request description to include a summary of changes.'
+
+          # Optional: Add claude_args to customize behavior and configuration
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options
+          # claude_args: '--model claude-opus-4-1-20250805 --allowed-tools Bash(gh pr:*)'
+
diff --git a/.gitignore b/.gitignore
@@ -144,6 +144,9 @@ cython_debug/
 # VS Code
 .vscode/
 
+# Cursor
+.cursor/
+
 # macOS
 .DS_Store
 
@@ -161,9 +164,6 @@ htmlcov/
 coverage.xml
 .pytest_cache/
 
-# Test simulation artifacts (dynamically created during testing)
-test_simulation_files/.claude/
-
 # Temporary test directories
 test-setup/
 
@@ -175,8 +175,8 @@ FEATURE_*.md
 # Local user instructions
 CLAUDE.local.md
 
-# Claude Code personal settings
-.claude/settings.local.json
+# Claude Code
+.claude/
 
 # Standalone mode files
 .zen_venv/