Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions interpreter/computer_use/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,11 @@ async def main():
if "--server" in sys.argv:
app = FastAPI()

# Start the mouse position checking thread when in server mode
mouse_thread = threading.Thread(target=check_mouse_position)
mouse_thread.daemon = True
mouse_thread.start()
# Start the mouse position checking thread when in server mode (if display available)
if pyautogui is not None:
mouse_thread = threading.Thread(target=check_mouse_position)
mouse_thread.daemon = True
mouse_thread.start()

# Get API key from environment variable
api_key = os.environ.get("ANTHROPIC_API_KEY")
Expand Down Expand Up @@ -460,10 +461,11 @@ async def tool_output_callback(result: ToolResult, tool_id: str):

print_markdown(markdown_text)

# Start the mouse position checking thread
mouse_thread = threading.Thread(target=check_mouse_position)
mouse_thread.daemon = True
mouse_thread.start()
# Start the mouse position checking thread (if display available)
if pyautogui is not None:
mouse_thread = threading.Thread(target=check_mouse_position)
mouse_thread.daemon = True
mouse_thread.start()

while not exit_flag:
user_input = input("> ")
Expand Down Expand Up @@ -535,14 +537,22 @@ def run_async_main():
import threading

# Replace the pynput and screeninfo imports with pyautogui
import pyautogui
# Handle missing DISPLAY environment variable (e.g., SSH sessions)
try:
import pyautogui
except Exception:
pyautogui = None

# Replace the global variables and functions related to mouse tracking
exit_flag = False


def check_mouse_position():
"""Monitor mouse position and exit if mouse moves to a corner."""
global exit_flag
# Skip if pyautogui is not available (no display)
if pyautogui is None:
return
corner_threshold = 10
screen_width, screen_height = pyautogui.size()

Expand Down
37 changes: 33 additions & 4 deletions interpreter/computer_use/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,36 @@
from typing import Literal, TypedDict
from uuid import uuid4

# Add import for PyAutoGUI
import pyautogui
# Add import for PyAutoGUI with graceful handling of missing DISPLAY
try:
import pyautogui
except Exception as e:
# pyautogui may fail to import when DISPLAY is not set (e.g., SSH sessions)
pyautogui = None
_pyautogui_import_error = e
else:
_pyautogui_import_error = None

from anthropic.types.beta import BetaToolComputerUse20241022Param

from .base import BaseAnthropicTool, ToolError, ToolResult
from .run import run

OUTPUT_DIR = "/tmp/outputs"

# Fallback screen size when pyautogui is unavailable
_FALLBACK_SCREEN_SIZE = (1024, 768)


def _require_pyautogui():
    """Guard GUI-dependent code paths against a missing PyAutoGUI module.

    Returns ``None`` without doing anything when the module-level
    ``pyautogui`` name is bound to a real module.

    Raises:
        ToolError: when ``pyautogui`` is ``None``. The error is chained from
            ``_pyautogui_import_error`` so the original import failure stays
            visible in the traceback.
    """
    if pyautogui is not None:
        return

    message = (
        "PyAutoGUI is unavailable because no display is attached (DISPLAY not set). "
        "If you're using SSH, enable X11 forwarding or use a virtual display (e.g., xvfb)."
    )
    raise ToolError(message) from _pyautogui_import_error


TYPING_DELAY_MS = 12
TYPING_GROUP_SIZE = 50

Expand Down Expand Up @@ -68,6 +89,7 @@ def chunks(s: str, chunk_size: int) -> list[str]:


def smooth_move_to(x, y, duration=1.2):
_require_pyautogui()
start_x, start_y = pyautogui.position()
dx = x - start_x
dy = y - start_y
Expand Down Expand Up @@ -101,7 +123,9 @@ class ComputerTool(BaseAnthropicTool):
api_type: Literal["computer_20241022"] = "computer_20241022"
width: int
height: int
display_num: None # Simplified to always be None since we're only using primary display
display_num: (
None # Simplified to always be None since we're only using primary display
)

_screenshot_delay = 2.0
_scaling_enabled = True
Expand All @@ -122,7 +146,10 @@ def to_params(self) -> BetaToolComputerUse20241022Param:

def __init__(self):
super().__init__()
self.width, self.height = pyautogui.size()
if pyautogui is None:
self.width, self.height = _FALLBACK_SCREEN_SIZE
else:
self.width, self.height = pyautogui.size()
self.display_num = None

async def __call__(
Expand All @@ -133,6 +160,7 @@ async def __call__(
coordinate: tuple[int, int] | None = None,
**kwargs,
):
_require_pyautogui()
if action in ("mouse_move", "left_click_drag"):
if coordinate is None:
raise ToolError(f"coordinate is required for {action}")
Expand Down Expand Up @@ -221,6 +249,7 @@ def normalize_key(key):

async def screenshot(self):
"""Take a screenshot of the current screen and return the base64 encoded image."""
_require_pyautogui()
temp_dir = Path(tempfile.gettempdir())
path = temp_dir / f"screenshot_{uuid4().hex}.png"

Expand Down
74 changes: 74 additions & 0 deletions tests/test_display_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Tests for DISPLAY environment variable handling in computer_use module."""

import os
import sys
import unittest
from unittest.mock import patch


class TestDisplayEnvHandling(unittest.TestCase):
    """Check that computer_use degrades gracefully when pyautogui cannot load.

    Every test runs in a simulated headless environment: ``DISPLAY`` is unset,
    ``import pyautogui`` is forced to fail, and cached ``computer_use`` modules
    are dropped so their top-level import logic runs again. All changes to
    ``os.environ`` and ``sys.modules`` are rolled back after each test so that
    other tests never observe the stubbed-out modules.
    """

    _PACKAGE = "interpreter.computer_use"

    def setUp(self):
        """Install the headless-environment simulation for one test."""
        # patch.dict snapshots the mapping on start() and restores the snapshot
        # on stop(), which undoes every mutation made below and every module
        # imported by the test body.
        env_patch = patch.dict(os.environ)
        env_patch.start()
        self.addCleanup(env_patch.stop)
        os.environ.pop("DISPLAY", None)

        modules_patch = patch.dict(sys.modules)
        modules_patch.start()
        self.addCleanup(modules_patch.stop)

        # Drop cached copies so the imports in the tests re-execute the
        # modules' top-level try/except around `import pyautogui`.
        for name in [m for m in sys.modules if m.startswith(self._PACKAGE)]:
            del sys.modules[name]

        # A None entry in sys.modules makes `import pyautogui` raise
        # ImportError, whether or not the package is installed.
        sys.modules["pyautogui"] = None

    def _handle_import_error(self, exc):
        """Fail if pyautogui's import error leaked; otherwise skip the test.

        Other dependencies may legitimately be missing in a test environment,
        but an ImportError naming pyautogui means the module under test did
        not handle the missing display, which is exactly the regression these
        tests exist to catch. This method always raises (failure or skip).
        """
        if exc.name == "pyautogui":
            self.fail(f"pyautogui import failure was not handled: {exc}")
        self.skipTest(f"dependency unavailable in test environment: {exc}")

    def test_computer_tool_import_without_display(self):
        """computer.py imports without a display and uses the fallback size."""
        try:
            from interpreter.computer_use.tools.computer import (
                _FALLBACK_SCREEN_SIZE,
                ComputerTool,
            )
            from interpreter.computer_use.tools.computer import (
                pyautogui as imported_pyautogui,
            )
        except ImportError as exc:
            self._handle_import_error(exc)

        # Assert unconditionally: a guarded assertion would let the test pass
        # without verifying anything.
        self.assertIsNone(imported_pyautogui)

        tool = ComputerTool()
        self.assertEqual(tool.width, _FALLBACK_SCREEN_SIZE[0])
        self.assertEqual(tool.height, _FALLBACK_SCREEN_SIZE[1])

    def test_loop_import_without_display(self):
        """loop.py imports without a display and the mouse watcher is a no-op."""
        try:
            from interpreter.computer_use.loop import check_mouse_position, pyautogui
        except ImportError as exc:
            self._handle_import_error(exc)

        self.assertIsNone(pyautogui)
        # With no pyautogui the watcher must return immediately, not loop.
        self.assertIsNone(check_mouse_position())


if __name__ == "__main__":
unittest.main()
4 changes: 3 additions & 1 deletion tests/test_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,9 @@ def say(icon_name):
say("1")
time.sleep(1)

import pyautogui
if platform.system() == "Linux" and not os.environ.get("DISPLAY"):
pytest.skip("DISPLAY not set; pyautogui requires a GUI.")
pyautogui = pytest.importorskip("pyautogui")

pyautogui.mouseDown()

Expand Down