Skip to content

Commit 1408c25

Browse files
enhance: browser_som_screenshot (#3059)
Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com>
1 parent 16cce6c commit 1408c25

12 files changed

+1464
-339
lines changed

camel/toolkits/hybrid_browser_toolkit/config_loader.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ class BrowserConfig:
4444
connect_over_cdp: bool = False
4545
cdp_url: Optional[str] = None
4646

47+
# Full visual mode configuration
48+
full_visual_mode: bool = False
49+
4750

4851
@dataclass
4952
class ToolkitConfig:
@@ -116,6 +119,8 @@ def from_kwargs(cls, **kwargs) -> 'ConfigLoader':
116119
toolkit_kwargs["session_id"] = value
117120
elif key == "enabledTools":
118121
toolkit_kwargs["enabled_tools"] = value
122+
elif key == "fullVisualMode":
123+
browser_kwargs["full_visual_mode"] = value
119124

120125
browser_config = BrowserConfig(**browser_kwargs)
121126
toolkit_config = ToolkitConfig(**toolkit_kwargs)
@@ -146,6 +151,7 @@ def to_ws_config(self) -> Dict[str, Any]:
146151
"viewport_limit": self.browser_config.viewport_limit,
147152
"connectOverCdp": self.browser_config.connect_over_cdp,
148153
"cdpUrl": self.browser_config.cdp_url,
154+
"fullVisualMode": self.browser_config.full_visual_mode,
149155
}
150156

151157
def get_timeout_config(self) -> Dict[str, Optional[int]]:

camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __new__(
5050
viewport_limit: bool = False,
5151
connect_over_cdp: bool = False,
5252
cdp_url: Optional[str] = None,
53+
full_visual_mode: bool = False,
5354
**kwargs: Any,
5455
) -> Any:
5556
r"""Create a HybridBrowserToolkit instance with the specified mode.
@@ -98,6 +99,11 @@ def __new__(
9899
cdp_url (Optional[str]): WebSocket endpoint URL for CDP
99100
connection. Required when connect_over_cdp is True.
100101
Defaults to None. (Only supported in TypeScript mode)
102+
full_visual_mode (bool): When True, browser actions like click,
103+
browser_open, visit_page, etc. will return the placeholder
104+
string 'full visual mode' as their snapshot instead of the
105+
actual page content. The browser_get_page_snapshot method will
106+
still return the actual snapshot. Defaults to False.
101107
**kwargs: Additional keyword arguments passed to the
102108
implementation.
103109
@@ -129,6 +135,7 @@ def __new__(
129135
viewport_limit=viewport_limit,
130136
connect_over_cdp=connect_over_cdp,
131137
cdp_url=cdp_url,
138+
full_visual_mode=full_visual_mode,
132139
**kwargs,
133140
)
134141
elif mode == "python":

camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def __init__(
9898
viewport_limit: bool = False,
9999
connect_over_cdp: bool = False,
100100
cdp_url: Optional[str] = None,
101+
full_visual_mode: bool = False,
101102
) -> None:
102103
r"""Initialize the HybridBrowserToolkit.
103104
@@ -143,6 +144,9 @@ def __init__(
143144
cdp_url (Optional[str]): WebSocket endpoint URL for CDP
144145
connection (e.g., 'ws://localhost:9222/devtools/browser/...').
145146
Required when connect_over_cdp is True. Defaults to None.
147+
full_visual_mode (bool): When True, browser actions like click,
148+
browser_open, visit_page, etc. will not return the actual page
149+
snapshot in their results. Defaults to False.
146150
"""
147151
super().__init__()
148152
RegisteredAgentToolkit.__init__(self)
@@ -167,6 +171,7 @@ def __init__(
167171
enabled_tools=enabled_tools,
168172
connect_over_cdp=connect_over_cdp,
169173
cdp_url=cdp_url,
174+
full_visual_mode=full_visual_mode,
170175
)
171176

172177
# Legacy attribute access for backward compatibility
@@ -182,6 +187,7 @@ def __init__(
182187
self._default_start_url = browser_config.default_start_url
183188
self._session_id = toolkit_config.session_id or "default"
184189
self._viewport_limit = browser_config.viewport_limit
190+
self._full_visual_mode = browser_config.full_visual_mode
185191

186192
# Store timeout configuration for backward compatibility
187193
self._default_timeout = browser_config.default_timeout
@@ -648,22 +654,29 @@ async def browser_click(self, *, ref: str) -> Dict[str, Any]:
648654

649655
# Add tab information
650656
tab_info = await ws_wrapper.get_tab_info()
651-
result.update(
652-
{
653-
"tabs": tab_info,
654-
"current_tab": next(
655-
(
656-
i
657-
for i, tab in enumerate(tab_info)
658-
if tab.get("is_current")
659-
),
660-
0,
657+
658+
response = {
659+
"result": result.get("result", ""),
660+
"snapshot": result.get("snapshot", ""),
661+
"tabs": tab_info,
662+
"current_tab": next(
663+
(
664+
i
665+
for i, tab in enumerate(tab_info)
666+
if tab.get("is_current")
661667
),
662-
"total_tabs": len(tab_info),
663-
}
664-
)
668+
0,
669+
),
670+
"total_tabs": len(tab_info),
671+
}
665672

666-
return result
673+
if "newTabId" in result:
674+
response["newTabId"] = result["newTabId"]
675+
676+
if "timing" in result:
677+
response["timing"] = result["timing"]
678+
679+
return response
667680
except Exception as e:
668681
logger.error(f"Failed to click element: {e}")
669682
return {
@@ -1377,6 +1390,8 @@ def clone_for_new_session(
13771390
screenshot_timeout=self._screenshot_timeout,
13781391
page_stability_timeout=self._page_stability_timeout,
13791392
dom_content_loaded_timeout=self._dom_content_loaded_timeout,
1393+
viewport_limit=self._viewport_limit,
1394+
full_visual_mode=self._full_visual_mode,
13801395
)
13811396

13821397
def get_tools(self) -> List[FunctionTool]:

0 commit comments

Comments
 (0)