Skip to content

Commit a047656

Browse files
authored
Update wechat_agent actions (#5)
1 parent 554b87f commit a047656

File tree

3 files changed

+91
-24
lines changed

3 files changed

+91
-24
lines changed

macosagent/agents/wechat_agent/agent/prompt.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,15 @@
6868
- Keep track of the status and subresults in the memory.
6969
7070
9. Action specific rules:
71-
- Scrolling: When performing a scroll action, use the "scroll" action.
72-
- The `amount` parameter follows the rules of `pyautogui.scroll(amount)`:
73-
- A **positive** value scrolls **up**.
74-
- A **negative** value scrolls **down**.
75-
- The **magnitude** of the `amount` value determines the scroll distance.
76-
- The value of the 'amount' parameter should be between -10 and 10 for a single scroll action to ensure it is manageable and does not cause unexpected behavior.
71+
- Scrolling: When performing a scroll action, use either the "scroll_up" or "scroll_down" action depending on direction.
72+
- The `amount` parameter must be a positive integer indicating the scroll distance.
73+
- The direction is determined by the action type:
74+
- `scroll_up`: Scrolls the element up (content moves down)
75+
- `scroll_down`: Scrolls the element down (content moves up)
76+
- Recommended value range for `amount` is between 1 and 10 to ensure smooth behavior.
7777
- Example usages:
78-
- **Scroll up (move page content down)**: `{"scroll": {"index": <target_element_index>, "amount": 10}}`
79-
- **Scroll down (move page content up)**: `{"scroll": {"index": <target_element_index>, "amount": -10}}`
78+
- Scroll up: `{"scroll_up": {"index": <target_element_index>, "amount": 5}}`
79+
- Scroll down: `{"scroll_down": {"index": <target_element_index>, "amount": 5}}`
8080
8181
- Inputting Emojis: When inputting emojis, use the "inputs" action.
8282
- An example format is `{"inputs": {"index": <target_element_index>, "text": "😊"}}`.
@@ -92,9 +92,15 @@
9292
- Adding new content:
9393
- Example: `{"inputs": {"index": <target_element_index>, "text": "<existing content> new text"}}`
9494
95-
- Summarizing and Inputting Content:
96-
- When required to summarize content, the agent must first extract key points and then input the summary into the designated field using the "input" action.
97-
- Example: `{"inputs": {"index": <target_element_index>, "text": "<summary of extracted content>"}}`
95+
- Extracting Content: When the task requires analyzing or extracting information from the screen (e.g. summarizing a message, identifying key elements, or fulfilling user-specific requirements), use the "extract_content" action.
96+
- Provide a clear extraction requirement in the "target" field to guide what should be extracted.
97+
- Provide the source text or raw content in the "content" field.
98+
- The result of this action will be stored in memory and included in the reasoning chain for follow-up actions.
99+
- Example:
100+
{"extract_content": {
101+
"target": "Summarize the message for forwarding",
102+
"content": "Hi! Here is the updated schedule: Monday - Team Meeting at 10am; Tuesday - Client call at 2pm. Let me know if you're available."
103+
}}
98104
99105
- Sending Files: When sending files, always assume the file is already in the clipboard.
100106
- Use the "paste" action to paste the file, followed by the "send" action to confirm sending.

macosagent/agents/wechat_agent/controller/service.py

Lines changed: 70 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from macosagent.agents.wechat_agent.controller.registry.service import Registry
1313
from macosagent.agents.wechat_agent.wechat.context import WechatContext
14-
from macosagent.agents.wechat_agent.controller.views import ClickElementAction, InputAction, DoneAction, ScrollAction, PasteAction, CopyAction, RightClickElementAction, SendAction
14+
from macosagent.agents.wechat_agent.controller.views import ClickElementAction, InputAction, DoneAction, ScrollAction, PasteAction, CopyAction, RightClickElementAction, SendAction, ExtractContent
1515
from macosagent.agents.wechat_agent.agent.views import ActionResult, ActionModel
1616
#from macosagent.agents.wechat_agent.agent.llm import create_gpt_4o_tool
1717

@@ -136,27 +136,67 @@ async def inputs(params: InputAction, context: WechatContext):
136136
return ActionResult(is_done=False, success=False, extracted_content=str(e), include_in_memory=True)
137137

138138
def _scroll_element(params: ScrollAction, context: WechatContext, direction: str):
    """Hover over the target element and scroll it in ``direction``.

    Shared implementation for the ``scroll_up`` and ``scroll_down`` actions.

    Args:
        params: Scroll parameters; ``index`` selects the element and
            ``amount`` gives the scroll distance (its sign is ignored, and
            ``None`` falls back to 10).
        context: WeChat context providing ``state.accessibility_tree_json``
            and ``state.offset``.
        direction: Either ``"up"`` or ``"down"``.

    Returns:
        An ``ActionResult`` describing the outcome. Failures (unknown element
        index, pyautogui errors) are reported with ``success=False`` rather
        than raised.
    """
    try:
        # Resolve the element inside the try block so that an unknown index
        # becomes a failed ActionResult instead of an uncaught IndexError.
        element = next(
            (item for item in context.state.accessibility_tree_json if item["id"] == params.index),
            None,
        )
        if element is None:
            message = f"No element found with index {params.index}"
            logger.error(f"Error scrolling {direction}: {message}")
            return ActionResult(
                is_done=False,
                success=False,
                extracted_content=message,
                include_in_memory=True,
            )

        # Move to the centre of the element's bounding box, shifted by the
        # offset stored on the context state.
        x, y, w, h = element["bbox"]
        offset = context.state.offset
        x = int(x + offset[0] + w / 2)
        y = int(y + offset[1] + h / 2)

        magnitude = abs(params.amount) if params.amount is not None else 10
        # pyautogui.scroll(): a positive value scrolls up, a negative value
        # scrolls down.
        clicks = magnitude if direction == "up" else -magnitude

        logger.info(f"Scroll element {element} {direction}")
        pyautogui.moveTo(x, y, duration=0.1)
        pyautogui.scroll(clicks)
        # NOTE(review): the message says "pixels", but pyautogui documents
        # the unit as scroll "clicks"; wording kept as-is for the agent memory.
        return ActionResult(
            is_done=False,
            success=True,
            extracted_content=f"🌀 Scrolled {direction} {magnitude} pixels",
            include_in_memory=True,
        )
    except Exception as e:
        logger.error(f"Error scrolling {direction}: {e}")
        return ActionResult(
            is_done=False,
            success=False,
            extracted_content=str(e),
            include_in_memory=True,
        )

@self.registry.action(
    "Scroll element up",
    param_model=ScrollAction,
)
async def scroll_up(params: ScrollAction, context: WechatContext):
    """Scroll the element identified by ``params.index`` up."""
    return _scroll_element(params, context, "up")

@self.registry.action(
    "Scroll element down",
    param_model=ScrollAction,
)
async def scroll_down(params: ScrollAction, context: WechatContext):
    """Scroll the element identified by ``params.index`` down."""
    return _scroll_element(params, context, "down")
199+
160200

161201
@self.registry.action(
162202
"Paste clipboard text",
@@ -237,6 +277,23 @@ async def send(params: SendAction, context: WechatContext):
237277
logger.error(f"Error pasting text: {e}")
238278
return ActionResult(is_done=False, success=False, extracted_content=str(e), include_in_memory=True)
239279

280+
@self.registry.action(
    "Extract content",
    param_model=ExtractContent,
)
async def extract_content(params: ExtractContent, context: WechatContext):
    """Record content extracted for a given requirement.

    Args:
        params: ``target`` is the extraction requirement and ``content`` is
            the text supplied for it.
        context: WeChat context; not used by this action.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` echoes the requirement
        and the content, flagged for inclusion in memory.
    """
    # Read the fields before entering the try block so the error handler
    # below can always reference `target` without hitting UnboundLocalError.
    target = params.target
    content = params.content
    try:
        message = f"Extracted content based on the requirement '{target}': {content}"
        logger.info(message)
        return ActionResult(
            is_done=False,
            success=True,
            extracted_content=f"📋 {message}",
            include_in_memory=True,
        )
    except Exception as e:
        logger.error(f"Error extracting content based on the requirement '{target}': {e}")
        return ActionResult(
            is_done=False,
            success=False,
            extracted_content=str(e),
            include_in_memory=True,
        )
296+
240297
# @self.registry.action(
241298
# "Modify clipboard text",
242299
# param_model=ModifyClipboardTextAction,

macosagent/agents/wechat_agent/controller/views.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ class CopyAction(BaseModel):
3131
class SendAction(BaseModel):
3232
index: int
3333

34+
class ExtractContent(BaseModel):
    # Parameter model for the "extract_content" action.

    # Extraction requirement that says what should be pulled out of the
    # content (e.g. "Summarize the message for forwarding").
    target: str

    # Source text / raw content the requirement is applied to.
    content: str
37+
3438
# class ModifyClipboardTextAction(BaseModel):
3539
# requirement_text: str
3640

0 commit comments

Comments
 (0)