Skip to content

Commit 04b9756

Browse files
authored
Merge pull request #126 from tageniu/new-feature-branch
Implement a Ctrl+C signal handler to pause for debug purposes
2 parents 2a6bb50 + dd3870f commit 04b9756

File tree

3 files changed

+247
-3
lines changed

3 files changed

+247
-3
lines changed

gui_agents/s1/cli_app.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
import os
66
import platform
7+
import signal
78
import sys
89
import time
910

@@ -13,6 +14,71 @@
1314

1415
current_platform = platform.system().lower()
1516

17+
# Global flag to track pause state for debugging
18+
paused = False
19+
20+
def get_char():
    """Read a single keypress from stdin without waiting for Enter.

    On macOS/Linux the terminal is switched to raw mode for one read and the
    previous terminal settings are always restored afterwards. On any other
    platform ``msvcrt.getch`` is used. If raw input is unavailable (for
    example stdin is not a terminal), the function falls back to ``input()``.

    Returns:
        str: Normally one character. The result may be empty (a Windows key
        byte that is not valid UTF-8 is dropped by ``errors='ignore'``) or a
        whole line (``input()`` fallback), so callers must not assume
        ``len(result) == 1``.

    Raises:
        EOFError: If the ``input()`` fallback hits end-of-file.
    """
    try:
        if platform.system() in ("Darwin", "Linux"):
            # Imported lazily: these modules only exist on Unix-like systems.
            import termios
            import tty

            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(fd)
                ch = sys.stdin.read(1)
            finally:
                # Always leave the terminal in its original (cooked) mode.
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
            return ch

        # Windows fallback
        import msvcrt

        return msvcrt.getch().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberate best-effort fallback for non-terminal environments.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate instead of silently blocking in input().
        return input()
41+
42+
def signal_handler(signum, frame):
    """Pause the workflow on Ctrl+C and wait for a resume/quit keypress.

    On the first SIGINT the pause banner is printed, the module-level
    ``paused`` flag is set, and the handler blocks reading keys through
    ``get_char()`` until the user presses Esc (resume) or Ctrl+C (exit).
    A SIGINT delivered while ``paused`` is already set exits the process.

    Args:
        signum: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).

    Raises:
        SystemExit: When the user chooses to quit, or when input reaches
            end-of-file while paused.
    """
    global paused

    if paused:
        # Already paused, second Ctrl+C means quit
        print("\n\n🛑 Exiting Agent-S...")
        sys.exit(0)

    print("\n\n🔸 Agent-S Workflow Paused 🔸")
    print("=" * 50)
    print("Options:")
    print(" • Press Ctrl+C again to quit")
    print(" • Press Esc to resume workflow")
    print("=" * 50)

    paused = True

    while paused:
        try:
            print("\n[PAUSED] Waiting for input... ", end="", flush=True)
            char = get_char()
        except (KeyboardInterrupt, EOFError):
            # EOFError comes from get_char()'s input() fallback when stdin is
            # closed; there is nothing left to read, so quit cleanly.
            print("\n\n🛑 Exiting Agent-S...")
            sys.exit(0)

        # Compare strings instead of calling ord(): get_char() may return an
        # empty string or a whole line, and ord() raises TypeError on both.
        if char == "\x03":  # Ctrl+C
            print("\n\n🛑 Exiting Agent-S...")
            sys.exit(0)
        elif char == "\x1b":  # Esc
            print("\n\n▶️ Resuming Agent-S workflow...")
            paused = False
        else:
            code_points = ", ".join(str(ord(c)) for c in char)
            print(f"\n Unknown command: '{char}' (ord: {code_points})")
78+
79+
# Set up signal handler for Ctrl+C
80+
signal.signal(signal.SIGINT, signal_handler)
81+
1682
if current_platform == "darwin":
1783
from gui_agents.s1.aci.MacOSACI import MacOSACI, UIElement
1884
elif current_platform == "linux":
@@ -81,10 +147,14 @@ def show_permission_dialog(code: str, action_description: str):
81147

82148

83149
def run_agent(agent: UIAgent, instruction: str):
150+
global paused
84151
obs = {}
85152
traj = "Task:\n" + instruction
86153
subtask_traj = ""
87-
for _ in range(15):
154+
for step in range(15):
155+
# Check if we're in paused state and wait
156+
while paused:
157+
time.sleep(0.1)
88158
obs["accessibility_tree"] = UIElement.systemWideElement()
89159

90160
# Get screen shot using pyautogui.
@@ -100,6 +170,12 @@ def run_agent(agent: UIAgent, instruction: str):
100170
# Convert to base64 string.
101171
obs["screenshot"] = screenshot_bytes
102172

173+
# Check again for pause state before prediction
174+
while paused:
175+
time.sleep(0.1)
176+
177+
print(f"\n🔄 Step {step + 1}/15: Getting next action from agent...")
178+
103179
# Get next action code from the agent
104180
info, code = agent.predict(instruction=instruction, observation=obs)
105181

@@ -120,13 +196,18 @@ def run_agent(agent: UIAgent, instruction: str):
120196
continue
121197

122198
if "wait" in code[0].lower():
199+
print("⏳ Agent requested wait...")
123200
time.sleep(5)
124201
continue
125202

126203
else:
127204
time.sleep(1.0)
128205
print("EXECUTING CODE:", code[0])
129206

207+
# Check for pause state before execution
208+
while paused:
209+
time.sleep(0.1)
210+
130211
# Ask for permission before executing
131212
exec(code[0])
132213
time.sleep(1.0)

gui_agents/s2/cli_app.py

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import platform
77
import pyautogui
8+
import signal
89
import sys
910
import time
1011

@@ -15,6 +16,71 @@
1516

1617
current_platform = platform.system().lower()
1718

19+
# Global flag to track pause state for debugging
20+
paused = False
21+
22+
def get_char():
    """Read a single keypress from stdin without waiting for Enter.

    On macOS/Linux the terminal is switched to raw mode for one read and the
    previous terminal settings are always restored afterwards. On any other
    platform ``msvcrt.getch`` is used. If raw input is unavailable (for
    example stdin is not a terminal), the function falls back to ``input()``.

    Returns:
        str: Normally one character. The result may be empty (a Windows key
        byte that is not valid UTF-8 is dropped by ``errors='ignore'``) or a
        whole line (``input()`` fallback), so callers must not assume
        ``len(result) == 1``.

    Raises:
        EOFError: If the ``input()`` fallback hits end-of-file.
    """
    try:
        if platform.system() in ("Darwin", "Linux"):
            # Imported lazily: these modules only exist on Unix-like systems.
            import termios
            import tty

            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(fd)
                ch = sys.stdin.read(1)
            finally:
                # Always leave the terminal in its original (cooked) mode.
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
            return ch

        # Windows fallback
        import msvcrt

        return msvcrt.getch().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberate best-effort fallback for non-terminal environments.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate instead of silently blocking in input().
        return input()
43+
44+
def signal_handler(signum, frame):
    """Pause the workflow on Ctrl+C and wait for a resume/quit keypress.

    On the first SIGINT the pause banner is printed, the module-level
    ``paused`` flag is set, and the handler blocks reading keys through
    ``get_char()`` until the user presses Esc (resume) or Ctrl+C (exit).
    A SIGINT delivered while ``paused`` is already set exits the process.

    Args:
        signum: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).

    Raises:
        SystemExit: When the user chooses to quit, or when input reaches
            end-of-file while paused.
    """
    global paused

    if paused:
        # Already paused, second Ctrl+C means quit
        print("\n\n🛑 Exiting Agent-S...")
        sys.exit(0)

    print("\n\n🔸 Agent-S Workflow Paused 🔸")
    print("=" * 50)
    print("Options:")
    print(" • Press Ctrl+C again to quit")
    print(" • Press Esc to resume workflow")
    print("=" * 50)

    paused = True

    while paused:
        try:
            print("\n[PAUSED] Waiting for input... ", end="", flush=True)
            char = get_char()
        except (KeyboardInterrupt, EOFError):
            # EOFError comes from get_char()'s input() fallback when stdin is
            # closed; there is nothing left to read, so quit cleanly.
            print("\n\n🛑 Exiting Agent-S...")
            sys.exit(0)

        # Compare strings instead of calling ord(): get_char() may return an
        # empty string or a whole line, and ord() raises TypeError on both.
        if char == "\x03":  # Ctrl+C
            print("\n\n🛑 Exiting Agent-S...")
            sys.exit(0)
        elif char == "\x1b":  # Esc
            print("\n\n▶️ Resuming Agent-S workflow...")
            paused = False
        else:
            code_points = ", ".join(str(ord(c)) for c in char)
            print(f"\n Unknown command: '{char}' (ord: {code_points})")
80+
81+
# Set up signal handler for Ctrl+C
82+
signal.signal(signal.SIGINT, signal_handler)
83+
1884
logger = logging.getLogger()
1985
logger.setLevel(logging.DEBUG)
2086

@@ -81,10 +147,15 @@ def scale_screen_dimensions(width: int, height: int, max_dim_size: int):
81147

82148

83149
def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
150+
global paused
84151
obs = {}
85152
traj = "Task:\n" + instruction
86153
subtask_traj = ""
87-
for _ in range(15):
154+
for step in range(15):
155+
# Check if we're in paused state and wait
156+
while paused:
157+
time.sleep(0.1)
158+
88159
# Get screen shot using pyautogui
89160
screenshot = pyautogui.screenshot()
90161
screenshot = screenshot.resize((scaled_width, scaled_height), Image.LANCZOS)
@@ -98,6 +169,12 @@ def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
98169
# Convert to base64 string.
99170
obs["screenshot"] = screenshot_bytes
100171

172+
# Check again for pause state before prediction
173+
while paused:
174+
time.sleep(0.1)
175+
176+
print(f"\n🔄 Step {step + 1}/15: Getting next action from agent...")
177+
101178
# Get next action code from the agent
102179
info, code = agent.predict(instruction=instruction, observation=obs)
103180

@@ -118,13 +195,18 @@ def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
118195
continue
119196

120197
if "wait" in code[0].lower():
198+
print("⏳ Agent requested wait...")
121199
time.sleep(5)
122200
continue
123201

124202
else:
125203
time.sleep(1.0)
126204
print("EXECUTING CODE:", code[0])
127205

206+
# Check for pause state before execution
207+
while paused:
208+
time.sleep(0.1)
209+
128210
# Ask for permission before executing
129211
exec(code[0])
130212
time.sleep(1.0)

gui_agents/s2_5/cli_app.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import platform
77
import pyautogui
8+
import signal
89
import sys
910
import time
1011

@@ -15,6 +16,71 @@
1516

1617
current_platform = platform.system().lower()
1718

19+
# Global flag to track pause state for debugging
20+
paused = False
21+
22+
def get_char():
    """Read a single keypress from stdin without waiting for Enter.

    On macOS/Linux the terminal is switched to raw mode for one read and the
    previous terminal settings are always restored afterwards. On any other
    platform ``msvcrt.getch`` is used. If raw input is unavailable (for
    example stdin is not a terminal), the function falls back to ``input()``.

    Returns:
        str: Normally one character. The result may be empty (a Windows key
        byte that is not valid UTF-8 is dropped by ``errors='ignore'``) or a
        whole line (``input()`` fallback), so callers must not assume
        ``len(result) == 1``.

    Raises:
        EOFError: If the ``input()`` fallback hits end-of-file.
    """
    try:
        if platform.system() in ("Darwin", "Linux"):
            # Imported lazily: these modules only exist on Unix-like systems.
            import termios
            import tty

            fd = sys.stdin.fileno()
            old_settings = termios.tcgetattr(fd)
            try:
                tty.setraw(fd)
                ch = sys.stdin.read(1)
            finally:
                # Always leave the terminal in its original (cooked) mode.
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
            return ch

        # Windows fallback
        import msvcrt

        return msvcrt.getch().decode("utf-8", errors="ignore")
    except Exception:
        # Deliberate best-effort fallback for non-terminal environments.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate instead of silently blocking in input().
        return input()
43+
44+
def signal_handler(signum, frame):
    """Pause the workflow on Ctrl+C and wait for a resume/quit keypress.

    On the first SIGINT the pause banner is printed, the module-level
    ``paused`` flag is set, and the handler blocks reading keys through
    ``get_char()`` until the user presses Esc (resume) or Ctrl+C (exit).
    A SIGINT delivered while ``paused`` is already set exits the process.

    Args:
        signum: Signal number supplied by the ``signal`` module (unused).
        frame: Interrupted stack frame supplied by the ``signal`` module
            (unused).

    Raises:
        SystemExit: When the user chooses to quit, or when input reaches
            end-of-file while paused.
    """
    global paused

    if paused:
        # Already paused, second Ctrl+C means quit
        print("\n\n🛑 Exiting Agent-S...")
        sys.exit(0)

    print("\n\n🔸 Agent-S Workflow Paused 🔸")
    print("=" * 50)
    print("Options:")
    print(" • Press Ctrl+C again to quit")
    print(" • Press Esc to resume workflow")
    print("=" * 50)

    paused = True

    while paused:
        try:
            print("\n[PAUSED] Waiting for input... ", end="", flush=True)
            char = get_char()
        except (KeyboardInterrupt, EOFError):
            # EOFError comes from get_char()'s input() fallback when stdin is
            # closed; there is nothing left to read, so quit cleanly.
            print("\n\n🛑 Exiting Agent-S...")
            sys.exit(0)

        # Compare strings instead of calling ord(): get_char() may return an
        # empty string or a whole line, and ord() raises TypeError on both.
        if char == "\x03":  # Ctrl+C
            print("\n\n🛑 Exiting Agent-S...")
            sys.exit(0)
        elif char == "\x1b":  # Esc
            print("\n\n▶️ Resuming Agent-S workflow...")
            paused = False
        else:
            code_points = ", ".join(str(ord(c)) for c in char)
            print(f"\n Unknown command: '{char}' (ord: {code_points})")
80+
81+
# Set up signal handler for Ctrl+C
82+
signal.signal(signal.SIGINT, signal_handler)
83+
1884
logger = logging.getLogger()
1985
logger.setLevel(logging.DEBUG)
2086

@@ -81,10 +147,14 @@ def scale_screen_dimensions(width: int, height: int, max_dim_size: int):
81147

82148

83149
def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
150+
global paused
84151
obs = {}
85152
traj = "Task:\n" + instruction
86153
subtask_traj = ""
87-
for _ in range(15):
154+
for step in range(15):
155+
# Check if we're in paused state and wait
156+
while paused:
157+
time.sleep(0.1)
88158
# Get screen shot using pyautogui
89159
screenshot = pyautogui.screenshot()
90160
screenshot = screenshot.resize((scaled_width, scaled_height), Image.LANCZOS)
@@ -98,6 +168,12 @@ def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
98168
# Convert to base64 string.
99169
obs["screenshot"] = screenshot_bytes
100170

171+
# Check again for pause state before prediction
172+
while paused:
173+
time.sleep(0.1)
174+
175+
print(f"\n🔄 Step {step + 1}/15: Getting next action from agent...")
176+
101177
# Get next action code from the agent
102178
info, code = agent.predict(instruction=instruction, observation=obs)
103179

@@ -117,13 +193,18 @@ def run_agent(agent, instruction: str, scaled_width: int, scaled_height: int):
117193
continue
118194

119195
if "wait" in code[0].lower():
196+
print("⏳ Agent requested wait...")
120197
time.sleep(5)
121198
continue
122199

123200
else:
124201
time.sleep(1.0)
125202
print("EXECUTING CODE:", code[0])
126203

204+
# Check for pause state before execution
205+
while paused:
206+
time.sleep(0.1)
207+
127208
# Ask for permission before executing
128209
exec(code[0])
129210
time.sleep(1.0)

0 commit comments

Comments
 (0)