mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-19 12:24:46 +08:00
fix: fix sub agent timeout
This commit is contained in:
@@ -15,7 +15,7 @@ class SubagentConfig:
|
|||||||
disallowed_tools: Optional list of tool names to deny.
|
disallowed_tools: Optional list of tool names to deny.
|
||||||
model: Model to use - 'inherit' uses parent's model.
|
model: Model to use - 'inherit' uses parent's model.
|
||||||
max_turns: Maximum number of agent turns before stopping.
|
max_turns: Maximum number of agent turns before stopping.
|
||||||
timeout_seconds: Maximum execution time in seconds (default: 300 = 5 minutes).
|
timeout_seconds: Maximum execution time in seconds (default: 900 = 15 minutes).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
name: str
|
name: str
|
||||||
@@ -25,4 +25,4 @@ class SubagentConfig:
|
|||||||
disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
|
disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
|
||||||
model: str = "inherit"
|
model: str = "inherit"
|
||||||
max_turns: int = 50
|
max_turns: int = 50
|
||||||
timeout_seconds: int = 300
|
timeout_seconds: int = 900
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ class SubagentStatus(Enum):
|
|||||||
RUNNING = "running"
|
RUNNING = "running"
|
||||||
COMPLETED = "completed"
|
COMPLETED = "completed"
|
||||||
FAILED = "failed"
|
FAILED = "failed"
|
||||||
|
TIMED_OUT = "timed_out"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -332,7 +333,7 @@ class SubagentExecutor:
|
|||||||
f"[trace={self.trace_id}] Subagent {self.config.name} execution timed out after {self.config.timeout_seconds}s"
|
f"[trace={self.trace_id}] Subagent {self.config.name} execution timed out after {self.config.timeout_seconds}s"
|
||||||
)
|
)
|
||||||
with _background_tasks_lock:
|
with _background_tasks_lock:
|
||||||
_background_tasks[task_id].status = SubagentStatus.FAILED
|
_background_tasks[task_id].status = SubagentStatus.TIMED_OUT
|
||||||
_background_tasks[task_id].error = f"Execution timed out after {self.config.timeout_seconds} seconds"
|
_background_tasks[task_id].error = f"Execution timed out after {self.config.timeout_seconds} seconds"
|
||||||
_background_tasks[task_id].completed_at = datetime.now()
|
_background_tasks[task_id].completed_at = datetime.now()
|
||||||
# Cancel the future (best effort - may not stop the actual execution)
|
# Cancel the future (best effort - may not stop the actual execution)
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ def task_tool(
|
|||||||
logger.info(f"[trace={trace_id}] Task {task_id} status: {result.status.value}")
|
logger.info(f"[trace={trace_id}] Task {task_id} status: {result.status.value}")
|
||||||
last_status = result.status
|
last_status = result.status
|
||||||
|
|
||||||
# Check if task completed or failed
|
# Check if task completed, failed, or timed out
|
||||||
if result.status == SubagentStatus.COMPLETED:
|
if result.status == SubagentStatus.COMPLETED:
|
||||||
writer({"type": "task_completed", "task_id": task_id, "result": result.result})
|
writer({"type": "task_completed", "task_id": task_id, "result": result.result})
|
||||||
logger.info(f"[trace={trace_id}] Task {task_id} completed after {poll_count} polls")
|
logger.info(f"[trace={trace_id}] Task {task_id} completed after {poll_count} polls")
|
||||||
@@ -140,14 +140,20 @@ def task_tool(
|
|||||||
writer({"type": "task_failed", "task_id": task_id, "error": result.error})
|
writer({"type": "task_failed", "task_id": task_id, "error": result.error})
|
||||||
logger.error(f"[trace={trace_id}] Task {task_id} failed: {result.error}")
|
logger.error(f"[trace={trace_id}] Task {task_id} failed: {result.error}")
|
||||||
return f"Task failed. Error: {result.error}"
|
return f"Task failed. Error: {result.error}"
|
||||||
|
elif result.status == SubagentStatus.TIMED_OUT:
|
||||||
|
writer({"type": "task_timed_out", "task_id": task_id, "error": result.error})
|
||||||
|
logger.warning(f"[trace={trace_id}] Task {task_id} timed out: {result.error}")
|
||||||
|
return f"Task timed out. Error: {result.error}"
|
||||||
|
|
||||||
# Still running, wait before next poll
|
# Still running, wait before next poll
|
||||||
writer({"type": "task_running", "task_id": task_id, "poll_count": poll_count})
|
writer({"type": "task_running", "task_id": task_id, "poll_count": poll_count})
|
||||||
time.sleep(5) # Poll every 5 seconds
|
time.sleep(5) # Poll every 5 seconds
|
||||||
poll_count += 1
|
poll_count += 1
|
||||||
|
|
||||||
# Optional: Add timeout protection (e.g., max 5 minutes)
|
# Polling timeout as a safety net (in case thread pool timeout doesn't work)
|
||||||
if poll_count > 60: # 60 * 5s = 5 minutes
|
# Set to 16 minutes (longer than the default 15-minute thread pool timeout)
|
||||||
logger.warning(f"[trace={trace_id}] Task {task_id} timed out after {poll_count} polls")
|
# This catches edge cases where the background task gets stuck
|
||||||
|
if poll_count > 192: # 192 * 5s = 16 minutes
|
||||||
|
logger.error(f"[trace={trace_id}] Task {task_id} polling timed out after {poll_count} polls (should have been caught by thread pool timeout)")
|
||||||
writer({"type": "task_timed_out", "task_id": task_id})
|
writer({"type": "task_timed_out", "task_id": task_id})
|
||||||
return f"Task timed out after 5 minutes. Status: {result.status.value}"
|
return f"Task polling timed out after 16 minutes. This may indicate the background task is stuck. Status: {result.status.value}"
|
||||||
|
|||||||
Reference in New Issue
Block a user