mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
Merge upstream/experimental: resolve conflict in lead_agent/prompt.py
- Keep upstream subagent HARD LIMIT (max 3 task calls, batching) in subagent_reminder - Keep our removal of Citations: do not add back 'Citations when synthesizing' Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -24,7 +24,7 @@ deer-flow/
|
|||||||
│ ├── src/
|
│ ├── src/
|
||||||
│ │ ├── agents/ # LangGraph agent system
|
│ │ ├── agents/ # LangGraph agent system
|
||||||
│ │ │ ├── lead_agent/ # Main agent (factory + system prompt)
|
│ │ │ ├── lead_agent/ # Main agent (factory + system prompt)
|
||||||
│ │ │ ├── middlewares/ # 9 middleware components
|
│ │ │ ├── middlewares/ # 10 middleware components
|
||||||
│ │ │ ├── memory/ # Memory extraction, queue, prompts
|
│ │ │ ├── memory/ # Memory extraction, queue, prompts
|
||||||
│ │ │ └── thread_state.py # ThreadState schema
|
│ │ │ └── thread_state.py # ThreadState schema
|
||||||
│ │ ├── gateway/ # FastAPI Gateway API
|
│ │ ├── gateway/ # FastAPI Gateway API
|
||||||
@@ -112,12 +112,14 @@ Middlewares execute in strict order in `src/agents/lead_agent/agent.py`:
|
|||||||
1. **ThreadDataMiddleware** - Creates per-thread directories (`backend/.deer-flow/threads/{thread_id}/user-data/{workspace,uploads,outputs}`)
|
1. **ThreadDataMiddleware** - Creates per-thread directories (`backend/.deer-flow/threads/{thread_id}/user-data/{workspace,uploads,outputs}`)
|
||||||
2. **UploadsMiddleware** - Tracks and injects newly uploaded files into conversation
|
2. **UploadsMiddleware** - Tracks and injects newly uploaded files into conversation
|
||||||
3. **SandboxMiddleware** - Acquires sandbox, stores `sandbox_id` in state
|
3. **SandboxMiddleware** - Acquires sandbox, stores `sandbox_id` in state
|
||||||
4. **SummarizationMiddleware** - Context reduction when approaching token limits (optional, if enabled)
|
4. **DanglingToolCallMiddleware** - Injects placeholder ToolMessages for AIMessage tool_calls that lack responses (e.g., due to user interruption)
|
||||||
5. **TodoListMiddleware** - Task tracking with `write_todos` tool (optional, if plan_mode)
|
5. **SummarizationMiddleware** - Context reduction when approaching token limits (optional, if enabled)
|
||||||
6. **TitleMiddleware** - Auto-generates thread title after first complete exchange
|
6. **TodoListMiddleware** - Task tracking with `write_todos` tool (optional, if plan_mode)
|
||||||
7. **MemoryMiddleware** - Queues conversations for async memory update (filters to user + final AI responses)
|
7. **TitleMiddleware** - Auto-generates thread title after first complete exchange
|
||||||
8. **ViewImageMiddleware** - Injects base64 image data before LLM call (conditional on vision support)
|
8. **MemoryMiddleware** - Queues conversations for async memory update (filters to user + final AI responses)
|
||||||
9. **ClarificationMiddleware** - Intercepts `ask_clarification` tool calls, interrupts via `Command(goto=END)` (must be last)
|
9. **ViewImageMiddleware** - Injects base64 image data before LLM call (conditional on vision support)
|
||||||
|
10. **SubagentLimitMiddleware** - Truncates excess `task` tool calls from model response to enforce `MAX_CONCURRENT_SUBAGENTS` limit (optional, if subagent_enabled)
|
||||||
|
11. **ClarificationMiddleware** - Intercepts `ask_clarification` tool calls, interrupts via `Command(goto=END)` (must be last)
|
||||||
|
|
||||||
### Configuration System
|
### Configuration System
|
||||||
|
|
||||||
@@ -185,7 +187,7 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` →
|
|||||||
|
|
||||||
**Built-in Agents**: `general-purpose` (all tools except `task`) and `bash` (command specialist)
|
**Built-in Agents**: `general-purpose` (all tools except `task`) and `bash` (command specialist)
|
||||||
**Execution**: Dual thread pool - `_scheduler_pool` (3 workers) + `_execution_pool` (3 workers)
|
**Execution**: Dual thread pool - `_scheduler_pool` (3 workers) + `_execution_pool` (3 workers)
|
||||||
**Concurrency**: `MAX_CONCURRENT_SUBAGENTS = 3` per trace, 15-minute timeout
|
**Concurrency**: `MAX_CONCURRENT_SUBAGENTS = 3` enforced by `SubagentLimitMiddleware` (truncates excess tool calls in `after_model`), 15-minute timeout
|
||||||
**Flow**: `task()` tool → `SubagentExecutor` → background thread → poll 5s → SSE events → result
|
**Flow**: `task()` tool → `SubagentExecutor` → background thread → poll 5s → SSE events → result
|
||||||
**Events**: `task_started`, `task_running`, `task_completed`/`task_failed`/`task_timed_out`
|
**Events**: `task_started`, `task_running`, `task_completed`/`task_failed`/`task_timed_out`
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,9 @@ from langchain_core.runnables import RunnableConfig
|
|||||||
|
|
||||||
from src.agents.lead_agent.prompt import apply_prompt_template
|
from src.agents.lead_agent.prompt import apply_prompt_template
|
||||||
from src.agents.middlewares.clarification_middleware import ClarificationMiddleware
|
from src.agents.middlewares.clarification_middleware import ClarificationMiddleware
|
||||||
|
from src.agents.middlewares.dangling_tool_call_middleware import DanglingToolCallMiddleware
|
||||||
from src.agents.middlewares.memory_middleware import MemoryMiddleware
|
from src.agents.middlewares.memory_middleware import MemoryMiddleware
|
||||||
|
from src.agents.middlewares.subagent_limit_middleware import SubagentLimitMiddleware
|
||||||
from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
|
from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
|
||||||
from src.agents.middlewares.title_middleware import TitleMiddleware
|
from src.agents.middlewares.title_middleware import TitleMiddleware
|
||||||
from src.agents.middlewares.uploads_middleware import UploadsMiddleware
|
from src.agents.middlewares.uploads_middleware import UploadsMiddleware
|
||||||
@@ -174,6 +176,7 @@ Being proactive with task management demonstrates thoroughness and ensures all r
|
|||||||
|
|
||||||
# ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available
|
# ThreadDataMiddleware must be before SandboxMiddleware to ensure thread_id is available
|
||||||
# UploadsMiddleware should be after ThreadDataMiddleware to access thread_id
|
# UploadsMiddleware should be after ThreadDataMiddleware to access thread_id
|
||||||
|
# DanglingToolCallMiddleware patches missing ToolMessages before model sees the history
|
||||||
# SummarizationMiddleware should be early to reduce context before other processing
|
# SummarizationMiddleware should be early to reduce context before other processing
|
||||||
# TodoListMiddleware should be before ClarificationMiddleware to allow todo management
|
# TodoListMiddleware should be before ClarificationMiddleware to allow todo management
|
||||||
# TitleMiddleware generates title after first exchange
|
# TitleMiddleware generates title after first exchange
|
||||||
@@ -189,7 +192,7 @@ def _build_middlewares(config: RunnableConfig):
|
|||||||
Returns:
|
Returns:
|
||||||
List of middleware instances.
|
List of middleware instances.
|
||||||
"""
|
"""
|
||||||
middlewares = [ThreadDataMiddleware(), UploadsMiddleware(), SandboxMiddleware()]
|
middlewares = [ThreadDataMiddleware(), UploadsMiddleware(), SandboxMiddleware(), DanglingToolCallMiddleware()]
|
||||||
|
|
||||||
# Add summarization middleware if enabled
|
# Add summarization middleware if enabled
|
||||||
summarization_middleware = _create_summarization_middleware()
|
summarization_middleware = _create_summarization_middleware()
|
||||||
@@ -221,6 +224,11 @@ def _build_middlewares(config: RunnableConfig):
|
|||||||
if model_config is not None and model_config.supports_vision:
|
if model_config is not None and model_config.supports_vision:
|
||||||
middlewares.append(ViewImageMiddleware())
|
middlewares.append(ViewImageMiddleware())
|
||||||
|
|
||||||
|
# Add SubagentLimitMiddleware to truncate excess parallel task calls
|
||||||
|
subagent_enabled = config.get("configurable", {}).get("subagent_enabled", False)
|
||||||
|
if subagent_enabled:
|
||||||
|
middlewares.append(SubagentLimitMiddleware())
|
||||||
|
|
||||||
# ClarificationMiddleware should always be last
|
# ClarificationMiddleware should always be last
|
||||||
middlewares.append(ClarificationMiddleware())
|
middlewares.append(ClarificationMiddleware())
|
||||||
return middlewares
|
return middlewares
|
||||||
|
|||||||
@@ -12,7 +12,17 @@ You are running with subagent capabilities enabled. Your role is to be a **task
|
|||||||
|
|
||||||
**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**
|
**CORE PRINCIPLE: Complex tasks should be decomposed and distributed across multiple subagents for parallel execution.**
|
||||||
|
|
||||||
**⚠️ LIMIT: You can launch at most 3 subagents per turn. Prioritize the most important sub-tasks if more decomposition is possible.**
|
**⛔ HARD CONCURRENCY LIMIT: MAXIMUM 3 `task` CALLS PER RESPONSE. THIS IS NOT OPTIONAL.**
|
||||||
|
- Each response, you may include **at most 3** `task` tool calls. Any excess calls are **silently discarded** by the system — you will lose that work.
|
||||||
|
- **Before launching subagents, you MUST count your sub-tasks in your thinking:**
|
||||||
|
- If count ≤ 3: Launch all in this response.
|
||||||
|
- If count > 3: **Pick the 3 most important/foundational sub-tasks for this turn.** Save the rest for the next turn.
|
||||||
|
- **Multi-batch execution** (for >3 sub-tasks):
|
||||||
|
- Turn 1: Launch sub-tasks 1-3 in parallel → wait for results
|
||||||
|
- Turn 2: Launch sub-tasks 4-6 in parallel → wait for results
|
||||||
|
- ... continue until all sub-tasks are complete
|
||||||
|
- Final turn: Synthesize ALL results into a coherent answer
|
||||||
|
- **Example thinking pattern**: "I identified 6 sub-tasks. Since the limit is 3 per turn, I will launch sub-tasks 1-3 now, and sub-tasks 4-6 in the next turn."
|
||||||
|
|
||||||
**Available Subagents:**
|
**Available Subagents:**
|
||||||
- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
|
- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.
|
||||||
@@ -22,27 +32,33 @@ You are running with subagent capabilities enabled. Your role is to be a **task
|
|||||||
|
|
||||||
✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**
|
✅ **DECOMPOSE + PARALLEL EXECUTION (Preferred Approach):**
|
||||||
|
|
||||||
For complex queries, break them down into multiple focused sub-tasks and execute in parallel:
|
For complex queries, break them down into focused sub-tasks and execute in parallel batches (max 3 per turn):
|
||||||
|
|
||||||
**Example 1: "Why is Tencent's stock price declining?"**
|
**Example 1: "Why is Tencent's stock price declining?" (3 sub-tasks → 1 batch)**
|
||||||
→ Decompose into 3 parallel searches:
|
→ Turn 1: Launch 3 subagents in parallel:
|
||||||
- Subagent 1: Recent financial reports, earnings data, and revenue trends
|
- Subagent 1: Recent financial reports, earnings data, and revenue trends
|
||||||
- Subagent 2: Negative news, controversies, and regulatory issues
|
- Subagent 2: Negative news, controversies, and regulatory issues
|
||||||
- Subagent 3: Industry trends, competitor performance, and market sentiment
|
- Subagent 3: Industry trends, competitor performance, and market sentiment
|
||||||
|
→ Turn 2: Synthesize results
|
||||||
|
|
||||||
**Example 2: "What are the latest AI trends in 2026?"**
|
**Example 2: "Compare 5 cloud providers" (5 sub-tasks → 2 batches)**
|
||||||
→ Decompose into 3 parallel research areas:
|
→ Turn 1: Launch 3 subagents in parallel:
|
||||||
- Subagent 1: LLM and foundation model developments
|
- Subagent 1: AWS pricing, features, and market position
|
||||||
- Subagent 2: AI infrastructure, hardware trends, and enterprise adoption
|
- Subagent 2: Azure pricing, features, and market position
|
||||||
- Subagent 3: Regulatory, ethical developments, and societal impact
|
- Subagent 3: GCP pricing, features, and market position
|
||||||
|
→ Turn 2: Launch 2 subagents in parallel:
|
||||||
|
- Subagent 4: Alibaba Cloud pricing, features, and market position
|
||||||
|
- Subagent 5: Oracle Cloud pricing, features, and market position
|
||||||
|
→ Turn 3: Synthesize ALL results into comprehensive comparison
|
||||||
|
|
||||||
**Example 3: "Refactor the authentication system"**
|
**Example 3: "Refactor the authentication system"**
|
||||||
→ Decompose into 3 parallel analysis:
|
→ Turn 1: Launch 3 subagents in parallel:
|
||||||
- Subagent 1: Analyze current auth implementation and technical debt
|
- Subagent 1: Analyze current auth implementation and technical debt
|
||||||
- Subagent 2: Research best practices and security patterns
|
- Subagent 2: Research best practices and security patterns
|
||||||
- Subagent 3: Review related tests, documentation, and vulnerabilities
|
- Subagent 3: Review related tests, documentation, and vulnerabilities
|
||||||
|
→ Turn 2: Synthesize results
|
||||||
|
|
||||||
✅ **USE Parallel Subagents (2+ subagents) when:**
|
✅ **USE Parallel Subagents (max 3 per turn) when:**
|
||||||
- **Complex research questions**: Requires multiple information sources or perspectives
|
- **Complex research questions**: Requires multiple information sources or perspectives
|
||||||
- **Multi-aspect analysis**: Task has several independent dimensions to explore
|
- **Multi-aspect analysis**: Task has several independent dimensions to explore
|
||||||
- **Large codebases**: Need to analyze different parts simultaneously
|
- **Large codebases**: Need to analyze different parts simultaneously
|
||||||
@@ -55,10 +71,17 @@ For complex queries, break them down into multiple focused sub-tasks and execute
|
|||||||
- **Meta conversation**: Questions about conversation history
|
- **Meta conversation**: Questions about conversation history
|
||||||
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)
|
- **Sequential dependencies**: Each step depends on previous results (do steps yourself sequentially)
|
||||||
|
|
||||||
**CRITICAL WORKFLOW**:
|
**CRITICAL WORKFLOW** (STRICTLY follow this before EVERY action):
|
||||||
1. In your thinking: Can I decompose this into 2-3 independent parallel sub-tasks?
|
1. **COUNT**: In your thinking, list all sub-tasks and count them explicitly: "I have N sub-tasks"
|
||||||
2. **YES** → Launch up to 3 `task` calls in parallel, then synthesize results
|
2. **PLAN BATCHES**: If N > 3, explicitly plan which sub-tasks go in which batch:
|
||||||
3. **NO** → Execute directly using available tools (bash, read_file, web_search, etc.)
|
- "Batch 1 (this turn): sub-tasks A, B, C"
|
||||||
|
- "Batch 2 (next turn): sub-tasks D, E, F"
|
||||||
|
3. **EXECUTE**: Launch ONLY the current batch (max 3 `task` calls). Do NOT launch sub-tasks from future batches.
|
||||||
|
4. **REPEAT**: After results return, launch the next batch. Continue until all batches complete.
|
||||||
|
5. **SYNTHESIZE**: After ALL batches are done, synthesize all results.
|
||||||
|
6. **Cannot decompose** → Execute directly using available tools (bash, read_file, web_search, etc.)
|
||||||
|
|
||||||
|
**⛔ VIOLATION: Launching more than 3 `task` calls in a single response is a HARD ERROR. The system WILL discard excess calls and you WILL lose work. Always batch.**
|
||||||
|
|
||||||
**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**
|
**Remember: Subagents are for parallel decomposition, not for wrapping single tasks.**
|
||||||
|
|
||||||
@@ -68,38 +91,35 @@ For complex queries, break them down into multiple focused sub-tasks and execute
|
|||||||
- The tool call will block until the subagent completes its work
|
- The tool call will block until the subagent completes its work
|
||||||
- Once complete, the result is returned to you directly
|
- Once complete, the result is returned to you directly
|
||||||
|
|
||||||
**Usage Example - Parallel Decomposition:**
|
**Usage Example 1 - Single Batch (≤3 sub-tasks):**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# User asks: "Why is Tencent's stock price declining?"
|
# User asks: "Why is Tencent's stock price declining?"
|
||||||
# Thinking: This is complex research requiring multiple angles
|
# Thinking: 3 sub-tasks → fits in 1 batch
|
||||||
# → Decompose into 3 parallel searches (max 3 subagents per turn)
|
|
||||||
|
|
||||||
# Launch 3 subagents in a SINGLE response with multiple tool calls:
|
# Turn 1: Launch 3 subagents in parallel
|
||||||
|
task(description="Tencent financial data", prompt="...", subagent_type="general-purpose")
|
||||||
|
task(description="Tencent news & regulation", prompt="...", subagent_type="general-purpose")
|
||||||
|
task(description="Industry & market trends", prompt="...", subagent_type="general-purpose")
|
||||||
|
# All 3 run in parallel → synthesize results
|
||||||
|
```
|
||||||
|
|
||||||
# Subagent 1: Financial data
|
**Usage Example 2 - Multiple Batches (>3 sub-tasks):**
|
||||||
task(
|
|
||||||
description="Tencent financial data",
|
|
||||||
prompt="Search for Tencent's latest financial reports, quarterly earnings, and revenue trends in 2025-2026. Focus on numbers and official data.",
|
|
||||||
subagent_type="general-purpose"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Subagent 2: Negative news & regulatory
|
```python
|
||||||
task(
|
# User asks: "Compare AWS, Azure, GCP, Alibaba Cloud, and Oracle Cloud"
|
||||||
description="Tencent news & regulation",
|
# Thinking: 5 sub-tasks → need 2 batches (3 + 2)
|
||||||
prompt="Search for recent negative news, controversies, and regulatory issues affecting Tencent in 2025-2026.",
|
|
||||||
subagent_type="general-purpose"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Subagent 3: Industry & market
|
# Turn 1: Launch first batch of 3
|
||||||
task(
|
task(description="AWS analysis", prompt="...", subagent_type="general-purpose")
|
||||||
description="Industry & market trends",
|
task(description="Azure analysis", prompt="...", subagent_type="general-purpose")
|
||||||
prompt="Search for Chinese tech industry trends, competitor performance (Alibaba, ByteDance), and macro-economic factors affecting Chinese tech stocks in 2025-2026.",
|
task(description="GCP analysis", prompt="...", subagent_type="general-purpose")
|
||||||
subagent_type="general-purpose"
|
|
||||||
)
|
|
||||||
|
|
||||||
# All 3 subagents run in parallel, results return simultaneously
|
# Turn 2: Launch second batch of 2 (after first batch completes)
|
||||||
# Then synthesize findings into comprehensive analysis
|
task(description="Alibaba Cloud analysis", prompt="...", subagent_type="general-purpose")
|
||||||
|
task(description="Oracle Cloud analysis", prompt="...", subagent_type="general-purpose")
|
||||||
|
|
||||||
|
# Turn 3: Synthesize ALL results from both batches
|
||||||
```
|
```
|
||||||
|
|
||||||
**Counter-Example - Direct Execution (NO subagents):**
|
**Counter-Example - Direct Execution (NO subagents):**
|
||||||
@@ -113,9 +133,10 @@ bash("npm test") # Direct execution, not task()
|
|||||||
```
|
```
|
||||||
|
|
||||||
**CRITICAL**:
|
**CRITICAL**:
|
||||||
|
- **Max 3 `task` calls per turn** - the system enforces this, excess calls are discarded
|
||||||
- Only use `task` when you can launch 2+ subagents in parallel
|
- Only use `task` when you can launch 2+ subagents in parallel
|
||||||
- Single task = No value from subagents = Execute directly
|
- Single task = No value from subagents = Execute directly
|
||||||
- Multiple tasks in SINGLE response = Parallel execution
|
- For >3 sub-tasks, use sequential batches of 3 across multiple turns
|
||||||
</subagent_system>"""
|
</subagent_system>"""
|
||||||
|
|
||||||
SYSTEM_PROMPT_TEMPLATE = """
|
SYSTEM_PROMPT_TEMPLATE = """
|
||||||
@@ -314,14 +335,18 @@ def apply_prompt_template(subagent_enabled: bool = False) -> str:
|
|||||||
|
|
||||||
# Add subagent reminder to critical_reminders if enabled
|
# Add subagent reminder to critical_reminders if enabled
|
||||||
subagent_reminder = (
|
subagent_reminder = (
|
||||||
"- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks and launch multiple subagents simultaneously. Synthesize results, don't execute directly.\n"
|
"- **Orchestrator Mode**: You are a task orchestrator - decompose complex tasks into parallel sub-tasks. "
|
||||||
|
"**HARD LIMIT: max 3 `task` calls per response.** "
|
||||||
|
"If >3 sub-tasks, split into sequential batches of ≤3. Synthesize after ALL batches complete.\n"
|
||||||
if subagent_enabled
|
if subagent_enabled
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add subagent thinking guidance if enabled
|
# Add subagent thinking guidance if enabled
|
||||||
subagent_thinking = (
|
subagent_thinking = (
|
||||||
"- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, decompose and launch multiple subagents in parallel. Your role is orchestrator, not executor.**\n"
|
"- **DECOMPOSITION CHECK: Can this task be broken into 2+ parallel sub-tasks? If YES, COUNT them. "
|
||||||
|
"If count > 3, you MUST plan batches of ≤3 and only launch the FIRST batch now. "
|
||||||
|
"NEVER launch more than 3 `task` calls in one response.**\n"
|
||||||
if subagent_enabled
|
if subagent_enabled
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,74 @@
|
|||||||
|
"""Middleware to fix dangling tool calls in message history.
|
||||||
|
|
||||||
|
A dangling tool call occurs when an AIMessage contains tool_calls but there are
|
||||||
|
no corresponding ToolMessages in the history (e.g., due to user interruption or
|
||||||
|
request cancellation). This causes LLM errors due to incomplete message format.
|
||||||
|
|
||||||
|
This middleware runs before the model call to detect and patch such gaps by
|
||||||
|
inserting synthetic ToolMessages with an error indicator.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import override
|
||||||
|
|
||||||
|
from langchain.agents import AgentState
|
||||||
|
from langchain.agents.middleware import AgentMiddleware
|
||||||
|
from langchain_core.messages import ToolMessage
|
||||||
|
from langgraph.runtime import Runtime
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):
|
||||||
|
"""Inserts placeholder ToolMessages for dangling tool calls before model invocation.
|
||||||
|
|
||||||
|
Scans the message history for AIMessages whose tool_calls lack corresponding
|
||||||
|
ToolMessages, and injects synthetic error responses so the LLM receives a
|
||||||
|
well-formed conversation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _fix_dangling_tool_calls(self, state: AgentState) -> dict | None:
|
||||||
|
messages = state.get("messages", [])
|
||||||
|
if not messages:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Collect IDs of all existing ToolMessages
|
||||||
|
existing_tool_msg_ids: set[str] = set()
|
||||||
|
for msg in messages:
|
||||||
|
if isinstance(msg, ToolMessage):
|
||||||
|
existing_tool_msg_ids.add(msg.tool_call_id)
|
||||||
|
|
||||||
|
# Find dangling tool calls and build patch messages
|
||||||
|
patches: list[ToolMessage] = []
|
||||||
|
for msg in messages:
|
||||||
|
if getattr(msg, "type", None) != "ai":
|
||||||
|
continue
|
||||||
|
tool_calls = getattr(msg, "tool_calls", None)
|
||||||
|
if not tool_calls:
|
||||||
|
continue
|
||||||
|
for tc in tool_calls:
|
||||||
|
tc_id = tc.get("id")
|
||||||
|
if tc_id and tc_id not in existing_tool_msg_ids:
|
||||||
|
patches.append(
|
||||||
|
ToolMessage(
|
||||||
|
content="[Tool call was interrupted and did not return a result.]",
|
||||||
|
tool_call_id=tc_id,
|
||||||
|
name=tc.get("name", "unknown"),
|
||||||
|
status="error",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
existing_tool_msg_ids.add(tc_id)
|
||||||
|
|
||||||
|
if not patches:
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.warning(f"Injecting {len(patches)} placeholder ToolMessage(s) for dangling tool calls")
|
||||||
|
return {"messages": patches}
|
||||||
|
|
||||||
|
@override
|
||||||
|
def before_model(self, state: AgentState, runtime: Runtime) -> dict | None:
|
||||||
|
return self._fix_dangling_tool_calls(state)
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def abefore_model(self, state: AgentState, runtime: Runtime) -> dict | None:
|
||||||
|
return self._fix_dangling_tool_calls(state)
|
||||||
61
backend/src/agents/middlewares/subagent_limit_middleware.py
Normal file
61
backend/src/agents/middlewares/subagent_limit_middleware.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""Middleware to enforce maximum concurrent subagent tool calls per model response."""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import override
|
||||||
|
|
||||||
|
from langchain.agents import AgentState
|
||||||
|
from langchain.agents.middleware import AgentMiddleware
|
||||||
|
from langgraph.runtime import Runtime
|
||||||
|
|
||||||
|
from src.subagents.executor import MAX_CONCURRENT_SUBAGENTS
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubagentLimitMiddleware(AgentMiddleware[AgentState]):
|
||||||
|
"""Truncates excess 'task' tool calls from a single model response.
|
||||||
|
|
||||||
|
When an LLM generates more than MAX_CONCURRENT_SUBAGENTS parallel task tool calls
|
||||||
|
in one response, this middleware keeps only the first MAX_CONCURRENT_SUBAGENTS and
|
||||||
|
discards the rest. This is more reliable than prompt-based limits.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _truncate_task_calls(self, state: AgentState) -> dict | None:
|
||||||
|
messages = state.get("messages", [])
|
||||||
|
if not messages:
|
||||||
|
return None
|
||||||
|
|
||||||
|
last_msg = messages[-1]
|
||||||
|
if getattr(last_msg, "type", None) != "ai":
|
||||||
|
return None
|
||||||
|
|
||||||
|
tool_calls = getattr(last_msg, "tool_calls", None)
|
||||||
|
if not tool_calls:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Count task tool calls
|
||||||
|
task_indices = [i for i, tc in enumerate(tool_calls) if tc.get("name") == "task"]
|
||||||
|
if len(task_indices) <= MAX_CONCURRENT_SUBAGENTS:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Build set of indices to drop (excess task calls beyond the limit)
|
||||||
|
indices_to_drop = set(task_indices[MAX_CONCURRENT_SUBAGENTS:])
|
||||||
|
truncated_tool_calls = [tc for i, tc in enumerate(tool_calls) if i not in indices_to_drop]
|
||||||
|
|
||||||
|
dropped_count = len(indices_to_drop)
|
||||||
|
logger.warning(
|
||||||
|
f"Truncated {dropped_count} excess task tool call(s) from model response "
|
||||||
|
f"(limit: {MAX_CONCURRENT_SUBAGENTS})"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Replace the AIMessage with truncated tool_calls (same id triggers replacement)
|
||||||
|
updated_msg = last_msg.model_copy(update={"tool_calls": truncated_tool_calls})
|
||||||
|
return {"messages": [updated_msg]}
|
||||||
|
|
||||||
|
@override
|
||||||
|
def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
|
||||||
|
return self._truncate_task_calls(state)
|
||||||
|
|
||||||
|
@override
|
||||||
|
async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
|
||||||
|
return self._truncate_task_calls(state)
|
||||||
@@ -392,23 +392,6 @@ class SubagentExecutor:
|
|||||||
MAX_CONCURRENT_SUBAGENTS = 3
|
MAX_CONCURRENT_SUBAGENTS = 3
|
||||||
|
|
||||||
|
|
||||||
def count_active_tasks_by_trace(trace_id: str) -> int:
|
|
||||||
"""Count active (PENDING or RUNNING) background tasks for a given trace_id.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
trace_id: The trace ID linking tasks to a parent invocation.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Number of active tasks with the given trace_id.
|
|
||||||
"""
|
|
||||||
with _background_tasks_lock:
|
|
||||||
return sum(
|
|
||||||
1
|
|
||||||
for task in _background_tasks.values()
|
|
||||||
if task.trace_id == trace_id and task.status in (SubagentStatus.PENDING, SubagentStatus.RUNNING)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_background_task_result(task_id: str) -> SubagentResult | None:
|
def get_background_task_result(task_id: str) -> SubagentResult | None:
|
||||||
"""Get the result of a background task.
|
"""Get the result of a background task.
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from langgraph.typing import ContextT
|
|||||||
|
|
||||||
from src.agents.thread_state import ThreadState
|
from src.agents.thread_state import ThreadState
|
||||||
from src.subagents import SubagentExecutor, get_subagent_config
|
from src.subagents import SubagentExecutor, get_subagent_config
|
||||||
from src.subagents.executor import MAX_CONCURRENT_SUBAGENTS, SubagentStatus, count_active_tasks_by_trace, get_background_task_result
|
from src.subagents.executor import SubagentStatus, get_background_task_result
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -86,11 +86,6 @@ def task_tool(
|
|||||||
# Get or generate trace_id for distributed tracing
|
# Get or generate trace_id for distributed tracing
|
||||||
trace_id = metadata.get("trace_id") or str(uuid.uuid4())[:8]
|
trace_id = metadata.get("trace_id") or str(uuid.uuid4())[:8]
|
||||||
|
|
||||||
# Check sub-agent limit before creating a new one
|
|
||||||
if trace_id and count_active_tasks_by_trace(trace_id) >= MAX_CONCURRENT_SUBAGENTS:
|
|
||||||
logger.warning(f"[trace={trace_id}] Sub-agent limit reached ({MAX_CONCURRENT_SUBAGENTS}). Rejecting new task: {description}")
|
|
||||||
return f"Error: Maximum number of concurrent sub-agents ({MAX_CONCURRENT_SUBAGENTS}) reached. Please wait for existing tasks to complete before launching new ones."
|
|
||||||
|
|
||||||
# Get available tools (excluding task tool to prevent nesting)
|
# Get available tools (excluding task tool to prevent nesting)
|
||||||
# Lazy import to avoid circular dependency
|
# Lazy import to avoid circular dependency
|
||||||
from src.tools import get_available_tools
|
from src.tools import get_available_tools
|
||||||
|
|||||||
3094
frontend/pnpm-lock.yaml
generated
3094
frontend/pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -81,7 +81,7 @@ export const enUS: Translations = {
|
|||||||
"Reasoning, planning and executing, get more accurate results, may take more time",
|
"Reasoning, planning and executing, get more accurate results, may take more time",
|
||||||
ultraMode: "Ultra",
|
ultraMode: "Ultra",
|
||||||
ultraModeDescription:
|
ultraModeDescription:
|
||||||
"Reasoning, planning and execution with subagents to divide work; best for complex multi-step tasks",
|
"Pro mode with subagents to divide work; best for complex multi-step tasks",
|
||||||
searchModels: "Search models...",
|
searchModels: "Search models...",
|
||||||
surpriseMe: "Surprise",
|
surpriseMe: "Surprise",
|
||||||
surpriseMePrompt: "Surprise me",
|
surpriseMePrompt: "Surprise me",
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ export const zhCN: Translations = {
|
|||||||
proModeDescription: "思考、计划再执行,获得更精准的结果,可能需要更多时间",
|
proModeDescription: "思考、计划再执行,获得更精准的结果,可能需要更多时间",
|
||||||
ultraMode: "Ultra",
|
ultraMode: "Ultra",
|
||||||
ultraModeDescription:
|
ultraModeDescription:
|
||||||
"思考、计划并执行,可调用子代理分工协作,适合复杂多步骤任务,能力最强",
|
"继承自 Pro 模式,可调用子代理分工协作,适合复杂多步骤任务,能力最强",
|
||||||
searchModels: "搜索模型...",
|
searchModels: "搜索模型...",
|
||||||
surpriseMe: "小惊喜",
|
surpriseMe: "小惊喜",
|
||||||
surpriseMePrompt: "给我一个小惊喜吧",
|
surpriseMePrompt: "给我一个小惊喜吧",
|
||||||
|
|||||||
Reference in New Issue
Block a user