mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
fix(harness): normalize structured content for titles (#1155)
* fix(harness): normalize structured content for titles Flatten structured LangChain message content before prompting the title model so list/block payloads don't leak Python reprs into generated thread titles. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -158,7 +158,7 @@ Middlewares execute in strict order in `packages/harness/deerflow/agents/lead_ag
|
||||
4. **DanglingToolCallMiddleware** - Injects placeholder ToolMessages for AIMessage tool_calls that lack responses (e.g., due to user interruption)
|
||||
5. **SummarizationMiddleware** - Context reduction when approaching token limits (optional, if enabled)
|
||||
6. **TodoListMiddleware** - Task tracking with `write_todos` tool (optional, if plan_mode)
|
||||
7. **TitleMiddleware** - Auto-generates thread title after first complete exchange
|
||||
7. **TitleMiddleware** - Auto-generates the thread title after the first complete exchange; structured message content is normalized to plain text before it is sent to the title model
|
||||
8. **MemoryMiddleware** - Queues conversations for async memory update (filters to user + final AI responses)
|
||||
9. **ViewImageMiddleware** - Injects base64 image data before LLM call (conditional on vision support)
|
||||
10. **SubagentLimitMiddleware** - Truncates excess `task` tool calls from model response to enforce `MAX_CONCURRENT_SUBAGENTS` limit (optional, if subagent_enabled)
|
||||
|
||||
@@ -6,12 +6,14 @@
|
||||
|
||||
## 实现方式
|
||||
|
||||
使用 `TitleMiddleware` 在 `after_agent` 钩子中:
|
||||
使用 `TitleMiddleware` 在 `after_model` 钩子中:
|
||||
1. 检测是否是首次对话(1个用户消息 + 1个助手回复)
|
||||
2. 检查 state 是否已有 title
|
||||
3. 调用 LLM 生成简洁的标题(默认最多6个词)
|
||||
4. 将 title 存储到 `ThreadState` 中(会被 checkpointer 持久化)
|
||||
|
||||
TitleMiddleware 会先把 LangChain message content 里的结构化 block/list 内容归一化为纯文本,再拼到 title prompt 里,避免把 Python/JSON 的原始 repr 泄漏到标题生成模型。
|
||||
|
||||
## ⚠️ 重要:存储机制
|
||||
|
||||
### Title 存储位置
|
||||
|
||||
@@ -21,6 +21,25 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
|
||||
|
||||
state_schema = TitleMiddlewareState
|
||||
|
||||
def _normalize_content(self, content: object) -> str:
    """Flatten message content into plain text for the title prompt.

    Message content may be a plain string, a list of items, or a dict
    block. This reduces any of those shapes to a single string so that
    container reprs are never interpolated into the prompt.

    Args:
        content: A string, a list of nested content items, a dict block,
            or any other value.

    Returns:
        * the string itself when ``content`` is a ``str``;
        * for a ``dict``: its ``"text"`` value when that is a ``str``,
          otherwise the normalized ``"content"`` value when that key is
          present and not ``None``;
        * for a ``list``: the normalized items, with empty results
          dropped, joined by newlines;
        * ``""`` for everything else.
    """
    if isinstance(content, str):
        return content

    if isinstance(content, dict):
        text = content.get("text")
        if isinstance(text, str):
            return text
        # No usable "text" field: fall back to a nested "content" payload.
        inner = content.get("content")
        return "" if inner is None else self._normalize_content(inner)

    if isinstance(content, list):
        flattened = (self._normalize_content(entry) for entry in content)
        # filter(None, ...) drops items that normalized to an empty string.
        return "\n".join(filter(None, flattened))

    return ""
|
||||
|
||||
def _should_generate_title(self, state: TitleMiddlewareState) -> bool:
|
||||
"""Check if we should generate a title for this thread."""
|
||||
config = get_title_config()
|
||||
@@ -52,9 +71,8 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
|
||||
user_msg_content = next((m.content for m in messages if m.type == "human"), "")
|
||||
assistant_msg_content = next((m.content for m in messages if m.type == "ai"), "")
|
||||
|
||||
# Ensure content is string (LangChain messages can have list content)
|
||||
user_msg = str(user_msg_content) if user_msg_content else ""
|
||||
assistant_msg = str(assistant_msg_content) if assistant_msg_content else ""
|
||||
user_msg = self._normalize_content(user_msg_content)
|
||||
assistant_msg = self._normalize_content(assistant_msg_content)
|
||||
|
||||
# Use a lightweight model to generate title
|
||||
model = create_chat_model(thinking_enabled=False)
|
||||
@@ -67,8 +85,7 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
|
||||
|
||||
try:
|
||||
response = await model.ainvoke(prompt)
|
||||
# Ensure response content is string
|
||||
title_content = str(response.content) if response.content else ""
|
||||
title_content = self._normalize_content(response.content)
|
||||
title = title_content.strip().strip('"').strip("'")
|
||||
# Limit to max characters
|
||||
return title[: config.max_chars] if len(title) > config.max_chars else title
|
||||
|
||||
@@ -92,6 +92,36 @@ class TestTitleMiddlewareCoreLogic:
|
||||
assert "'" not in title
|
||||
assert len(title) == 12
|
||||
|
||||
def test_generate_title_normalizes_structured_message_and_response_content(self, monkeypatch):
    """Structured (list-of-block) content is flattened on both sides.

    The human/AI messages and the fake model's reply all carry their text
    inside ``[{"type": "text", "text": ...}]`` blocks. The prompt passed to
    the model must contain only the plain text, and the returned title
    must be the reply text with its surrounding quotes stripped.
    """
    _set_test_title_config(max_chars=20)
    title_middleware = TitleMiddleware()

    # Stub model whose awaited reply is itself structured content.
    structured_reply = MagicMock(content=[{"type": "text", "text": '"结构总结"'}])
    stub_model = MagicMock()
    stub_model.ainvoke = AsyncMock(return_value=structured_reply)
    monkeypatch.setattr(
        "deerflow.agents.middlewares.title_middleware.create_chat_model",
        lambda **kwargs: stub_model,
    )

    conversation = [
        HumanMessage(content=[{"type": "text", "text": "请帮我总结这段代码"}]),
        AIMessage(content=[{"type": "text", "text": "好的,先看结构"}]),
    ]
    generated = asyncio.run(title_middleware._generate_title({"messages": conversation}))

    # First positional argument of the awaited ainvoke call is the prompt.
    sent_prompt = stub_model.ainvoke.await_args.args[0]
    for expected_text in ("请帮我总结这段代码", "好的,先看结构"):
        assert expected_text in sent_prompt
    # Ensure structured message dict/JSON reprs are not leaking into the prompt.
    for leaked_fragment in ("{'type':", "'type':", '"type":'):
        assert leaked_fragment not in sent_prompt
    assert generated == "结构总结"
|
||||
|
||||
def test_generate_title_fallback_when_model_fails(self, monkeypatch):
|
||||
_set_test_title_config(max_chars=20)
|
||||
middleware = TitleMiddleware()
|
||||
|
||||
Reference in New Issue
Block a user