diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md index fb38d44..6c57461 100644 --- a/backend/CLAUDE.md +++ b/backend/CLAUDE.md @@ -158,7 +158,7 @@ Middlewares execute in strict order in `packages/harness/deerflow/agents/lead_ag 4. **DanglingToolCallMiddleware** - Injects placeholder ToolMessages for AIMessage tool_calls that lack responses (e.g., due to user interruption) 5. **SummarizationMiddleware** - Context reduction when approaching token limits (optional, if enabled) 6. **TodoListMiddleware** - Task tracking with `write_todos` tool (optional, if plan_mode) -7. **TitleMiddleware** - Auto-generates thread title after first complete exchange +7. **TitleMiddleware** - Auto-generates thread title after first complete exchange and normalizes structured message content before prompting the title model 8. **MemoryMiddleware** - Queues conversations for async memory update (filters to user + final AI responses) 9. **ViewImageMiddleware** - Injects base64 image data before LLM call (conditional on vision support) 10. **SubagentLimitMiddleware** - Truncates excess `task` tool calls from model response to enforce `MAX_CONCURRENT_SUBAGENTS` limit (optional, if subagent_enabled) diff --git a/backend/docs/AUTO_TITLE_GENERATION.md b/backend/docs/AUTO_TITLE_GENERATION.md index 024d4b4..1182a85 100644 --- a/backend/docs/AUTO_TITLE_GENERATION.md +++ b/backend/docs/AUTO_TITLE_GENERATION.md @@ -6,12 +6,14 @@ ## 实现方式 -使用 `TitleMiddleware` 在 `after_agent` 钩子中: +使用 `TitleMiddleware` 在 `after_model` 钩子中: 1. 检测是否是首次对话(1个用户消息 + 1个助手回复) 2. 检查 state 是否已有 title 3. 调用 LLM 生成简洁的标题(默认最多6个词) 4. 
def _normalize_content(self, content: object) -> str:
    """Flatten message content into a plain-text string.

    Handles the shapes this method explicitly checks for:

    * ``str``  -> returned unchanged.
    * ``dict`` -> its ``"text"`` value when that is a string; otherwise its
      ``"content"`` value (when not ``None``) is normalized recursively.
    * ``list`` -> every item is normalized recursively and the non-empty
      results are joined with newlines.

    Anything else (``None``, numbers, dicts without usable keys, ...)
    yields an empty string, so raw reprs never end up in the result.

    Args:
        content: Message content of any supported (or unsupported) shape.

    Returns:
        The extracted plain text, or ``""`` when nothing textual is found.
    """
    if isinstance(content, str):
        return content

    if isinstance(content, dict):
        text = content.get("text")
        if isinstance(text, str):
            return text
        # No direct text: fall back to a nested "content" payload if present.
        inner = content.get("content")
        return "" if inner is None else self._normalize_content(inner)

    if isinstance(content, list):
        pieces = []
        for item in content:
            piece = self._normalize_content(item)
            # Drop items that contributed no text to avoid blank lines.
            if piece:
                pieces.append(piece)
        return "\n".join(pieces)

    return ""
def test_generate_title_normalizes_structured_message_and_response_content(self, monkeypatch):
    """Structured (list-of-block) content is flattened for both prompt and title.

    The human/AI messages and the stubbed model response all carry
    ``[{"type": "text", "text": ...}]`` content. The prompt passed to the
    model must contain the plain texts without any dict/JSON repr fragments,
    and the returned title must be the unquoted response text.
    """
    _set_test_title_config(max_chars=20)
    middleware = TitleMiddleware()

    # Stub model whose async invoke returns block-structured content.
    stub_response = MagicMock(content=[{"type": "text", "text": '"结构总结"'}])
    stub_model = MagicMock()
    stub_model.ainvoke = AsyncMock(return_value=stub_response)

    def _fake_create_chat_model(**kwargs):
        return stub_model

    monkeypatch.setattr(
        "deerflow.agents.middlewares.title_middleware.create_chat_model",
        _fake_create_chat_model,
    )

    human = HumanMessage(content=[{"type": "text", "text": "请帮我总结这段代码"}])
    assistant = AIMessage(content=[{"type": "text", "text": "好的,先看结构"}])
    state = {"messages": [human, assistant]}

    title = asyncio.run(middleware._generate_title(state))

    # First positional argument of the awaited call is the rendered prompt.
    prompt = stub_model.ainvoke.await_args.args[0]
    for expected_text in ("请帮我总结这段代码", "好的,先看结构"):
        assert expected_text in prompt
    # Ensure structured message dict/JSON reprs are not leaking into the prompt.
    for leaked_fragment in ("{'type':", "'type':", '"type":'):
        assert leaked_fragment not in prompt
    assert title == "结构总结"