From ca4ada5aa774fdecbe674791a4b8dd6a6d0fc30b Mon Sep 17 00:00:00 2001 From: Willem Jiang Date: Thu, 27 Nov 2025 21:47:08 +0800 Subject: [PATCH] fix: multiple web_search ToolMessages only showing last result (#717) * fix: Missing Required Fields in Plan Validation * fix: the exception of plan validation * Fixed the test errors * Addressed the comments of the PR reviews * fix: multiple web_search ToolMessages only showing last result --- src/graph/nodes.py | 23 ++- tests/integration/test_nodes.py | 258 ++++++++++++++++++++++++++++++++ 2 files changed, 275 insertions(+), 6 deletions(-) diff --git a/src/graph/nodes.py b/src/graph/nodes.py index a507333..406bdb0 100644 --- a/src/graph/nodes.py +++ b/src/graph/nodes.py @@ -1062,14 +1062,25 @@ async def _execute_agent_step( current_step.execution_res = response_content logger.info(f"Step '{current_step.title}' execution completed by {agent_name}") + # Include all messages from agent result to preserve intermediate tool calls/results + # This ensures multiple web_search calls all appear in the stream, not just the final result + agent_messages = result.get("messages", []) + logger.debug( + f"{agent_name.capitalize()} returned {len(agent_messages)} messages. " + f"Message types: {[type(msg).__name__ for msg in agent_messages]}" + ) + + # Count tool messages for logging + tool_message_count = sum(1 for msg in agent_messages if isinstance(msg, ToolMessage)) + if tool_message_count > 0: + logger.info( + f"{agent_name.capitalize()} agent made {tool_message_count} tool calls. " + f"All tool results will be preserved and streamed to frontend." + ) + return Command( update={ - "messages": [ - HumanMessage( - content=response_content, - name=agent_name, - ) - ], + "messages": agent_messages, "observations": observations + [response_content + validation_info], **preserve_state_meta_fields(state), }, diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py index a99ec09..07c5a3f 100644 --- a/tests/integration/test_nodes.py +++ b/tests/integration/test_nodes.py @@ -2324,3 +2324,261 @@ def test_clarification_skips_specific_topics(): result.update["research_topic"] == "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model" ) + + +# ============================================================================ +# Issue #693 Tests: Multiple web_search ToolMessages Preservation +# ============================================================================ + + +@pytest.mark.asyncio +async def test_execute_agent_step_preserves_multiple_tool_messages(): + """ + Test for Issue #693: Verify that all ToolMessages from multiple tool calls + (e.g., multiple web_search calls) are preserved and not just the final result. + + This test ensures that when an agent makes multiple web_search calls, each + ToolMessage is preserved in the Command update, allowing the frontend to + receive and display all search results. + """ + from langchain_core.messages import AIMessage, ToolMessage + + # Create test state with a plan and an unexecuted step + class TestStep: + def __init__(self, title, description, execution_res=None): + self.title = title + self.description = description + self.execution_res = execution_res + + Plan = MagicMock() + Plan.title = "Test Research Plan" + Plan.steps = [ + TestStep(title="Test Step", description="Test Description", execution_res=None) + ] + + state = { + "current_plan": Plan, + "observations": [], + "locale": "en-US", + "resources": [], + } + + # Create a mock agent that simulates multiple web_search tool calls + # This mimics what a ReAct agent does internally + agent = MagicMock() + + async def mock_ainvoke(input, config): + # Simulate the agent making 2 web_search calls with this message sequence: + # 1. AIMessage with first tool call + # 2. ToolMessage with first tool result + # 3. AIMessage with second tool call + # 4. ToolMessage with second tool result + # 5. Final AIMessage with the complete response + + messages = [ + AIMessage( + content="I'll search for information about this topic.", + tool_calls=[{ + "id": "call_1", + "name": "web_search", + "args": {"query": "first search query"} + }] + ), + ToolMessage( + content="First search result content here", + tool_call_id="call_1", + name="web_search", + ), + AIMessage( + content="Let me search for more specific information.", + tool_calls=[{ + "id": "call_2", + "name": "web_search", + "args": {"query": "second search query"} + }] + ), + ToolMessage( + content="Second search result content here", + tool_call_id="call_2", + name="web_search", + ), + AIMessage( + content="Based on my research, here is the comprehensive answer..." + ), + ] + return {"messages": messages} + + agent.ainvoke = mock_ainvoke + + # Execute the agent step + with patch( + "src.graph.nodes.HumanMessage", + side_effect=lambda content, name=None: MagicMock(content=content, name=name), + ): + result = await _execute_agent_step(state, agent, "researcher") + + # Verify the result is a Command with correct goto + assert isinstance(result, Command) + assert result.goto == "research_team" + + # Verify that ALL messages are preserved in the Command update + # (not just the final message content) + messages_in_update = result.update.get("messages", []) + + # Should have 5 messages: 2 AIMessages + 2 ToolMessages + 1 final AIMessage + assert len(messages_in_update) == 5, ( + f"Expected 5 messages to be preserved, but got {len(messages_in_update)}. " + f"This indicates that intermediate ToolMessages are being dropped, " + f"which is the bug from Issue #693." + ) + + # Verify message types + message_types = [type(msg).__name__ for msg in messages_in_update] + assert message_types.count("AIMessage") == 3, "Should have 3 AIMessages" + assert message_types.count("ToolMessage") == 2, "Should have 2 ToolMessages" + + # Verify that we have both ToolMessages with their content + tool_messages = [msg for msg in messages_in_update if isinstance(msg, ToolMessage)] + assert len(tool_messages) == 2, "Should preserve both tool calls" + assert "First search result content here" in tool_messages[0].content + assert "Second search result content here" in tool_messages[1].content + + # Verify that observations still contain the final response + assert "observations" in result.update + observations = result.update["observations"] + assert len(observations) > 0 + assert "Based on my research" in observations[-1] + + # Verify step execution result is set to final message + assert state["current_plan"].steps[0].execution_res == "Based on my research, here is the comprehensive answer..." + + +@pytest.mark.asyncio +async def test_execute_agent_step_single_tool_call_still_works(): + """ + Test that the fix for Issue #693 doesn't break the case where + an agent makes only a single tool call. + """ + from langchain_core.messages import AIMessage, ToolMessage + + class TestStep: + def __init__(self, title, description, execution_res=None): + self.title = title + self.description = description + self.execution_res = execution_res + + Plan = MagicMock() + Plan.title = "Test Research Plan" + Plan.steps = [ + TestStep(title="Test Step", description="Test Description", execution_res=None) + ] + + state = { + "current_plan": Plan, + "observations": [], + "locale": "en-US", + "resources": [], + } + + agent = MagicMock() + + async def mock_ainvoke(input, config): + # Simulate a single web_search call + messages = [ + AIMessage( + content="I'll search for information.", + tool_calls=[{ + "id": "call_1", + "name": "web_search", + "args": {"query": "search query"} + }] + ), + ToolMessage( + content="Search result content", + tool_call_id="call_1", + name="web_search", + ), + AIMessage( + content="Here is the answer based on the search result." + ), + ] + return {"messages": messages} + + agent.ainvoke = mock_ainvoke + + with patch( + "src.graph.nodes.HumanMessage", + side_effect=lambda content, name=None: MagicMock(content=content, name=name), + ): + result = await _execute_agent_step(state, agent, "researcher") + + # Verify result structure + assert isinstance(result, Command) + assert result.goto == "research_team" + + # Verify all 3 messages are preserved + messages_in_update = result.update.get("messages", []) + assert len(messages_in_update) == 3 + + # Verify the single tool message is present + tool_messages = [msg for msg in messages_in_update if isinstance(msg, ToolMessage)] + assert len(tool_messages) == 1 + assert "Search result content" in tool_messages[0].content + + +@pytest.mark.asyncio +async def test_execute_agent_step_no_tool_calls_still_works(): + """ + Test that the fix for Issue #693 doesn't break the case where + an agent completes without making any tool calls. + """ + from langchain_core.messages import AIMessage + + class TestStep: + def __init__(self, title, description, execution_res=None): + self.title = title + self.description = description + self.execution_res = execution_res + + Plan = MagicMock() + Plan.title = "Test Research Plan" + Plan.steps = [ + TestStep(title="Test Step", description="Test Description", execution_res=None) + ] + + state = { + "current_plan": Plan, + "observations": [], + "locale": "en-US", + "resources": [], + } + + agent = MagicMock() + + async def mock_ainvoke(input, config): + # Agent responds without making any tool calls + messages = [ + AIMessage( + content="Based on my knowledge, here is the answer without needing to search." + ), + ] + return {"messages": messages} + + agent.ainvoke = mock_ainvoke + + with patch( + "src.graph.nodes.HumanMessage", + side_effect=lambda content, name=None: MagicMock(content=content, name=name), + ): + result = await _execute_agent_step(state, agent, "researcher") + + # Verify result structure + assert isinstance(result, Command) + assert result.goto == "research_team" + + # Verify the single message is preserved + messages_in_update = result.update.get("messages", []) + assert len(messages_in_update) == 1 + + # Verify step execution result is set + assert state["current_plan"].steps[0].execution_res == "Based on my knowledge, here is the answer without needing to search."