From ca4ada5aa774fdecbe674791a4b8dd6a6d0fc30b Mon Sep 17 00:00:00 2001
From: Willem Jiang <willem.jiang@gmail.com>
Date: Thu, 27 Nov 2025 21:47:08 +0800
Subject: [PATCH] fix: multiple web_search ToolMessages only showing last
 result (#717)

* fix: Missing Required Fields in Plan Validation

* fix: exception raised during plan validation

* Fixed the test errors

* Addressed the PR review comments

* fix: multiple web_search ToolMessages only showing last result
---
 src/graph/nodes.py              |  23 ++-
 tests/integration/test_nodes.py | 258 ++++++++++++++++++++++++++++++++
 2 files changed, 275 insertions(+), 6 deletions(-)

diff --git a/src/graph/nodes.py b/src/graph/nodes.py
index a507333..406bdb0 100644
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -1062,14 +1062,25 @@ async def _execute_agent_step(
     current_step.execution_res = response_content
     logger.info(f"Step '{current_step.title}' execution completed by {agent_name}")
 
+    # Include all messages from agent result to preserve intermediate tool calls/results
+    # This ensures multiple web_search calls all appear in the stream, not just the final result
+    agent_messages = result.get("messages", [])
+    logger.debug(
+        f"{agent_name.capitalize()} returned {len(agent_messages)} messages. "
+        f"Message types: {[type(msg).__name__ for msg in agent_messages]}"
+    )
+    
+    # Count tool messages for logging
+    tool_message_count = sum(1 for msg in agent_messages if isinstance(msg, ToolMessage))
+    if tool_message_count > 0:
+        logger.info(
+            f"{agent_name.capitalize()} agent made {tool_message_count} tool calls. "
+            f"All tool results will be preserved and streamed to frontend."
+        )
+
     return Command(
         update={
-            "messages": [
-                HumanMessage(
-                    content=response_content,
-                    name=agent_name,
-                )
-            ],
+            "messages": agent_messages,
             "observations": observations + [response_content + validation_info],
             **preserve_state_meta_fields(state),
         },
diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py
index a99ec09..07c5a3f 100644
--- a/tests/integration/test_nodes.py
+++ b/tests/integration/test_nodes.py
@@ -2324,3 +2324,261 @@ def test_clarification_skips_specific_topics():
         result.update["research_topic"]
         == "Research Plan for Improving Efficiency of AI e-commerce Video Synthesis Technology Based on Transformer Model"
     )
+
+
+# ============================================================================
+# Issue #693 Tests: Multiple web_search ToolMessages Preservation
+# ============================================================================
+
+
+@pytest.mark.asyncio
+async def test_execute_agent_step_preserves_multiple_tool_messages():
+    """Regression test for Issue #693: every ToolMessage must be preserved.
+
+    The mocked agent returns the five-message transcript of a run with two
+    web_search calls. The test checks that _execute_agent_step puts the whole
+    transcript, in its original order, into the "messages" entry of the
+    Command update, and that the final answer still reaches "observations"
+    and the step's execution_res.
+    """
+    from langchain_core.messages import AIMessage, ToolMessage
+
+    # Minimal stand-in for a plan step; it carries only these three attributes.
+    class TestStep:
+        def __init__(self, title, description, execution_res=None):
+            self.title = title
+            self.description = description
+            self.execution_res = execution_res
+
+    # Test state with a plan whose single step has not been executed yet.
+    plan = MagicMock()
+    plan.title = "Test Research Plan"
+    plan.steps = [
+        TestStep(title="Test Step", description="Test Description", execution_res=None)
+    ]
+
+    state = {
+        "current_plan": plan,
+        "observations": [],
+        "locale": "en-US",
+        "resources": [],
+    }
+
+    final_answer = "Based on my research, here is the comprehensive answer..."
+
+    # Transcript of an agent run that makes two web_search calls:
+    # 1. AIMessage with first tool call
+    # 2. ToolMessage with first tool result
+    # 3. AIMessage with second tool call
+    # 4. ToolMessage with second tool result
+    # 5. Final AIMessage with the complete response
+    transcript = [
+        AIMessage(
+            content="I'll search for information about this topic.",
+            tool_calls=[{
+                "id": "call_1",
+                "name": "web_search",
+                "args": {"query": "first search query"},
+            }],
+        ),
+        ToolMessage(
+            content="First search result content here",
+            tool_call_id="call_1",
+            name="web_search",
+        ),
+        AIMessage(
+            content="Let me search for more specific information.",
+            tool_calls=[{
+                "id": "call_2",
+                "name": "web_search",
+                "args": {"query": "second search query"},
+            }],
+        ),
+        ToolMessage(
+            content="Second search result content here",
+            tool_call_id="call_2",
+            name="web_search",
+        ),
+        AIMessage(content=final_answer),
+    ]
+
+    # NOTE(review): parameter names are kept as-is (including the builtin-shadowing
+    # `input`) because the node may pass them by keyword - confirm before renaming.
+    async def mock_ainvoke(input, config):
+        return {"messages": transcript}
+
+    # Mock agent whose ainvoke replays the transcript above.
+    agent = MagicMock()
+    agent.ainvoke = mock_ainvoke
+
+    # Execute the agent step with HumanMessage replaced by a MagicMock factory.
+    with patch(
+        "src.graph.nodes.HumanMessage",
+        side_effect=lambda content, name=None: MagicMock(content=content, name=name),
+    ):
+        result = await _execute_agent_step(state, agent, "researcher")
+
+    # Verify the result is a Command with the expected goto target.
+    assert isinstance(result, Command)
+    assert result.goto == "research_team"
+
+    # The full transcript must be in the update, not just the final message content.
+    # Should have 5 messages: 2 tool-calling AIMessages + 2 ToolMessages + 1 final AIMessage
+    messages_in_update = result.update.get("messages", [])
+    assert len(messages_in_update) == 5, (
+        f"Expected 5 messages to be preserved, but got {len(messages_in_update)}. "
+        "This indicates that intermediate ToolMessages are being dropped, "
+        "which is the bug from Issue #693."
+    )
+
+    # Compare types position by position so that reordering is caught as well.
+    assert [type(msg) for msg in messages_in_update] == [
+        type(msg) for msg in transcript
+    ], "Message types or their order changed"
+
+    # Both tool results must be present with their content and call ids intact.
+    tool_messages = [msg for msg in messages_in_update if isinstance(msg, ToolMessage)]
+    assert len(tool_messages) == 2, "Should preserve both tool calls"
+    assert "First search result content here" in tool_messages[0].content
+    assert "Second search result content here" in tool_messages[1].content
+    assert [msg.tool_call_id for msg in tool_messages] == ["call_1", "call_2"]
+
+    # Observations must still end with the final response.
+    assert "observations" in result.update
+    observations = result.update["observations"]
+    assert len(observations) > 0
+    assert "Based on my research" in observations[-1]
+
+    # The step's execution result must be the final message content.
+    assert state["current_plan"].steps[0].execution_res == final_answer
+
+
+@pytest.mark.asyncio
+async def test_execute_agent_step_single_tool_call_still_works():
+    """
+    Guard against regressions from the Issue #693 fix: an agent run with
+    exactly one tool call must still yield a Command carrying all messages.
+    """
+    from langchain_core.messages import AIMessage, ToolMessage
+
+    class StepStub:
+        def __init__(self, title, description, execution_res=None):
+            self.title = title
+            self.description = description
+            self.execution_res = execution_res
+
+    pending_step = StepStub(
+        title="Test Step", description="Test Description", execution_res=None
+    )
+    plan = MagicMock()
+    plan.title = "Test Research Plan"
+    plan.steps = [pending_step]
+
+    state = {
+        "current_plan": plan,
+        "observations": [],
+        "locale": "en-US",
+        "resources": [],
+    }
+
+    async def mock_ainvoke(input, config):
+        # One web_search round trip followed by the final answer.
+        search_request = AIMessage(
+            content="I'll search for information.",
+            tool_calls=[
+                {
+                    "id": "call_1",
+                    "name": "web_search",
+                    "args": {"query": "search query"},
+                }
+            ],
+        )
+        search_result = ToolMessage(
+            content="Search result content",
+            tool_call_id="call_1",
+            name="web_search",
+        )
+        final_reply = AIMessage(
+            content="Here is the answer based on the search result."
+        )
+        return {"messages": [search_request, search_result, final_reply]}
+
+    agent = MagicMock()
+    agent.ainvoke = mock_ainvoke
+
+    human_message_stub = patch(
+        "src.graph.nodes.HumanMessage",
+        side_effect=lambda content, name=None: MagicMock(content=content, name=name),
+    )
+    with human_message_stub:
+        result = await _execute_agent_step(state, agent, "researcher")
+
+    # Result structure: a Command routed to the research team.
+    assert isinstance(result, Command)
+    assert result.goto == "research_team"
+
+    # All 3 messages are in the update, including the single ToolMessage.
+    messages_in_update = result.update.get("messages", [])
+    assert len(messages_in_update) == 3
+
+    tool_messages = [msg for msg in messages_in_update if isinstance(msg, ToolMessage)]
+    assert len(tool_messages) == 1
+    assert "Search result content" in tool_messages[0].content
+
+
+@pytest.mark.asyncio
+async def test_execute_agent_step_no_tool_calls_still_works():
+    """
+    Guard against regressions from the Issue #693 fix: an agent run that
+    answers directly, with no tool calls, must still complete the step.
+    """
+    from langchain_core.messages import AIMessage
+
+    class StepStub:
+        def __init__(self, title, description, execution_res=None):
+            self.title = title
+            self.description = description
+            self.execution_res = execution_res
+
+    pending_step = StepStub(
+        title="Test Step", description="Test Description", execution_res=None
+    )
+    plan = MagicMock()
+    plan.title = "Test Research Plan"
+    plan.steps = [pending_step]
+
+    state = {
+        "current_plan": plan,
+        "observations": [],
+        "locale": "en-US",
+        "resources": [],
+    }
+
+    direct_answer = (
+        "Based on my knowledge, here is the answer without needing to search."
+    )
+
+    async def mock_ainvoke(input, config):
+        # The agent replies immediately; no tool calls are made.
+        return {"messages": [AIMessage(content=direct_answer)]}
+
+    agent = MagicMock()
+    agent.ainvoke = mock_ainvoke
+
+    human_message_stub = patch(
+        "src.graph.nodes.HumanMessage",
+        side_effect=lambda content, name=None: MagicMock(content=content, name=name),
+    )
+    with human_message_stub:
+        result = await _execute_agent_step(state, agent, "researcher")
+
+    # Result structure: a Command routed to the research team.
+    assert isinstance(result, Command)
+    assert result.goto == "research_team"
+
+    # The lone message is carried over into the update.
+    messages_in_update = result.update.get("messages", [])
+    assert len(messages_in_update) == 1
+
+    # The step records the agent's answer as its execution result.
+    assert state["current_plan"].steps[0].execution_res == direct_answer