From 423f5c829c464b135cf418e525000feedb80c35b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A4=A7=E7=8C=AB=E5=AD=90?= <1811866786@qq.com>
Date: Mon, 16 Feb 2026 09:38:17 +0800
Subject: [PATCH] fix: strip <think> tags from reporter output to prevent
 thinking text leakage (#781) (#862)

* fix: strip <think> tags from LLM output to prevent thinking text leakage (#781)

Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
content using <think>...</think> tags instead of the separate
reasoning_content field. This causes thinking text to leak into both
streamed messages and the final report.

Fix at two layers:

- server/app.py: strip <think> tags in _create_event_stream_message so
  ALL streamed content is filtered (coordinator, planner, etc.)
- graph/nodes.py: strip <think> tags in reporter_node before storing
  final_report (which is not streamed through the event layer)

The regex uses a fast-path check ("</think>" in content) to avoid
unnecessary regex calls on normal content.

* refactor: add defensive check for think tag stripping and add reporter_node tests (#781)

- Add isinstance and fast-path check in reporter_node before regex,
  consistent with app.py
- Add TestReporterNodeThinkTagStripping with 5 test cases covering
  various scenarios

* chore: re-trigger review
---
 src/graph/nodes.py              |   7 ++
 src/server/app.py               |   6 ++
 tests/integration/test_nodes.py | 112 ++++++++++++++++++++++++++++++++
 tests/unit/server/test_app.py   |  51 +++++++++++++++
 4 files changed, 176 insertions(+)

diff --git a/src/graph/nodes.py b/src/graph/nodes.py
index 51a83c5..9f207c0 100644
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -4,6 +4,7 @@
 import json
 import logging
 import os
+import re
 from functools import partial
 from typing import Annotated, Any, Literal
 
@@ -900,6 +901,12 @@ def reporter_node(state: State, config: RunnableConfig):
     logger.debug(f"Current invoke messages: {invoke_messages}")
     response = get_llm_by_type(AGENT_LLM_MAP["reporter"]).invoke(invoke_messages)
     response_content = response.content
+    # Strip <think>...</think> tags that some models (e.g. QwQ, DeepSeek) embed
+    # directly in content instead of using the reasoning_content field (#781)
+    if isinstance(response_content, str) and "</think>" in response_content:
+        response_content = re.sub(
+            r"<think>[\s\S]*?</think>", "", response_content
+        ).strip()
     logger.info(f"reporter response: {response_content}")
 
     return {
diff --git a/src/server/app.py b/src/server/app.py
index 97f2413..94ff3c0 100644
--- a/src/server/app.py
+++ b/src/server/app.py
@@ -6,6 +6,7 @@
 import base64
 import json
 import logging
 import os
+import re
 from typing import Annotated, Any, List, Optional, cast
 from uuid import uuid4
 
@@ -423,6 +424,11 @@ def _create_event_stream_message(
     if not isinstance(content, str):
         content = json.dumps(content, ensure_ascii=False)
 
+    # Strip <think>...</think> tags that some models (e.g. DeepSeek-R1, QwQ via ollama)
+    # embed directly in content instead of using the reasoning_content field (#781)
+    if isinstance(content, str) and "</think>" in content:
+        content = re.sub(r"<think>[\s\S]*?</think>", "", content).strip()
+
     event_stream_message = {
         "thread_id": thread_id,
         "agent": agent_name,
diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py
index 8445bfa..ce7b9c6 100644
--- a/tests/integration/test_nodes.py
+++ b/tests/integration/test_nodes.py
@@ -2823,3 +2823,115 @@ async def test_execute_agent_step_no_tool_calls_still_works():
 
     # Verify step execution result is set
     assert state["current_plan"].steps[0].execution_res == "Based on my knowledge, here is the answer without needing to search."
+
+
+class TestReporterNodeThinkTagStripping:
+    """Tests for stripping <think> tags from reporter_node output (#781).
+
+    Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
+    content using <think>...</think> tags instead of the separate
+    reasoning_content field.
+    """
+
+    def _make_mock_state(self):
+        plan = MagicMock()
+        plan.title = "Test Plan"
+        plan.thought = "Test Thought"
+        return {
+            "current_plan": plan,
+            "observations": [],
+            "citations": [],
+            "locale": "en-US",
+        }
+
+    def _run_reporter_node(self, response_content):
+        state = self._make_mock_state()
+        mock_response = MagicMock()
+        mock_response.content = response_content
+
+        mock_configurable = MagicMock()
+
+        with (
+            patch(
+                "src.graph.nodes.Configuration.from_runnable_config",
+                return_value=mock_configurable,
+            ),
+            patch(
+                "src.graph.nodes.apply_prompt_template",
+                return_value=[{"role": "user", "content": "test"}],
+            ),
+            patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
+            patch("src.graph.nodes.get_llm_token_limit_by_type", return_value=4096),
+            patch("src.graph.nodes.AGENT_LLM_MAP", {"reporter": "basic"}),
+            patch(
+                "src.graph.nodes.ContextManager"
+            ) as mock_ctx_mgr,
+        ):
+            mock_ctx_mgr.return_value.compress_messages.return_value = {"messages": []}
+            mock_llm = MagicMock()
+            mock_llm.invoke.return_value = mock_response
+            mock_get_llm.return_value = mock_llm
+
+            result = reporter_node(state, MagicMock())
+        return result
+
+    def test_strips_think_tag_at_beginning(self):
+        result = self._run_reporter_node(
+            "<think>\nLet me analyze...\n</think>\n\n# Report\n\nContent here."
+        )
+        assert "<think>" not in result["final_report"]
+        assert "# Report" in result["final_report"]
+        assert "Content here." in result["final_report"]
+
+    def test_strips_multiple_think_blocks(self):
+        result = self._run_reporter_node(
+            "<think>First thought</think>\nParagraph 1.\n<think>Second thought</think>\nParagraph 2."
+        )
+        assert "<think>" not in result["final_report"]
+        assert "Paragraph 1." in result["final_report"]
+        assert "Paragraph 2." in result["final_report"]
+
+    def test_preserves_content_without_think_tags(self):
+        result = self._run_reporter_node("Normal content without think tags.")
+        assert result["final_report"] == "Normal content without think tags."
+
+    def test_empty_content_after_stripping(self):
+        result = self._run_reporter_node(
+            "<think>Only thinking, no real content</think>"
+        )
+        assert "<think>" not in result["final_report"]
+
+    def test_non_string_content_passes_through(self):
+        """Verify non-string content is not broken by the stripping logic."""
+        state = self._make_mock_state()
+        mock_response = MagicMock()
+        # Simulate non-string content (e.g. list from multimodal model)
+        mock_response.content = ["some", "list"]
+
+        mock_configurable = MagicMock()
+
+        with (
+            patch(
+                "src.graph.nodes.Configuration.from_runnable_config",
+                return_value=mock_configurable,
+            ),
+            patch(
+                "src.graph.nodes.apply_prompt_template",
+                return_value=[{"role": "user", "content": "test"}],
+            ),
+            patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
+            patch("src.graph.nodes.get_llm_token_limit_by_type", return_value=4096),
+            patch("src.graph.nodes.AGENT_LLM_MAP", {"reporter": "basic"}),
+            patch(
+                "src.graph.nodes.ContextManager"
+            ) as mock_ctx_mgr,
+        ):
+            mock_ctx_mgr.return_value.compress_messages.return_value = {"messages": []}
+            mock_llm = MagicMock()
+            mock_llm.invoke.return_value = mock_response
+            mock_get_llm.return_value = mock_llm
+
+            result = reporter_node(state, MagicMock())
+
+        # Non-string content should pass through unchanged
+        assert result["final_report"] == ["some", "list"]
diff --git a/tests/unit/server/test_app.py b/tests/unit/server/test_app.py
index 5ed71f7..d57d018 100644
--- a/tests/unit/server/test_app.py
+++ b/tests/unit/server/test_app.py
@@ -16,6 +16,7 @@ from langgraph.types import Command
 from src.config.report_style import ReportStyle
 from src.server.app import (
     _astream_workflow_generator,
+    _create_event_stream_message,
     _create_interrupt_event,
     _make_event,
     _stream_graph_events,
@@ -1680,3 +1681,53 @@
         """Helper to create an empty async generator."""
         if False:
             yield
+
+
+class TestCreateEventStreamMessageThinkTagStripping:
+    """Tests for stripping <think> tags from streamed content (#781).
+
+    Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
+    content using <think>...</think> tags instead of the separate
+    reasoning_content field.
+    """
+
+    def _make_mock_chunk(self, content):
+        chunk = AIMessageChunk(content=content)
+        chunk.id = "msg_test"
+        chunk.response_metadata = {}
+        return chunk
+
+    def test_strips_think_tag_at_beginning(self):
+        chunk = self._make_mock_chunk(
+            "<think>\nLet me analyze...\n</think>\n\n# Report\n\nContent here."
+        )
+        result = _create_event_stream_message(chunk, {}, "thread-1", "reporter")
+        assert "<think>" not in result["content"]
+        assert "# Report" in result["content"]
+        assert "Content here." in result["content"]
+
+    def test_strips_multiple_think_blocks(self):
+        chunk = self._make_mock_chunk(
+            "<think>First thought</think>\nParagraph 1.\n<think>Second thought</think>\nParagraph 2."
+        )
+        result = _create_event_stream_message(chunk, {}, "thread-1", "coordinator")
+        assert "<think>" not in result["content"]
+        assert "Paragraph 1." in result["content"]
+        assert "Paragraph 2." in result["content"]
+
+    def test_preserves_content_without_think_tags(self):
+        chunk = self._make_mock_chunk("Normal content without think tags.")
+        result = _create_event_stream_message(chunk, {}, "thread-1", "planner")
+        assert result["content"] == "Normal content without think tags."
+
+    def test_empty_content_after_stripping(self):
+        chunk = self._make_mock_chunk("<think>Only thinking, no real content</think>")
+        result = _create_event_stream_message(chunk, {}, "thread-1", "reporter")
+        assert "<think>" not in result["content"]
+
+    def test_preserves_reasoning_content_field(self):
+        chunk = self._make_mock_chunk("Actual content")
+        chunk.additional_kwargs["reasoning_content"] = "This is reasoning"
+        result = _create_event_stream_message(chunk, {}, "thread-1", "planner")
+        assert result["content"] == "Actual content"
+        assert result["reasoning_content"] == "This is reasoning"