diff --git a/src/graph/nodes.py b/src/graph/nodes.py
index 51a83c5..9f207c0 100644
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -4,6 +4,7 @@
import json
import logging
import os
+import re
from functools import partial
from typing import Annotated, Any, Literal
@@ -900,6 +901,12 @@ def reporter_node(state: State, config: RunnableConfig):
logger.debug(f"Current invoke messages: {invoke_messages}")
response = get_llm_by_type(AGENT_LLM_MAP["reporter"]).invoke(invoke_messages)
response_content = response.content
+    # Strip <think>...</think> tags that some models (e.g. QwQ, DeepSeek) embed
+    # directly in content instead of using the reasoning_content field (#781)
+    if isinstance(response_content, str) and "</think>" in response_content:
+        response_content = re.sub(
+            r"<think>[\s\S]*?</think>", "", response_content
+        ).strip()
logger.info(f"reporter response: {response_content}")
return {
diff --git a/src/server/app.py b/src/server/app.py
index 97f2413..94ff3c0 100644
--- a/src/server/app.py
+++ b/src/server/app.py
@@ -6,6 +6,7 @@ import base64
import json
import logging
import os
+import re
from typing import Annotated, Any, List, Optional, cast
from uuid import uuid4
@@ -423,6 +424,11 @@ def _create_event_stream_message(
if not isinstance(content, str):
content = json.dumps(content, ensure_ascii=False)
+    # Strip <think>...</think> tags that some models (e.g. DeepSeek-R1, QwQ via ollama)
+    # embed directly in content instead of using the reasoning_content field (#781)
+    if isinstance(content, str) and "</think>" in content:
+        content = re.sub(r"<think>[\s\S]*?</think>", "", content).strip()
+
event_stream_message = {
"thread_id": thread_id,
"agent": agent_name,
diff --git a/tests/integration/test_nodes.py b/tests/integration/test_nodes.py
index 8445bfa..ce7b9c6 100644
--- a/tests/integration/test_nodes.py
+++ b/tests/integration/test_nodes.py
@@ -2823,3 +2823,115 @@ async def test_execute_agent_step_no_tool_calls_still_works():
# Verify step execution result is set
assert state["current_plan"].steps[0].execution_res == "Based on my knowledge, here is the answer without needing to search."
+
+
+class TestReporterNodeThinkTagStripping:
+ """Tests for stripping tags from reporter_node output (#781).
+
+ Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
+ content using ... tags instead of the separate
+ reasoning_content field.
+ """
+
+ def _make_mock_state(self):
+ plan = MagicMock()
+ plan.title = "Test Plan"
+ plan.thought = "Test Thought"
+ return {
+ "current_plan": plan,
+ "observations": [],
+ "citations": [],
+ "locale": "en-US",
+ }
+
+ def _run_reporter_node(self, response_content):
+ state = self._make_mock_state()
+ mock_response = MagicMock()
+ mock_response.content = response_content
+
+ mock_configurable = MagicMock()
+
+ with (
+ patch(
+ "src.graph.nodes.Configuration.from_runnable_config",
+ return_value=mock_configurable,
+ ),
+ patch(
+ "src.graph.nodes.apply_prompt_template",
+ return_value=[{"role": "user", "content": "test"}],
+ ),
+ patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
+ patch("src.graph.nodes.get_llm_token_limit_by_type", return_value=4096),
+ patch("src.graph.nodes.AGENT_LLM_MAP", {"reporter": "basic"}),
+ patch(
+ "src.graph.nodes.ContextManager"
+ ) as mock_ctx_mgr,
+ ):
+ mock_ctx_mgr.return_value.compress_messages.return_value = {"messages": []}
+ mock_llm = MagicMock()
+ mock_llm.invoke.return_value = mock_response
+ mock_get_llm.return_value = mock_llm
+
+ result = reporter_node(state, MagicMock())
+ return result
+
+ def test_strips_think_tag_at_beginning(self):
+ result = self._run_reporter_node(
+ "\nLet me analyze...\n\n\n# Report\n\nContent here."
+ )
+ assert "" not in result["final_report"]
+ assert "# Report" in result["final_report"]
+ assert "Content here." in result["final_report"]
+
+ def test_strips_multiple_think_blocks(self):
+ result = self._run_reporter_node(
+ "First thought\nParagraph 1.\nSecond thought\nParagraph 2."
+ )
+ assert "" not in result["final_report"]
+ assert "Paragraph 1." in result["final_report"]
+ assert "Paragraph 2." in result["final_report"]
+
+ def test_preserves_content_without_think_tags(self):
+ result = self._run_reporter_node("Normal content without think tags.")
+ assert result["final_report"] == "Normal content without think tags."
+
+ def test_empty_content_after_stripping(self):
+ result = self._run_reporter_node(
+ "Only thinking, no real content"
+ )
+ assert "" not in result["final_report"]
+
+ def test_non_string_content_passes_through(self):
+ """Verify non-string content is not broken by the stripping logic."""
+ state = self._make_mock_state()
+ mock_response = MagicMock()
+ # Simulate non-string content (e.g. list from multimodal model)
+ mock_response.content = ["some", "list"]
+
+ mock_configurable = MagicMock()
+
+ with (
+ patch(
+ "src.graph.nodes.Configuration.from_runnable_config",
+ return_value=mock_configurable,
+ ),
+ patch(
+ "src.graph.nodes.apply_prompt_template",
+ return_value=[{"role": "user", "content": "test"}],
+ ),
+ patch("src.graph.nodes.get_llm_by_type") as mock_get_llm,
+ patch("src.graph.nodes.get_llm_token_limit_by_type", return_value=4096),
+ patch("src.graph.nodes.AGENT_LLM_MAP", {"reporter": "basic"}),
+ patch(
+ "src.graph.nodes.ContextManager"
+ ) as mock_ctx_mgr,
+ ):
+ mock_ctx_mgr.return_value.compress_messages.return_value = {"messages": []}
+ mock_llm = MagicMock()
+ mock_llm.invoke.return_value = mock_response
+ mock_get_llm.return_value = mock_llm
+
+ result = reporter_node(state, MagicMock())
+
+ # Non-string content should pass through unchanged
+ assert result["final_report"] == ["some", "list"]
diff --git a/tests/unit/server/test_app.py b/tests/unit/server/test_app.py
index 5ed71f7..d57d018 100644
--- a/tests/unit/server/test_app.py
+++ b/tests/unit/server/test_app.py
@@ -16,6 +16,7 @@ from langgraph.types import Command
from src.config.report_style import ReportStyle
from src.server.app import (
_astream_workflow_generator,
+ _create_event_stream_message,
_create_interrupt_event,
_make_event,
_stream_graph_events,
@@ -1680,3 +1681,53 @@ class TestGlobalConnectionPoolUsage:
"""Helper to create an empty async generator."""
if False:
yield
+
+
+class TestCreateEventStreamMessageThinkTagStripping:
+ """Tests for stripping tags from streamed content (#781).
+
+ Some models (e.g. DeepSeek-R1, QwQ via ollama) embed reasoning in
+ content using ... tags instead of the separate
+ reasoning_content field.
+ """
+
+ def _make_mock_chunk(self, content):
+ chunk = AIMessageChunk(content=content)
+ chunk.id = "msg_test"
+ chunk.response_metadata = {}
+ return chunk
+
+ def test_strips_think_tag_at_beginning(self):
+ chunk = self._make_mock_chunk(
+ "\nLet me analyze...\n\n\n# Report\n\nContent here."
+ )
+ result = _create_event_stream_message(chunk, {}, "thread-1", "reporter")
+ assert "" not in result["content"]
+ assert "# Report" in result["content"]
+ assert "Content here." in result["content"]
+
+ def test_strips_multiple_think_blocks(self):
+ chunk = self._make_mock_chunk(
+ "First thought\nParagraph 1.\nSecond thought\nParagraph 2."
+ )
+ result = _create_event_stream_message(chunk, {}, "thread-1", "coordinator")
+ assert "" not in result["content"]
+ assert "Paragraph 1." in result["content"]
+ assert "Paragraph 2." in result["content"]
+
+ def test_preserves_content_without_think_tags(self):
+ chunk = self._make_mock_chunk("Normal content without think tags.")
+ result = _create_event_stream_message(chunk, {}, "thread-1", "planner")
+ assert result["content"] == "Normal content without think tags."
+
+ def test_empty_content_after_stripping(self):
+        chunk = self._make_mock_chunk("<think>Only thinking, no real content</think>")
+ result = _create_event_stream_message(chunk, {}, "thread-1", "reporter")
+ assert "" not in result["content"]
+
+ def test_preserves_reasoning_content_field(self):
+ chunk = self._make_mock_chunk("Actual content")
+ chunk.additional_kwargs["reasoning_content"] = "This is reasoning"
+ result = _create_event_stream_message(chunk, {}, "thread-1", "planner")
+ assert result["content"] == "Actual content"
+ assert result["reasoning_content"] == "This is reasoning"