fix: normalize structured LLM content in serialization and memory updater (#1215)

* fix: normalize ToolMessage structured content in serialization

When models return ToolMessage content as a list of content blocks
(e.g. [{"type": "text", "text": "..."}]), the UI previously displayed
the raw Python repr string instead of the extracted text.

Replace str(msg.content) with the existing _extract_text() helper in
both _serialize_message() and stream() to properly normalize
list-of-blocks content to plain text.

Fixes #1149

Also fixes the same root cause as #1188 (characters displayed one per
line when tool response content is returned as structured blocks).

Added 11 regression tests covering string, list-of-blocks, mixed,
empty, and fallback content types.
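The fix can be illustrated with a standalone sketch (names and the block shape `{"type": "text", "text": ...}` follow LangChain-style content blocks; `extract_text` here is illustrative, not the actual helper):

```python
# Hypothetical sketch of the normalization: extract plain text from
# list-of-blocks content instead of calling str() on it, which is what
# produced the raw Python repr in the UI.
def extract_text(content) -> str:
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                parts.append(block["text"])
        return "\n".join(parts)
    return str(content)  # fallback for unexpected content types

blocks = [{"type": "text", "text": "result: 42"}]
print(str(blocks))          # the raw repr the UI used to display
print(extract_text(blocks))  # -> result: 42
```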

* fix(memory): extract text from structured LLM responses in memory updater

When LLMs return response content as a list of content blocks
(e.g. [{"type": "text", "text": "..."}]) instead of a plain string,
str() produces a Python repr, which breaks JSON parsing in the memory
updater. This caused memory updates to silently fail.
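A minimal demonstration of the failure mode (the content shape is an illustrative LangChain-style text block, not taken from the actual payload):

```python
import json

# str() on structured content yields a Python repr (single quotes),
# which json.loads() rejects -- hence the silent memory-update failures.
content = [{"type": "text", "text": '{"memories": ["prefers dark mode"]}'}]

raw = str(content)
try:
    json.loads(raw)
    parsed = True
except json.JSONDecodeError:
    parsed = False
print(parsed)  # False: a repr is not valid JSON

# Extracting the text block first recovers the JSON payload:
payload = content[0]["text"]
print(json.loads(payload))  # {'memories': ['prefers dark mode']}
```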

Changes:
- Add _extract_text() helper in updater.py for safe content normalization
- Use _extract_text() instead of str(response.content) in update_memory()
- Fix format_conversation_for_update() to handle plain strings in list content
- Fix subagent executor fallback path to extract text from list content
- Replace print() with structured logging (logger.info/warning/error)
- Add 13 regression tests covering _extract_text, format_conversation,
  and update_memory with structured LLM responses

* fix: address Copilot review - defensive text extraction + logger.exception

- client.py _extract_text: use block.get('text') + isinstance check (prevent KeyError/TypeError)
- prompt.py format_conversation_for_update: same defensive check for dict text blocks
- executor.py: type-safe text extraction in both code paths; fall back to a placeholder instead of str(raw_content)
- updater.py: use logger.exception() instead of logger.error() for traceback preservation
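The defensive pattern described above can be sketched like this (`safe_block_text` is an illustrative name, not the actual helper):

```python
# .get() plus an isinstance check prevents KeyError on blocks without a
# "text" key and TypeError when the value is not a string.
def safe_block_text(block):
    if not isinstance(block, dict):
        return None
    text_val = block.get("text")
    return text_val if isinstance(text_val, str) else None

assert safe_block_text({"type": "text", "text": "ok"}) == "ok"
assert safe_block_text({"type": "image_url"}) is None       # missing key: no KeyError
assert safe_block_text({"type": "text", "text": 42}) is None  # non-str value: no TypeError
assert safe_block_text("not a dict") is None
```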

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: preserve chunked structured content without spurious newlines

* fix: restore backend unit test compatibility

---------

Co-authored-by: Exploreunive <Exploreunive@users.noreply.github.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Author: haoliangxu
Date: 2026-03-22 17:29:29 +08:00
Committed by: GitHub
Parent commit: 9fad717977
Commit: 3af709097e
8 changed files with 420 additions and 30 deletions


@@ -241,7 +241,7 @@ class DeerFlowClient:
         if isinstance(msg, ToolMessage):
             return {
                 "type": "tool",
-                "content": msg.content if isinstance(msg.content, str) else str(msg.content),
+                "content": DeerFlowClient._extract_text(msg.content),
                 "name": getattr(msg, "name", None),
                 "tool_call_id": getattr(msg, "tool_call_id", None),
                 "id": getattr(msg, "id", None),
@@ -254,17 +254,44 @@ class DeerFlowClient:
     @staticmethod
     def _extract_text(content) -> str:
-        """Extract plain text from AIMessage content (str or list of blocks)."""
+        """Extract plain text from AIMessage content (str or list of blocks).
+
+        String chunks are concatenated without separators to avoid corrupting
+        token/character deltas or chunked JSON payloads. Dict-based text blocks
+        are treated as full text blocks and joined with newlines to preserve
+        readability.
+        """
         if isinstance(content, str):
             return content
         if isinstance(content, list):
-            parts = []
+            if content and all(isinstance(block, str) for block in content):
+                chunk_like = len(content) > 1 and all(
+                    isinstance(block, str)
+                    and len(block) <= 20
+                    and any(ch in block for ch in '{}[]":,')
+                    for block in content
+                )
+                return "".join(content) if chunk_like else "\n".join(content)
+            pieces: list[str] = []
+            pending_str_parts: list[str] = []
+
+            def flush_pending_str_parts() -> None:
+                if pending_str_parts:
+                    pieces.append("".join(pending_str_parts))
+                    pending_str_parts.clear()
+
             for block in content:
                 if isinstance(block, str):
-                    parts.append(block)
-                elif isinstance(block, dict) and block.get("type") == "text":
-                    parts.append(block["text"])
-            return "\n".join(parts) if parts else ""
+                    pending_str_parts.append(block)
+                elif isinstance(block, dict):
+                    flush_pending_str_parts()
+                    text_val = block.get("text")
+                    if isinstance(text_val, str):
+                        pieces.append(text_val)
+            flush_pending_str_parts()
+            return "\n".join(pieces) if pieces else ""
         return str(content)

     # ------------------------------------------------------------------
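The chunk heuristic in this hunk can be exercised standalone (a re-implementation of just that branch, for illustration):

```python
# Short string fragments containing JSON-ish punctuation are treated as
# streaming deltas and concatenated without separators; ordinary full
# strings are joined with newlines, matching the hunk above.
def join_string_blocks(content: list) -> str:
    chunk_like = len(content) > 1 and all(
        isinstance(block, str)
        and len(block) <= 20
        and any(ch in block for ch in '{}[]":,')
        for block in content
    )
    return "".join(content) if chunk_like else "\n".join(content)

print(join_string_blocks(['{"a"', ': 1', '}']))           # chunked JSON: no spurious newlines
print(join_string_blocks(["first line", "second line"]))  # full blocks: newline join
```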
@@ -360,7 +387,7 @@ class DeerFlowClient:
                         type="messages-tuple",
                         data={
                             "type": "tool",
-                            "content": msg.content if isinstance(msg.content, str) else str(msg.content),
+                            "content": self._extract_text(msg.content),
                             "name": getattr(msg, "name", None),
                             "tool_call_id": getattr(msg, "tool_call_id", None),
                             "id": msg_id,