merge: upstream/experimental with citations feature

- Merge upstream changes including image search, tooltips, and UI improvements
- Keep citations feature with inline hover cards
- Resolve conflict in message-list-item.tsx: use upstream img max-width (90%) while preserving citations logic
- Maintain file upload improvements with citations support

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ruitanglin
2026-01-29 12:55:43 +08:00
20 changed files with 771 additions and 112 deletions

View File

@@ -158,6 +158,7 @@ The key AI trends for 2026 include enhanced reasoning capabilities, multimodal i
- Progressive Loading: Load resources incrementally as referenced in skills
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
- Including Images and Mermaid: Images and Mermaid diagrams are always welcomed in the Markdown format, and you're encouraged to use `![Image Description](image_path)\n\n` or "```mermaid" to display images in response or Markdown files
- Multi-task: Better utilize parallel tool calling to call multiple tools at one time for better performance
- Language Consistency: Keep using the same language as the user's
- Always Respond: Your thinking is internal. You MUST always provide a visible response to the user after thinking.

View File

@@ -5,7 +5,7 @@ from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, ToolMessage
from langchain_core.messages import ToolMessage
from langgraph.graph import END
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command
@@ -118,17 +118,13 @@ class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
name="ask_clarification",
)
ai_response_message = AIMessage(content=formatted_message)
# Return a Command that:
# 1. Adds the formatted tool message (keeping the AI message intact)
# 1. Adds the formatted tool message
# 2. Interrupts execution by going to __end__
# Note: We don't modify the AI message to preserve all fields (reasoning_content, tool_calls, etc.)
# This is especially important for thinking mode where reasoning_content is required
# Return Command to add the tool message and interrupt
# Note: We don't add an extra AIMessage here - the frontend will detect
# and display ask_clarification tool messages directly
return Command(
update={"messages": [tool_message, ai_response_message]},
update={"messages": [tool_message]},
goto=END,
)

View File

@@ -0,0 +1,3 @@
# Package public API: re-export the DuckDuckGo-backed image search tool.
from .tools import image_search_tool
__all__ = ["image_search_tool"]

View File

@@ -0,0 +1,139 @@
"""
Image Search Tool - Search images using DuckDuckGo for reference in image generation.
"""
import json
import logging
from langchain.tools import tool
from src.config import get_app_config
logger = logging.getLogger(__name__)


def _search_images(
    query: str,
    max_results: int = 5,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    size: str | None = None,
    color: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
    license_image: str | None = None,
) -> list[dict]:
    """
    Execute an image search via DuckDuckGo.

    Best-effort: every failure mode (missing dependency, network error,
    parse error) degrades to an empty result list rather than raising.

    Args:
        query: Search keywords.
        max_results: Maximum number of results to return.
        region: DuckDuckGo region code (e.g. "wt-wt" for worldwide).
        safesearch: Safe search level ("on", "moderate", "off").
        size: Image size filter (Small/Medium/Large/Wallpaper).
        color: Color filter.
        type_image: Image type filter (photo/clipart/gif/transparent/line).
        layout: Layout filter (Square/Tall/Wide).
        license_image: License filter.

    Returns:
        List of raw result dicts from ddgs; empty list on any failure.
    """
    try:
        from ddgs import DDGS
    except ImportError:
        # Optional dependency missing: degrade to "no results" instead of crashing.
        logger.error("ddgs library not installed. Run: pip install ddgs")
        return []
    kwargs: dict = {
        "region": region,
        "safesearch": safesearch,
        "max_results": max_results,
    }
    # Forward only the filters that were actually provided; a missing key
    # and an explicit None are not equivalent for the ddgs API.
    optional_filters = {
        "size": size,
        "color": color,
        "type_image": type_image,
        "layout": layout,
        "license_image": license_image,
    }
    kwargs.update({key: value for key, value in optional_filters.items() if value})
    try:
        # Construct the client inside the guard: a failure in DDGS() itself
        # previously escaped the except and propagated to the caller.
        results = DDGS().images(query, **kwargs)
        return list(results) if results else []
    except Exception:
        # logger.exception records the traceback; lazy %-args avoid eager
        # string formatting on the logging call.
        logger.exception("Failed to search images (query=%r)", query)
        return []
@tool("image_search", parse_docstring=True)
def image_search_tool(
    query: str,
    max_results: int = 5,
    size: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.
    **When to use:**
    - Before generating character/portrait images: search for similar poses, expressions, styles
    - Before generating specific objects/products: search for accurate visual references
    - Before generating scenes/locations: search for architectural or environmental references
    - Before generating fashion/clothing: search for style and detail references
    The returned image URLs can be used as reference images in image generation to significantly improve quality.
    Args:
        query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").
        max_results: Maximum number of images to return. Default is 5.
        size: Image size filter. Options: "Small", "Medium", "Large", "Wallpaper". Use "Large" for reference images.
        type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.
        layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.
    """
    # NOTE: the docstring above is runtime behavior (parse_docstring=True
    # turns it into the tool description/schema) — do not edit casually.
    config = get_app_config().get_tool_config("image_search")
    # Allow deployment config to override max_results. Pydantic's
    # model_extra is None unless the model declares extra="allow", so
    # guard before membership testing (the bare `in` raised TypeError).
    if config is not None:
        extra = config.model_extra or {}
        if "max_results" in extra:
            max_results = extra.get("max_results", max_results)
    results = _search_images(
        query=query,
        max_results=max_results,
        size=size,
        type_image=type_image,
        layout=layout,
    )
    if not results:
        # Structured error payload so the calling agent can react programmatically.
        return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)
    # Normalize the ddgs result keys into a stable schema for downstream use.
    normalized_results = [
        {
            "title": r.get("title", ""),
            "image_url": r.get("image", ""),
            "thumbnail_url": r.get("thumbnail", ""),
            "source_url": r.get("url", ""),
            "source": r.get("source", ""),
            "width": r.get("width"),
            "height": r.get("height"),
        }
        for r in results
    ]
    output = {
        "query": query,
        "total_results": len(normalized_results),
        "results": normalized_results,
        "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",
    }
    return json.dumps(output, indent=2, ensure_ascii=False)

View File

@@ -55,8 +55,7 @@ class SandboxConfig(BaseModel):
)
environment: dict[str, str] = Field(
default_factory=dict,
description="Environment variables to inject into the sandbox container. "
"Values starting with $ will be resolved from host environment variables.",
description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
)
model_config = ConfigDict(extra="allow")

View File

@@ -71,9 +71,7 @@ async def get_mcp_configuration() -> McpConfigResponse:
"""
config = get_extensions_config()
return McpConfigResponse(
mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in config.mcp_servers.items()}
)
return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in config.mcp_servers.items()})
@router.put(
@@ -143,9 +141,7 @@ async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfig
# Reload the configuration and update the global cache
reloaded_config = reload_extensions_config()
return McpConfigResponse(
mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded_config.mcp_servers.items()}
)
return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded_config.mcp_servers.items()})
except Exception as e:
logger.error(f"Failed to update MCP configuration: {e}", exc_info=True)

View File

@@ -48,20 +48,14 @@ class PatchedChatDeepSeek(ChatDeepSeek):
# Iterate through both and match by position
if len(payload_messages) == len(original_messages):
for payload_msg, orig_msg in zip(payload_messages, original_messages):
if (
payload_msg.get("role") == "assistant"
and isinstance(orig_msg, AIMessage)
):
if payload_msg.get("role") == "assistant" and isinstance(orig_msg, AIMessage):
reasoning_content = orig_msg.additional_kwargs.get("reasoning_content")
if reasoning_content is not None:
payload_msg["reasoning_content"] = reasoning_content
else:
# Fallback: match by counting assistant messages
ai_messages = [m for m in original_messages if isinstance(m, AIMessage)]
assistant_payloads = [
(i, m) for i, m in enumerate(payload_messages)
if m.get("role") == "assistant"
]
assistant_payloads = [(i, m) for i, m in enumerate(payload_messages) if m.get("role") == "assistant"]
for (idx, payload_msg), ai_msg in zip(assistant_payloads, ai_messages):
reasoning_content = ai_msg.additional_kwargs.get("reasoning_content")