merge: upstream/experimental with citations feature

- Merge upstream changes including image search, tooltips, and UI improvements
- Keep citations feature with inline hover cards
- Resolve conflict in message-list-item.tsx: use upstream img max-width (90%) while preserving citations logic
- Maintain file upload improvements with citations support

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
ruitanglin
2026-01-29 12:55:43 +08:00
20 changed files with 771 additions and 112 deletions

View File

@@ -158,6 +158,7 @@ The key AI trends for 2026 include enhanced reasoning capabilities, multimodal i
- Progressive Loading: Load resources incrementally as referenced in skills
- Output Files: Final deliverables must be in `/mnt/user-data/outputs`
- Clarity: Be direct and helpful, avoid unnecessary meta-commentary
- Including Images and Mermaid: Images and Mermaid diagrams are always welcomed in the Markdown format, and you're encouraged to use `![Image Description](image_path)\n\n` or "```mermaid" to display images in response or Markdown files
- Multi-task: Better utilize parallel tool calling to call multiple tools at one time for better performance
- Language Consistency: Keep using the same language as the user's
- Always Respond: Your thinking is internal. You MUST always provide a visible response to the user after thinking.

View File

@@ -5,7 +5,7 @@ from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, ToolMessage
from langchain_core.messages import ToolMessage
from langgraph.graph import END
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command
@@ -118,17 +118,13 @@ class ClarificationMiddleware(AgentMiddleware[ClarificationMiddlewareState]):
name="ask_clarification",
)
ai_response_message = AIMessage(content=formatted_message)
# Return a Command that:
# 1. Adds the formatted tool message (keeping the AI message intact)
# 1. Adds the formatted tool message
# 2. Interrupts execution by going to __end__
# Note: We don't modify the AI message to preserve all fields (reasoning_content, tool_calls, etc.)
# This is especially important for thinking mode where reasoning_content is required
# Return Command to add the tool message and interrupt
# Note: We don't add an extra AIMessage here - the frontend will detect
# and display ask_clarification tool messages directly
return Command(
update={"messages": [tool_message, ai_response_message]},
update={"messages": [tool_message]},
goto=END,
)

View File

@@ -0,0 +1,3 @@
# Package public API: re-export the DuckDuckGo-backed image search tool.
from .tools import image_search_tool
__all__ = ["image_search_tool"]

View File

@@ -0,0 +1,139 @@
"""
Image Search Tool - Search images using DuckDuckGo for reference in image generation.
"""
import json
import logging
from langchain.tools import tool
from src.config import get_app_config
logger = logging.getLogger(__name__)


def _search_images(
    query: str,
    max_results: int = 5,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    size: str | None = None,
    color: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
    license_image: str | None = None,
) -> list[dict]:
    """
    Execute an image search via DuckDuckGo.

    Best-effort: every failure mode (missing dependency, network error,
    parse error) degrades to an empty result list rather than raising.

    Args:
        query: Search keywords.
        max_results: Maximum number of results to return.
        region: DuckDuckGo region code (e.g. "wt-wt" for worldwide).
        safesearch: Safe search level ("on", "moderate", "off").
        size: Image size filter (Small/Medium/Large/Wallpaper).
        color: Color filter.
        type_image: Image type filter (photo/clipart/gif/transparent/line).
        layout: Layout filter (Square/Tall/Wide).
        license_image: License filter.

    Returns:
        List of raw result dicts from ddgs; empty list on any failure.
    """
    try:
        from ddgs import DDGS
    except ImportError:
        # Optional dependency missing: degrade to "no results" instead of crashing.
        logger.error("ddgs library not installed. Run: pip install ddgs")
        return []
    kwargs: dict = {
        "region": region,
        "safesearch": safesearch,
        "max_results": max_results,
    }
    # Forward only the filters that were actually provided; a missing key
    # and an explicit None are not equivalent for the ddgs API.
    optional_filters = {
        "size": size,
        "color": color,
        "type_image": type_image,
        "layout": layout,
        "license_image": license_image,
    }
    kwargs.update({key: value for key, value in optional_filters.items() if value})
    try:
        # Construct the client inside the guard: a failure in DDGS() itself
        # previously escaped the except and propagated to the caller.
        results = DDGS().images(query, **kwargs)
        return list(results) if results else []
    except Exception:
        # logger.exception records the traceback; lazy %-args avoid eager
        # string formatting on the logging call.
        logger.exception("Failed to search images (query=%r)", query)
        return []
@tool("image_search", parse_docstring=True)
def image_search_tool(
    query: str,
    max_results: int = 5,
    size: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.
    **When to use:**
    - Before generating character/portrait images: search for similar poses, expressions, styles
    - Before generating specific objects/products: search for accurate visual references
    - Before generating scenes/locations: search for architectural or environmental references
    - Before generating fashion/clothing: search for style and detail references
    The returned image URLs can be used as reference images in image generation to significantly improve quality.
    Args:
        query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").
        max_results: Maximum number of images to return. Default is 5.
        size: Image size filter. Options: "Small", "Medium", "Large", "Wallpaper". Use "Large" for reference images.
        type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.
        layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.
    """
    # NOTE: the docstring above is runtime behavior (parse_docstring=True
    # turns it into the tool description/schema) — do not edit casually.
    config = get_app_config().get_tool_config("image_search")
    # Allow deployment config to override max_results. Pydantic's
    # model_extra is None unless the model declares extra="allow", so
    # guard before membership testing (the bare `in` raised TypeError).
    if config is not None:
        extra = config.model_extra or {}
        if "max_results" in extra:
            max_results = extra.get("max_results", max_results)
    results = _search_images(
        query=query,
        max_results=max_results,
        size=size,
        type_image=type_image,
        layout=layout,
    )
    if not results:
        # Structured error payload so the calling agent can react programmatically.
        return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)
    # Normalize the ddgs result keys into a stable schema for downstream use.
    normalized_results = [
        {
            "title": r.get("title", ""),
            "image_url": r.get("image", ""),
            "thumbnail_url": r.get("thumbnail", ""),
            "source_url": r.get("url", ""),
            "source": r.get("source", ""),
            "width": r.get("width"),
            "height": r.get("height"),
        }
        for r in results
    ]
    output = {
        "query": query,
        "total_results": len(normalized_results),
        "results": normalized_results,
        "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",
    }
    return json.dumps(output, indent=2, ensure_ascii=False)

View File

@@ -55,8 +55,7 @@ class SandboxConfig(BaseModel):
)
environment: dict[str, str] = Field(
default_factory=dict,
description="Environment variables to inject into the sandbox container. "
"Values starting with $ will be resolved from host environment variables.",
description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
)
model_config = ConfigDict(extra="allow")

View File

@@ -71,9 +71,7 @@ async def get_mcp_configuration() -> McpConfigResponse:
"""
config = get_extensions_config()
return McpConfigResponse(
mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in config.mcp_servers.items()}
)
return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in config.mcp_servers.items()})
@router.put(
@@ -143,9 +141,7 @@ async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfig
# Reload the configuration and update the global cache
reloaded_config = reload_extensions_config()
return McpConfigResponse(
mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded_config.mcp_servers.items()}
)
return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded_config.mcp_servers.items()})
except Exception as e:
logger.error(f"Failed to update MCP configuration: {e}", exc_info=True)

View File

@@ -48,20 +48,14 @@ class PatchedChatDeepSeek(ChatDeepSeek):
# Iterate through both and match by position
if len(payload_messages) == len(original_messages):
for payload_msg, orig_msg in zip(payload_messages, original_messages):
if (
payload_msg.get("role") == "assistant"
and isinstance(orig_msg, AIMessage)
):
if payload_msg.get("role") == "assistant" and isinstance(orig_msg, AIMessage):
reasoning_content = orig_msg.additional_kwargs.get("reasoning_content")
if reasoning_content is not None:
payload_msg["reasoning_content"] = reasoning_content
else:
# Fallback: match by counting assistant messages
ai_messages = [m for m in original_messages if isinstance(m, AIMessage)]
assistant_payloads = [
(i, m) for i, m in enumerate(payload_messages)
if m.get("role") == "assistant"
]
assistant_payloads = [(i, m) for i, m in enumerate(payload_messages) if m.get("role") == "assistant"]
for (idx, payload_msg), ai_msg in zip(assistant_payloads, ai_messages):
reasoning_content = ai_msg.additional_kwargs.get("reasoning_content")