feat: optimize vision tools and image handling

- Add model-aware vision tool loading based on supports_vision flag
- Move view_image_tool from config to builtin tools for dynamic inclusion
- Add timeout to image search to prevent hanging requests
- Optimize image search results format using thumbnails
- Add image validation for reference images in generation
- Improve error handling with detailed messages

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-04-03 14:22:13 +08:00 · 2026-01-29 14:57:26 +08:00
parent 3cbf54b2eb
commit 7aa10b980f
5 changed files with 59 additions and 19 deletions
--- a/backend/src/agents/lead_agent/agent.py
+++ b/backend/src/agents/lead_agent/agent.py
@@ -230,7 +230,7 @@ def make_lead_agent(config: RunnableConfig):
    print(f"thinking_enabled: {thinking_enabled}, model_name: {model_name}, is_plan_mode: {is_plan_mode}")
    return create_agent(
        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
-        tools=get_available_tools(),
+        tools=get_available_tools(model_name=model_name),
        middleware=_build_middlewares(config),
        system_prompt=apply_prompt_template(),
        state_schema=ThreadState,
--- a/backend/src/community/image_search/tools.py
+++ b/backend/src/community/image_search/tools.py
@@ -46,7 +46,7 @@ def _search_images(
        logger.error("ddgs library not installed. Run: pip install ddgs")
        return []

-    ddgs = DDGS()
+    ddgs = DDGS(timeout=30)

    try:
        kwargs = {
@@ -119,12 +119,8 @@ def image_search_tool(
    normalized_results = [
        {
            "title": r.get("title", ""),
-            "image_url": r.get("image", ""),
+            "image_url": r.get("thumbnail", ""),
            "thumbnail_url": r.get("thumbnail", ""),
-            "source_url": r.get("url", ""),
-            "source": r.get("source", ""),
-            "width": r.get("width"),
-            "height": r.get("height"),
        }
        for r in results
    ]
--- a/backend/src/tools/tools.py
+++ b/backend/src/tools/tools.py
@@ -4,7 +4,7 @@ from langchain.tools import BaseTool

 from src.config import get_app_config
 from src.reflection import resolve_variable
-from src.tools.builtins import ask_clarification_tool, present_file_tool
+from src.tools.builtins import ask_clarification_tool, present_file_tool, view_image_tool

 logger = logging.getLogger(__name__)

@@ -14,7 +14,7 @@ BUILTIN_TOOLS = [
 ]


-def get_available_tools(groups: list[str] | None = None, include_mcp: bool = True) -> list[BaseTool]:
+def get_available_tools(groups: list[str] | None = None, include_mcp: bool = True, model_name: str | None = None) -> list[BaseTool]:
    """Get all available tools from config.

    Note: MCP tools should be initialized at application startup using
@@ -23,6 +23,7 @@ def get_available_tools(groups: list[str] | None = None, include_mcp: bool = Tru
    Args:
        groups: Optional list of tool groups to filter by.
        include_mcp: Whether to include tools from MCP servers (default: True).
+        model_name: Optional model name to determine if vision tools should be included.

    Returns:
        List of available tools.
@@ -51,4 +52,16 @@ def get_available_tools(groups: list[str] | None = None, include_mcp: bool = Tru
        except Exception as e:
            logger.error(f"Failed to get cached MCP tools: {e}")

-    return loaded_tools + BUILTIN_TOOLS + mcp_tools
+    # Conditionally add view_image_tool only if the model supports vision
+    builtin_tools = BUILTIN_TOOLS.copy()
+
+    # If no model_name specified, use the first model (default)
+    if model_name is None and config.models:
+        model_name = config.models[0].name
+
+    model_config = config.get_model_config(model_name) if model_name else None
+    if model_config is not None and model_config.supports_vision:
+        builtin_tools.append(view_image_tool)
+        logger.info(f"Including view_image_tool for model '{model_name}' (supports_vision=True)")
+
+    return loaded_tools + builtin_tools + mcp_tools