feat: add view_image tool and optimize web fetch tools

Add image viewing capability for vision-enabled models with ViewImageMiddleware and view_image_tool. Limit web_fetch tool output to 4096 characters to prevent excessive content. Update model config to support a vision capability flag.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-04-03 06:12:14 +08:00 · 2026-01-29 13:44:04 +08:00
parent d77e7c32b2
commit 7414947cc6
12 changed files with 390 additions and 13 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -21,6 +21,7 @@ models:
    api_key: $OPENAI_API_KEY  # Use environment variable
    max_tokens: 4096
    temperature: 0.7
+    supports_vision: true  # Enable vision support for view_image tool

  # Example: Anthropic Claude model
  # - name: claude-3-5-sonnet
@@ -29,6 +30,7 @@ models:
  #   model: claude-3-5-sonnet-20241022
  #   api_key: $ANTHROPIC_API_KEY
  #   max_tokens: 8192
+  #   supports_vision: true  # Enable vision support for view_image tool

  # Example: DeepSeek model (with thinking support)
  # - name: deepseek-v3
@@ -38,6 +40,7 @@ models:
  #   api_key: $DEEPSEEK_API_KEY
  #   max_tokens: 16384
  #   supports_thinking: true
+  #   supports_vision: false  # DeepSeek V3 does not support vision
  #   when_thinking_enabled:
  #     extra_body:
  #       thinking:
@@ -51,6 +54,7 @@ models:
  #   api_base: https://ark.cn-beijing.volces.com/api/v3
  #   api_key: $VOLCENGINE_API_KEY
  #   supports_thinking: true
+  #   supports_vision: false  # Check your specific model's capabilities
  #   when_thinking_enabled:
  #     extra_body:
  #       thinking:
@@ -65,6 +69,7 @@ models:
  #   api_key: $MOONSHOT_API_KEY
  #   max_tokens: 32768
  #   supports_thinking: true
+  #   supports_vision: false  # Check your specific model's capabilities
  #   when_thinking_enabled:
  #     extra_body:
  #       thinking:
@@ -107,6 +112,11 @@ tools:
    use: src.community.image_search.tools:image_search_tool
    max_results: 5

+  # View image tool (display local images to user; intended for models with supports_vision: true)
+  - name: view_image
+    group: file:read
+    use: src.tools.builtins:view_image_tool
+
  # File operations tools
  - name: ls
    group: file:read