feat: add view_image tool and optimize web fetch tools

Add image viewing capability for vision-enabled models with ViewImageMiddleware and view_image_tool. Limit web_fetch tool output to 4096 characters to prevent excessive content. Update model config to support vision capability flag.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
hetao
2026-01-29 13:44:04 +08:00
parent d77e7c32b2
commit 7414947cc6
12 changed files with 390 additions and 13 deletions

View File

@@ -21,6 +21,7 @@ models:
api_key: $OPENAI_API_KEY # Use environment variable
max_tokens: 4096
temperature: 0.7
supports_vision: true # Enable vision support for view_image tool
# Example: Anthropic Claude model
# - name: claude-3-5-sonnet
@@ -29,6 +30,7 @@ models:
# model: claude-3-5-sonnet-20241022
# api_key: $ANTHROPIC_API_KEY
# max_tokens: 8192
# supports_vision: true # Enable vision support for view_image tool
# Example: DeepSeek model (with thinking support)
# - name: deepseek-v3
@@ -38,6 +40,7 @@ models:
# api_key: $DEEPSEEK_API_KEY
# max_tokens: 16384
# supports_thinking: true
# supports_vision: false # DeepSeek V3 does not support vision
# when_thinking_enabled:
# extra_body:
# thinking:
@@ -51,6 +54,7 @@ models:
# api_base: https://ark.cn-beijing.volces.com/api/v3
# api_key: $VOLCENGINE_API_KEY
# supports_thinking: true
# supports_vision: false # Check your specific model's capabilities
# when_thinking_enabled:
# extra_body:
# thinking:
@@ -65,6 +69,7 @@ models:
# api_key: $MOONSHOT_API_KEY
# max_tokens: 32768
# supports_thinking: true
# supports_vision: false # Check your specific model's capabilities
# when_thinking_enabled:
# extra_body:
# thinking:
@@ -107,6 +112,11 @@ tools:
use: src.community.image_search.tools:image_search_tool
max_results: 5
# View image tool (lets the model look at local image files; intended for models with supports_vision: true)
- name: view_image
group: file:read
use: src.tools.builtins:view_image_tool
# File operations tools
- name: ls
group: file:read