feat(tools): add tool_search for deferred MCP tool loading (#1176)

* feat(tools): add tool_search for deferred MCP tool loading

When multiple MCP servers are enabled, total tool count can exceed 30-50,
causing context bloat and degraded tool selection accuracy. This adds a
deferred tool loading mechanism controlled by `tool_search.enabled` config.

- Add ToolSearchConfig with single `enabled` field
- Add DeferredToolRegistry with regex search (select:, +keyword, keyword)
- Add tool_search tool returning OpenAI-compatible function JSON
- Add DeferredToolFilterMiddleware to hide deferred schemas from bind_tools
- Add <available-deferred-tools> section to system prompt
- Enable MCP tool_name_prefix to prevent cross-server name collisions
- Add 34 unit tests covering registry, tool, prompt, and middleware

* fix: reset stale deferred registry and bump config_version

- Reset deferred registry upfront in get_available_tools() to prevent
  stale tool entries when MCP servers are disabled between calls
- Bump config_version to 2 for new tool_search config field

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(tests): mock get_app_config in prompt section tests for CI

CI has no config.yaml, causing TestDeferredToolsPromptSection to fail
with FileNotFoundError. Add autouse fixture to mock get_app_config.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
lhd
2026-03-17 20:43:55 +08:00
committed by GitHub
parent f29db80be7
commit 0091d9f071
10 changed files with 721 additions and 26 deletions

View File

@@ -5,6 +5,7 @@ from langchain.tools import BaseTool
from deerflow.config import get_app_config
from deerflow.reflection import resolve_variable
from deerflow.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool
from deerflow.tools.builtins.tool_search import reset_deferred_registry
logger = logging.getLogger(__name__)
@@ -42,27 +43,6 @@ def get_available_tools(
config = get_app_config()
loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in config.tools if groups is None or tool.group in groups]
# Get cached MCP tools if enabled
# NOTE: We use ExtensionsConfig.from_file() instead of config.extensions
# to always read the latest configuration from disk. This ensures that changes
# made through the Gateway API (which runs in a separate process) are immediately
# reflected when loading MCP tools.
mcp_tools = []
if include_mcp:
try:
from deerflow.config.extensions_config import ExtensionsConfig
from deerflow.mcp.cache import get_cached_mcp_tools
extensions_config = ExtensionsConfig.from_file()
if extensions_config.get_enabled_mcp_servers():
mcp_tools = get_cached_mcp_tools()
if mcp_tools:
logger.info(f"Using {len(mcp_tools)} cached MCP tool(s)")
except ImportError:
logger.warning("MCP module not available. Install 'langchain-mcp-adapters' package to enable MCP tools.")
except Exception as e:
logger.error(f"Failed to get cached MCP tools: {e}")
# Conditionally add tools based on config
builtin_tools = BUILTIN_TOOLS.copy()
@@ -81,4 +61,41 @@ def get_available_tools(
builtin_tools.append(view_image_tool)
logger.info(f"Including view_image_tool for model '{model_name}' (supports_vision=True)")
# Get cached MCP tools if enabled
# NOTE: We use ExtensionsConfig.from_file() instead of config.extensions
# to always read the latest configuration from disk. This ensures that changes
# made through the Gateway API (which runs in a separate process) are immediately
# reflected when loading MCP tools.
mcp_tools = []
# Reset deferred registry upfront to prevent stale state from previous calls
reset_deferred_registry()
if include_mcp:
try:
from deerflow.config.extensions_config import ExtensionsConfig
from deerflow.mcp.cache import get_cached_mcp_tools
extensions_config = ExtensionsConfig.from_file()
if extensions_config.get_enabled_mcp_servers():
mcp_tools = get_cached_mcp_tools()
if mcp_tools:
logger.info(f"Using {len(mcp_tools)} cached MCP tool(s)")
# When tool_search is enabled, register MCP tools in the
# deferred registry and add tool_search to builtin tools.
if config.tool_search.enabled:
from deerflow.tools.builtins.tool_search import DeferredToolRegistry, set_deferred_registry
from deerflow.tools.builtins.tool_search import tool_search as tool_search_tool
registry = DeferredToolRegistry()
for t in mcp_tools:
registry.register(t)
set_deferred_registry(registry)
builtin_tools.append(tool_search_tool)
logger.info(f"Tool search active: {len(mcp_tools)} tools deferred")
except ImportError:
logger.warning("MCP module not available. Install 'langchain-mcp-adapters' package to enable MCP tools.")
except Exception as e:
logger.error(f"Failed to get cached MCP tools: {e}")
logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}")
return loaded_tools + builtin_tools + mcp_tools