mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
* fix(llm): filter unexpected config keys to prevent LangChain warnings (#411) Add allowlist validation for LLM configuration keys to prevent unexpected parameters like SEARCH_ENGINE from being passed to LLM constructors. Changes: - Add ALLOWED_LLM_CONFIG_KEYS set with valid LLM configuration parameters - Filter out unexpected keys before creating LLM instances - Log clear warning messages when unexpected keys are removed - Add unit test for configuration key filtering This fixes the confusing LangChain warning "WARNING! SEARCH_ENGINE is not default parameter. SEARCH_ENGINE was transferred to model_kwargs" that occurred when users accidentally placed configuration keys in wrong sections of conf.yaml. * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, get_args
|
||||
@@ -15,9 +16,57 @@ from src.config import load_yaml_config
|
||||
from src.config.agents import LLMType
|
||||
from src.llms.providers.dashscope import ChatDashscope
|
||||
|
||||
# Module-level logger (standard per-module pattern).
logger = logging.getLogger(__name__)

# Cache for LLM instances, keyed by LLM type, so each configured model is
# constructed at most once per process (see get_llm_by_type).
_llm_cache: dict[LLMType, BaseChatModel] = {}
||||
# Allowlist of configuration keys accepted by LLM constructors.
# Anything outside this set is stripped before a model is instantiated,
# which keeps misplaced conf.yaml entries (e.g. SEARCH_ENGINE) from
# triggering LangChain's "not default parameter" warning (Issue #411).
ALLOWED_LLM_CONFIG_KEYS = {
    # Core model / connection settings
    "model", "api_key", "base_url", "api_base", "max_retries", "timeout",
    # Sampling / generation parameters
    "max_tokens", "temperature", "top_p", "frequency_penalty",
    "presence_penalty", "stop", "n", "stream", "logprobs", "echo",
    "best_of", "logit_bias", "user", "seed",
    # SSL and HTTP client settings
    "verify_ssl", "http_client", "http_async_client",
    # Platform-specific keys
    "platform", "google_api_key",
    # Azure-specific keys
    "azure_endpoint", "azure_deployment", "api_version",
    "azure_ad_token", "azure_ad_token_provider",
    # Dashscope/Doubao specific keys
    "extra_body",
    # Token limit for context compression (popped before the LLM sees it)
    "token_limit",
    # Default request headers / query parameters
    "default_headers", "default_query",
}
|
||||
|
||||
|
||||
def _get_config_file_path() -> str:
|
||||
"""Get the path to the configuration file."""
|
||||
@@ -67,6 +116,18 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatMod
|
||||
# Merge configurations, with environment variables taking precedence
|
||||
merged_conf = {**llm_conf, **env_conf}
|
||||
|
||||
# Filter out unexpected parameters to prevent LangChain warnings (Issue #411)
|
||||
# This prevents configuration keys like SEARCH_ENGINE from being passed to LLM constructors
|
||||
allowed_keys_lower = {k.lower() for k in ALLOWED_LLM_CONFIG_KEYS}
|
||||
unexpected_keys = [key for key in merged_conf.keys() if key.lower() not in allowed_keys_lower]
|
||||
for key in unexpected_keys:
|
||||
removed_value = merged_conf.pop(key)
|
||||
logger.warning(
|
||||
f"Removed unexpected LLM configuration key '{key}'. "
|
||||
f"This key is not a valid LLM parameter and may have been placed in the wrong section of conf.yaml. "
|
||||
f"Valid LLM config keys include: model, api_key, base_url, max_retries, temperature, etc."
|
||||
)
|
||||
|
||||
# Remove unnecessary parameters when initializing the client
|
||||
if "token_limit" in merged_conf:
|
||||
merged_conf.pop("token_limit")
|
||||
|
||||
@@ -85,3 +85,43 @@ def test_get_llm_by_type_caches(monkeypatch, dummy_conf):
|
||||
inst2 = llm.get_llm_by_type("basic")
|
||||
assert inst1 is inst2
|
||||
assert called["called"]
|
||||
|
||||
|
||||
def test_create_llm_filters_unexpected_keys(monkeypatch, caplog):
    """Test that unexpected configuration keys like SEARCH_ENGINE are filtered out (Issue #411).

    Verifies that _create_llm_use_conf drops keys not in the allowlist,
    still forwards the valid keys, and logs a warning naming each
    removed key.
    """
    import logging

    # Clear any existing environment variables that might interfere
    monkeypatch.delenv("BASIC_MODEL__API_KEY", raising=False)
    monkeypatch.delenv("BASIC_MODEL__BASE_URL", raising=False)
    monkeypatch.delenv("BASIC_MODEL__MODEL", raising=False)

    # Config with unexpected keys that should be filtered
    conf_with_unexpected_keys = {
        "BASIC_MODEL": {
            "api_key": "test_key",
            "base_url": "http://test",
            "model": "gpt-4",
            "SEARCH_ENGINE": {"include_domains": ["example.com"]},  # Should be filtered
            "engine": "tavily",  # Should be filtered
        }
    }

    with caplog.at_level(logging.WARNING):
        result = llm._create_llm_use_conf("basic", conf_with_unexpected_keys)

    # Verify the LLM was created
    assert isinstance(result, DummyChatOpenAI)

    # Verify unexpected keys were not passed to the LLM
    assert "SEARCH_ENGINE" not in result.kwargs
    assert "engine" not in result.kwargs

    # Verify valid keys were passed
    assert result.kwargs["api_key"] == "test_key"
    assert result.kwargs["base_url"] == "http://test"
    assert result.kwargs["model"] == "gpt-4"

    # Verify warnings were logged. Match the quoted key name emitted by
    # the warning ("... key '<name>' ...") — a bare substring check for
    # "engine" would be vacuously satisfied by the "SEARCH_ENGINE"
    # warning, since "engine" is a substring of "SEARCH_ENGINE".
    assert any("'SEARCH_ENGINE'" in record.message for record in caplog.records)
    assert any("'engine'" in record.message for record in caplog.records)
|
||||
|
||||
Reference in New Issue
Block a user