fix(llm): filter unexpected config keys to prevent LangChain warnings (#411) (#726)

* fix(llm): filter unexpected config keys to prevent LangChain warnings (#411) Add allowlist validation for LLM configuration keys to prevent unexpected parameters like SEARCH_ENGINE from being passed to LLM constructors. Changes: - Add ALLOWED_LLM_CONFIG_KEYS set with valid LLM configuration parameters - Filter out unexpected keys before creating LLM instances - Log clear warning messages when unexpected keys are removed - Add unit test for configuration key filtering This fixes the confusing LangChain warning "WARNING! SEARCH_ENGINE is not default parameter. SEARCH_ENGINE was transferred to model_kwargs" that occurred when users accidentally placed configuration keys in wrong sections of conf.yaml. * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-04-24 22:54:46 +08:00 · 2025-11-29 16:13:05 +08:00
parent 2e010a4619
commit 4a78cfe12a
2 changed files with 101 additions and 0 deletions
--- a/src/llms/llm.py
+++ b/src/llms/llm.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
 # SPDX-License-Identifier: MIT
 import logging
 import os
 from pathlib import Path
 from typing import Any, Dict, get_args
@@ -15,9 +16,57 @@ from src.config import load_yaml_config
 from src.config.agents import LLMType
 from src.llms.providers.dashscope import ChatDashscope
 logger = logging.getLogger(__name__)
 # Cache for LLM instances
 _llm_cache: dict[LLMType, BaseChatModel] = {}
 # Allowed LLM configuration keys to prevent unexpected parameters from being passed
 # to LLM constructors (Issue #411 - SEARCH_ENGINE warning fix)
 ALLOWED_LLM_CONFIG_KEYS = {
    # Common LLM configuration keys
    "model",
    "api_key",
    "base_url",
    "api_base",
    "max_retries",
    "timeout",
    "max_tokens",
    "temperature",
    "top_p",
    "frequency_penalty",
    "presence_penalty",
    "stop",
    "n",
    "stream",
    "logprobs",
    "echo",
    "best_of",
    "logit_bias",
    "user",
    "seed",
    # SSL and HTTP client settings
    "verify_ssl",
    "http_client",
    "http_async_client",
    # Platform-specific keys
    "platform",
    "google_api_key",
    # Azure-specific keys
    "azure_endpoint",
    "azure_deployment",
    "api_version",
    "azure_ad_token",
    "azure_ad_token_provider",
    # Dashscope/Doubao specific keys
    "extra_body",
    # Token limit for context compression (removed before passing to LLM)
    "token_limit",
    # Default headers
    "default_headers",
    "default_query",
 }
 def _get_config_file_path() -> str:
    """Get the path to the configuration file."""
@@ -67,6 +116,18 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatMod
    # Merge configurations, with environment variables taking precedence
    merged_conf = {**llm_conf, **env_conf}
    # Filter out unexpected parameters to prevent LangChain warnings (Issue #411)
    # This prevents configuration keys like SEARCH_ENGINE from being passed to LLM constructors
    allowed_keys_lower = {k.lower() for k in ALLOWED_LLM_CONFIG_KEYS}
    unexpected_keys = [key for key in merged_conf.keys() if key.lower() not in allowed_keys_lower]
    for key in unexpected_keys:
        removed_value = merged_conf.pop(key)
        logger.warning(
            f"Removed unexpected LLM configuration key '{key}'. "
            f"This key is not a valid LLM parameter and may have been placed in the wrong section of conf.yaml. "
            f"Valid LLM config keys include: model, api_key, base_url, max_retries, temperature, etc."
        )
    # Remove unnecessary parameters when initializing the client
    if "token_limit" in merged_conf:
        merged_conf.pop("token_limit")
--- a/tests/unit/llms/test_llm.py
+++ b/tests/unit/llms/test_llm.py
@@ -85,3 +85,43 @@ def test_get_llm_by_type_caches(monkeypatch, dummy_conf):
    inst2 = llm.get_llm_by_type("basic")
    assert inst1 is inst2
    assert called["called"]
 def test_create_llm_filters_unexpected_keys(monkeypatch, caplog):
    """Test that unexpected configuration keys like SEARCH_ENGINE are filtered out (Issue #411)."""
    import logging
    # Clear any existing environment variables that might interfere
    monkeypatch.delenv("BASIC_MODEL__API_KEY", raising=False)
    monkeypatch.delenv("BASIC_MODEL__BASE_URL", raising=False)
    monkeypatch.delenv("BASIC_MODEL__MODEL", raising=False)
    # Config with unexpected keys that should be filtered
    conf_with_unexpected_keys = {
        "BASIC_MODEL": {
            "api_key": "test_key",
            "base_url": "http://test",
            "model": "gpt-4",
            "SEARCH_ENGINE": {"include_domains": ["example.com"]},  # Should be filtered
            "engine": "tavily",  # Should be filtered
        }
    }
    with caplog.at_level(logging.WARNING):
        result = llm._create_llm_use_conf("basic", conf_with_unexpected_keys)
    # Verify the LLM was created
    assert isinstance(result, DummyChatOpenAI)
    # Verify unexpected keys were not passed to the LLM
    assert "SEARCH_ENGINE" not in result.kwargs
    assert "engine" not in result.kwargs
    # Verify valid keys were passed
    assert result.kwargs["api_key"] == "test_key"
    assert result.kwargs["base_url"] == "http://test"
    assert result.kwargs["model"] == "gpt-4"
    # Verify warnings were logged
    assert any("SEARCH_ENGINE" in record.message for record in caplog.records)
    assert any("engine" in record.message for record in caplog.records)