fix(llm): filter unexpected config keys to prevent LangChain warnings (#411) (#726)

* fix(llm): filter unexpected config keys to prevent LangChain warnings (#411)

Add allowlist validation for LLM configuration keys to prevent unexpected
parameters like SEARCH_ENGINE from being passed to LLM constructors.

Changes:
- Add ALLOWED_LLM_CONFIG_KEYS set with valid LLM configuration parameters
- Filter out unexpected keys before creating LLM instances
- Log clear warning messages when unexpected keys are removed
- Add unit test for configuration key filtering

This fixes the confusing LangChain warning "WARNING! SEARCH_ENGINE is not
default parameter. SEARCH_ENGINE was transferred to model_kwargs" that
occurred when users accidentally placed configuration keys in the wrong
section of conf.yaml.
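
For illustration, here is a hypothetical parsed form of such a misconfigured
conf.yaml, mirroring the test fixture added below (values are made up):

# SEARCH_ENGINE belongs in its own top-level section of conf.yaml; nested
# under BASIC_MODEL it reaches the LLM constructor and triggers the
# LangChain model_kwargs warning quoted above.
misplaced_conf = {
    "BASIC_MODEL": {
        "model": "gpt-4",
        "api_key": "test_key",
        "base_url": "http://test",
        "SEARCH_ENGINE": {"include_domains": ["example.com"]},  # wrong section
    }
}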

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
commit 4a78cfe12a (parent 2e010a4619)
Author: Willem Jiang
Date: 2025-11-29 16:13:05 +08:00
Committed by: GitHub
2 changed files with 101 additions and 0 deletions


@@ -1,6 +1,7 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
import logging
import os
from pathlib import Path
from typing import Any, Dict, get_args
@@ -15,9 +16,57 @@ from src.config import load_yaml_config
from src.config.agents import LLMType
from src.llms.providers.dashscope import ChatDashscope
logger = logging.getLogger(__name__)
# Cache for LLM instances
_llm_cache: dict[LLMType, BaseChatModel] = {}
# Allowed LLM configuration keys to prevent unexpected parameters from being passed
# to LLM constructors (Issue #411 - SEARCH_ENGINE warning fix)
ALLOWED_LLM_CONFIG_KEYS = {
# Common LLM configuration keys
"model",
"api_key",
"base_url",
"api_base",
"max_retries",
"timeout",
"max_tokens",
"temperature",
"top_p",
"frequency_penalty",
"presence_penalty",
"stop",
"n",
"stream",
"logprobs",
"echo",
"best_of",
"logit_bias",
"user",
"seed",
# SSL and HTTP client settings
"verify_ssl",
"http_client",
"http_async_client",
# Platform-specific keys
"platform",
"google_api_key",
# Azure-specific keys
"azure_endpoint",
"azure_deployment",
"api_version",
"azure_ad_token",
"azure_ad_token_provider",
# Dashscope/Doubao specific keys
"extra_body",
# Token limit for context compression (removed before passing to LLM)
"token_limit",
# Default headers
"default_headers",
"default_query",
}
def _get_config_file_path() -> str:
"""Get the path to the configuration file."""
@@ -67,6 +116,18 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatModel:
# Merge configurations, with environment variables taking precedence
merged_conf = {**llm_conf, **env_conf}
# Filter out unexpected parameters to prevent LangChain warnings (Issue #411)
# This prevents configuration keys like SEARCH_ENGINE from being passed to LLM constructors
allowed_keys_lower = {k.lower() for k in ALLOWED_LLM_CONFIG_KEYS}
unexpected_keys = [key for key in merged_conf.keys() if key.lower() not in allowed_keys_lower]
for key in unexpected_keys:
removed_value = merged_conf.pop(key)
logger.warning(
f"Removed unexpected LLM configuration key '{key}'. "
f"This key is not a valid LLM parameter and may have been placed in the wrong section of conf.yaml. "
f"Valid LLM config keys include: model, api_key, base_url, max_retries, temperature, etc."
)
# Remove unnecessary parameters when initializing the client
if "token_limit" in merged_conf:
merged_conf.pop("token_limit")
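
A minimal, self-contained sketch of the filtering step above; the helper name
filter_llm_conf and the abridged allowlist are illustrative, not part of the
project API:

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Abridged allowlist; the real set in the diff above has ~30 entries.
ALLOWED_LLM_CONFIG_KEYS = {"model", "api_key", "base_url", "max_retries", "temperature"}

def filter_llm_conf(merged_conf: dict) -> dict:
    """Drop keys not in the allowlist (case-insensitively), warning once per removal."""
    allowed = {k.lower() for k in ALLOWED_LLM_CONFIG_KEYS}
    filtered = {}
    for key, value in merged_conf.items():
        if key.lower() in allowed:
            filtered[key] = value
        else:
            logger.warning("Removed unexpected LLM configuration key '%s'.", key)
    return filtered

print(filter_llm_conf({"model": "gpt-4", "SEARCH_ENGINE": {"engine": "tavily"}}))
# -> {'model': 'gpt-4'}, with a warning logged for SEARCH_ENGINE

The match is case-insensitive on both sides, so upper-case keys such as
SEARCH_ENGINE and lower-case ones such as engine are both caught, matching the
two assertions in the test below.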


@@ -85,3 +85,43 @@ def test_get_llm_by_type_caches(monkeypatch, dummy_conf):
inst2 = llm.get_llm_by_type("basic")
assert inst1 is inst2
assert called["called"]
def test_create_llm_filters_unexpected_keys(monkeypatch, caplog):
"""Test that unexpected configuration keys like SEARCH_ENGINE are filtered out (Issue #411)."""
import logging
# Clear any existing environment variables that might interfere
monkeypatch.delenv("BASIC_MODEL__API_KEY", raising=False)
monkeypatch.delenv("BASIC_MODEL__BASE_URL", raising=False)
monkeypatch.delenv("BASIC_MODEL__MODEL", raising=False)
# Config with unexpected keys that should be filtered
conf_with_unexpected_keys = {
"BASIC_MODEL": {
"api_key": "test_key",
"base_url": "http://test",
"model": "gpt-4",
"SEARCH_ENGINE": {"include_domains": ["example.com"]}, # Should be filtered
"engine": "tavily", # Should be filtered
}
}
with caplog.at_level(logging.WARNING):
result = llm._create_llm_use_conf("basic", conf_with_unexpected_keys)
# Verify the LLM was created
assert isinstance(result, DummyChatOpenAI)
# Verify unexpected keys were not passed to the LLM
assert "SEARCH_ENGINE" not in result.kwargs
assert "engine" not in result.kwargs
# Verify valid keys were passed
assert result.kwargs["api_key"] == "test_key"
assert result.kwargs["base_url"] == "http://test"
assert result.kwargs["model"] == "gpt-4"
# Verify warnings were logged
assert any("SEARCH_ENGINE" in record.message for record in caplog.records)
assert any("engine" in record.message for record in caplog.records)