fix(llm): filter unexpected config keys to prevent LangChain warnings (#411) (#726)

* fix(llm): filter unexpected config keys to prevent LangChain warnings (#411)

Add allowlist validation for LLM configuration keys to prevent unexpected
parameters like SEARCH_ENGINE from being passed to LLM constructors.

Changes:
- Add ALLOWED_LLM_CONFIG_KEYS set with valid LLM configuration parameters
- Filter out unexpected keys before creating LLM instances
- Log clear warning messages when unexpected keys are removed
- Add unit test for configuration key filtering

This fixes the confusing LangChain warning "WARNING! SEARCH_ENGINE is not
default parameter. SEARCH_ENGINE was transferred to model_kwargs" that
occurred when users accidentally placed configuration keys in wrong sections
of conf.yaml.

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Willem Jiang
2025-11-29 16:13:05 +08:00
committed by GitHub
parent 2e010a4619
commit 4a78cfe12a
2 changed files with 101 additions and 0 deletions

View File

@@ -1,6 +1,7 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
import logging
import os
from pathlib import Path
from typing import Any, Dict, get_args
@@ -15,9 +16,57 @@ from src.config import load_yaml_config
from src.config.agents import LLMType
from src.llms.providers.dashscope import ChatDashscope
logger = logging.getLogger(__name__)
# Cache for LLM instances
_llm_cache: dict[LLMType, BaseChatModel] = {}
# Allowed LLM configuration keys to prevent unexpected parameters from being passed
# to LLM constructors (Issue #411 - SEARCH_ENGINE warning fix)
ALLOWED_LLM_CONFIG_KEYS = {
# Common LLM configuration keys
"model",
"api_key",
"base_url",
"api_base",
"max_retries",
"timeout",
"max_tokens",
"temperature",
"top_p",
"frequency_penalty",
"presence_penalty",
"stop",
"n",
"stream",
"logprobs",
"echo",
"best_of",
"logit_bias",
"user",
"seed",
# SSL and HTTP client settings
"verify_ssl",
"http_client",
"http_async_client",
# Platform-specific keys
"platform",
"google_api_key",
# Azure-specific keys
"azure_endpoint",
"azure_deployment",
"api_version",
"azure_ad_token",
"azure_ad_token_provider",
# Dashscope/Doubao specific keys
"extra_body",
# Token limit for context compression (removed before passing to LLM)
"token_limit",
# Default headers
"default_headers",
"default_query",
}
def _get_config_file_path() -> str:
"""Get the path to the configuration file."""
@@ -67,6 +116,18 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatMod
# Merge configurations, with environment variables taking precedence
merged_conf = {**llm_conf, **env_conf}
# Filter out unexpected parameters to prevent LangChain warnings (Issue #411)
# This prevents configuration keys like SEARCH_ENGINE from being passed to LLM constructors
allowed_keys_lower = {k.lower() for k in ALLOWED_LLM_CONFIG_KEYS}
unexpected_keys = [key for key in merged_conf.keys() if key.lower() not in allowed_keys_lower]
for key in unexpected_keys:
removed_value = merged_conf.pop(key)
logger.warning(
f"Removed unexpected LLM configuration key '{key}'. "
f"This key is not a valid LLM parameter and may have been placed in the wrong section of conf.yaml. "
f"Valid LLM config keys include: model, api_key, base_url, max_retries, temperature, etc."
)
# Remove unnecessary parameters when initializing the client
if "token_limit" in merged_conf:
merged_conf.pop("token_limit")