fix(llm): filter unexpected config keys to prevent LangChain warnings (#411) (#726)

* fix(llm): filter unexpected config keys to prevent LangChain warnings (#411)

Add allowlist validation for LLM configuration keys to prevent unexpected
parameters like SEARCH_ENGINE from being passed to LLM constructors.

Changes:
- Add ALLOWED_LLM_CONFIG_KEYS set with valid LLM configuration parameters
- Filter out unexpected keys before creating LLM instances
- Log clear warning messages when unexpected keys are removed
- Add unit test for configuration key filtering

This fixes the confusing LangChain warning "WARNING! SEARCH_ENGINE is not
default parameter. SEARCH_ENGINE was transferred to model_kwargs" that
occurred when users accidentally placed configuration keys in wrong sections
of conf.yaml.

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Willem Jiang
2025-11-29 16:13:05 +08:00
committed by GitHub
parent 2e010a4619
commit 4a78cfe12a
2 changed files with 101 additions and 0 deletions

View File

@@ -85,3 +85,43 @@ def test_get_llm_by_type_caches(monkeypatch, dummy_conf):
inst2 = llm.get_llm_by_type("basic")
assert inst1 is inst2
assert called["called"]
def test_create_llm_filters_unexpected_keys(monkeypatch, caplog):
    """Unexpected config keys (e.g. SEARCH_ENGINE) must be filtered out with a warning (Issue #411)."""
    import logging

    # Make sure env-var overrides don't leak into the config under test.
    for env_var in ("BASIC_MODEL__API_KEY", "BASIC_MODEL__BASE_URL", "BASIC_MODEL__MODEL"):
        monkeypatch.delenv(env_var, raising=False)

    # A config mixing valid LLM keys with keys that belong in other sections.
    conf = {
        "BASIC_MODEL": {
            "api_key": "test_key",
            "base_url": "http://test",
            "model": "gpt-4",
            "SEARCH_ENGINE": {"include_domains": ["example.com"]},  # Should be filtered
            "engine": "tavily",  # Should be filtered
        }
    }

    with caplog.at_level(logging.WARNING):
        created = llm._create_llm_use_conf("basic", conf)

    # The LLM instance is still created...
    assert isinstance(created, DummyChatOpenAI)
    # ...without the unexpected keys...
    assert "SEARCH_ENGINE" not in created.kwargs
    assert "engine" not in created.kwargs
    # ...while the valid keys pass through untouched.
    assert created.kwargs["api_key"] == "test_key"
    assert created.kwargs["base_url"] == "http://test"
    assert created.kwargs["model"] == "gpt-4"
    # And a warning is logged for each removed key.
    assert any("SEARCH_ENGINE" in record.message for record in caplog.records)
    assert any("engine" in record.message for record in caplog.records)