diff --git a/backend/src/config/model_config.py b/backend/src/config/model_config.py index 2efb4a0..ca0344e 100644 --- a/backend/src/config/model_config.py +++ b/backend/src/config/model_config.py @@ -20,3 +20,10 @@ class ModelConfig(BaseModel): description="Extra settings to be passed to the model when thinking is enabled", ) supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs") + thinking: dict | None = Field( + default_factory=lambda: None, + description=( + "Thinking settings for the model. If provided, these settings will be passed to the model when thinking is enabled. " + "This is a shortcut for `when_thinking_enabled` and will be merged with `when_thinking_enabled` if both are provided." + ), + ) diff --git a/backend/src/models/factory.py b/backend/src/models/factory.py index dcd5f02..769afa2 100644 --- a/backend/src/models/factory.py +++ b/backend/src/models/factory.py @@ -34,18 +34,33 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * "supports_thinking", "supports_reasoning_effort", "when_thinking_enabled", + "thinking", "supports_vision", }, ) - if thinking_enabled and model_config.when_thinking_enabled is not None: + # Compute effective when_thinking_enabled by merging in the `thinking` shortcut field. + # The `thinking` shortcut is equivalent to setting when_thinking_enabled["thinking"]. + has_thinking_settings = (model_config.when_thinking_enabled is not None) or (model_config.thinking is not None) + effective_wte: dict = dict(model_config.when_thinking_enabled) if model_config.when_thinking_enabled else {} + if model_config.thinking is not None: + merged_thinking = {**(effective_wte.get("thinking") or {}), **model_config.thinking} + effective_wte = {**effective_wte, "thinking": merged_thinking} + if thinking_enabled and has_thinking_settings: if not model_config.supports_thinking: raise ValueError(f"Model {name} does not support thinking. Set `supports_thinking` to true in the `config.yaml` to enable thinking.") from None - model_settings_from_config.update(model_config.when_thinking_enabled) - if not thinking_enabled and model_config.when_thinking_enabled and model_config.when_thinking_enabled.get("extra_body", {}).get("thinking", {}).get("type"): - kwargs.update({"extra_body": {"thinking": {"type": "disabled"}}}) - kwargs.update({"reasoning_effort": "minimal"}) + if effective_wte: + model_settings_from_config.update(effective_wte) + if not thinking_enabled and has_thinking_settings: + if effective_wte.get("extra_body", {}).get("thinking", {}).get("type"): + # OpenAI-compatible gateway: thinking is nested under extra_body + kwargs.update({"extra_body": {"thinking": {"type": "disabled"}}}) + kwargs.update({"reasoning_effort": "minimal"}) + elif effective_wte.get("thinking", {}).get("type"): + # Native langchain_anthropic: thinking is a direct constructor parameter + kwargs.update({"thinking": {"type": "disabled"}}) if not model_config.supports_reasoning_effort: kwargs.update({"reasoning_effort": None}) + model_instance = model_class(**kwargs, **model_settings_from_config) if is_tracing_enabled(): diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py new file mode 100644 index 0000000..70ee690 --- /dev/null +++ b/backend/tests/test_model_factory.py @@ -0,0 +1,412 @@ +"""Tests for src.models.factory.create_chat_model.""" + +from __future__ import annotations + +import pytest +from langchain.chat_models import BaseChatModel + +from src.config.app_config import AppConfig +from src.config.model_config import ModelConfig +from src.config.sandbox_config import SandboxConfig +from src.models import factory as factory_module + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_app_config(models: list[ModelConfig]) -> AppConfig: + return AppConfig( + models=models, + sandbox=SandboxConfig(use="src.sandbox.local:LocalSandboxProvider"), + ) + + +def _make_model( + name: str = "test-model", + *, + use: str = "langchain_openai:ChatOpenAI", + supports_thinking: bool = False, + supports_reasoning_effort: bool = False, + when_thinking_enabled: dict | None = None, + thinking: dict | None = None, +) -> ModelConfig: + return ModelConfig( + name=name, + display_name=name, + description=None, + use=use, + model=name, + supports_thinking=supports_thinking, + supports_reasoning_effort=supports_reasoning_effort, + when_thinking_enabled=when_thinking_enabled, + thinking=thinking, + supports_vision=False, + ) + + +class FakeChatModel(BaseChatModel): + """Minimal BaseChatModel stub that records the kwargs it was called with.""" + + captured_kwargs: dict = {} + + def __init__(self, **kwargs): + # Store kwargs before pydantic processes them + FakeChatModel.captured_kwargs = dict(kwargs) + super().__init__(**kwargs) + + @property + def _llm_type(self) -> str: + return "fake" + + def _generate(self, *args, **kwargs): # type: ignore[override] + raise NotImplementedError + + def _stream(self, *args, **kwargs): # type: ignore[override] + raise NotImplementedError + + +def _patch_factory(monkeypatch, app_config: AppConfig, model_class=FakeChatModel): + """Patch get_app_config, resolve_class, and tracing for isolated unit tests.""" + monkeypatch.setattr(factory_module, "get_app_config", lambda: app_config) + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: model_class) + monkeypatch.setattr(factory_module, "is_tracing_enabled", lambda: False) + + +# --------------------------------------------------------------------------- +# Model selection +# --------------------------------------------------------------------------- + + +def test_uses_first_model_when_name_is_none(monkeypatch): + cfg = _make_app_config([_make_model("alpha"), _make_model("beta")]) + _patch_factory(monkeypatch, cfg) + + FakeChatModel.captured_kwargs = {} + factory_module.create_chat_model(name=None) + + # resolve_class is called — if we reach here without ValueError, the correct model was used + assert FakeChatModel.captured_kwargs.get("model") == "alpha" + + +def test_raises_when_model_not_found(monkeypatch): + cfg = _make_app_config([_make_model("only-model")]) + monkeypatch.setattr(factory_module, "get_app_config", lambda: cfg) + monkeypatch.setattr(factory_module, "is_tracing_enabled", lambda: False) + + with pytest.raises(ValueError, match="ghost-model"): + factory_module.create_chat_model(name="ghost-model") + + +# --------------------------------------------------------------------------- +# thinking_enabled=True +# --------------------------------------------------------------------------- + + +def test_thinking_enabled_raises_when_not_supported_but_when_thinking_enabled_is_set(monkeypatch): + """supports_thinking guard fires only when when_thinking_enabled is configured — + the factory uses that as the signal that the caller explicitly expects thinking to work.""" + wte = {"thinking": {"type": "enabled", "budget_tokens": 5000}} + cfg = _make_app_config([_make_model("no-think", supports_thinking=False, when_thinking_enabled=wte)]) + _patch_factory(monkeypatch, cfg) + + with pytest.raises(ValueError, match="does not support thinking"): + factory_module.create_chat_model(name="no-think", thinking_enabled=True) + + +def test_thinking_enabled_raises_for_empty_when_thinking_enabled_explicitly_set(monkeypatch): + """supports_thinking guard fires when when_thinking_enabled is set to an empty dict — + the user explicitly provided the section, so the guard must still fire even though + effective_wte would be falsy.""" + cfg = _make_app_config([_make_model("no-think-empty", supports_thinking=False, when_thinking_enabled={})]) + _patch_factory(monkeypatch, cfg) + + with pytest.raises(ValueError, match="does not support thinking"): + factory_module.create_chat_model(name="no-think-empty", thinking_enabled=True) + + +def test_thinking_enabled_merges_when_thinking_enabled_settings(monkeypatch): + wte = {"temperature": 1.0, "max_tokens": 16000} + cfg = _make_app_config([_make_model("thinker", supports_thinking=True, when_thinking_enabled=wte)]) + _patch_factory(monkeypatch, cfg) + + FakeChatModel.captured_kwargs = {} + factory_module.create_chat_model(name="thinker", thinking_enabled=True) + + assert FakeChatModel.captured_kwargs.get("temperature") == 1.0 + assert FakeChatModel.captured_kwargs.get("max_tokens") == 16000 + + +# --------------------------------------------------------------------------- +# thinking_enabled=False — disable logic +# --------------------------------------------------------------------------- + + +def test_thinking_disabled_openai_gateway_format(monkeypatch): + """When thinking is configured via extra_body (OpenAI-compatible gateway), + disabling must inject extra_body.thinking.type=disabled and reasoning_effort=minimal.""" + wte = {"extra_body": {"thinking": {"type": "enabled", "budget_tokens": 10000}}} + cfg = _make_app_config( + [ + _make_model( + "openai-gw", + supports_thinking=True, + supports_reasoning_effort=True, + when_thinking_enabled=wte, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="openai-gw", thinking_enabled=False) + + assert captured.get("extra_body") == {"thinking": {"type": "disabled"}} + assert captured.get("reasoning_effort") == "minimal" + assert "thinking" not in captured # must NOT set the direct thinking param + + +def test_thinking_disabled_langchain_anthropic_format(monkeypatch): + """When thinking is configured as a direct param (langchain_anthropic), + disabling must inject thinking.type=disabled WITHOUT touching extra_body or reasoning_effort.""" + wte = {"thinking": {"type": "enabled", "budget_tokens": 8000}} + cfg = _make_app_config( + [ + _make_model( + "anthropic-native", + use="langchain_anthropic:ChatAnthropic", + supports_thinking=True, + supports_reasoning_effort=False, + when_thinking_enabled=wte, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="anthropic-native", thinking_enabled=False) + + assert captured.get("thinking") == {"type": "disabled"} + assert "extra_body" not in captured + # reasoning_effort must be cleared (supports_reasoning_effort=False) + assert captured.get("reasoning_effort") is None + + +def test_thinking_disabled_no_when_thinking_enabled_does_nothing(monkeypatch): + """If when_thinking_enabled is not set, disabling thinking must not inject any kwargs.""" + cfg = _make_app_config([_make_model("plain", supports_thinking=True, when_thinking_enabled=None)]) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="plain", thinking_enabled=False) + + assert "extra_body" not in captured + assert "thinking" not in captured + # reasoning_effort not forced (supports_reasoning_effort defaults to False → cleared) + assert captured.get("reasoning_effort") is None + + +# --------------------------------------------------------------------------- +# reasoning_effort stripping +# --------------------------------------------------------------------------- + + +def test_reasoning_effort_cleared_when_not_supported(monkeypatch): + cfg = _make_app_config([_make_model("no-effort", supports_reasoning_effort=False)]) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="no-effort", thinking_enabled=False) + + assert captured.get("reasoning_effort") is None + + +def test_reasoning_effort_preserved_when_supported(monkeypatch): + wte = {"extra_body": {"thinking": {"type": "enabled", "budget_tokens": 5000}}} + cfg = _make_app_config( + [ + _make_model( + "effort-model", + supports_thinking=True, + supports_reasoning_effort=True, + when_thinking_enabled=wte, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="effort-model", thinking_enabled=False) + + # When supports_reasoning_effort=True, it should NOT be cleared to None + # The disable path sets it to "minimal"; supports_reasoning_effort=True keeps it + assert captured.get("reasoning_effort") == "minimal" + + +# --------------------------------------------------------------------------- +# thinking shortcut field +# --------------------------------------------------------------------------- + + +def test_thinking_shortcut_enables_thinking_when_thinking_enabled(monkeypatch): + """thinking shortcut alone should act as when_thinking_enabled with a `thinking` key.""" + thinking_settings = {"type": "enabled", "budget_tokens": 8000} + cfg = _make_app_config( + [ + _make_model( + "shortcut-model", + use="langchain_anthropic:ChatAnthropic", + supports_thinking=True, + thinking=thinking_settings, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="shortcut-model", thinking_enabled=True) + + assert captured.get("thinking") == thinking_settings + + +def test_thinking_shortcut_disables_thinking_when_thinking_disabled(monkeypatch): + """thinking shortcut should participate in the disable path (langchain_anthropic format).""" + thinking_settings = {"type": "enabled", "budget_tokens": 8000} + cfg = _make_app_config( + [ + _make_model( + "shortcut-disable", + use="langchain_anthropic:ChatAnthropic", + supports_thinking=True, + supports_reasoning_effort=False, + thinking=thinking_settings, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="shortcut-disable", thinking_enabled=False) + + assert captured.get("thinking") == {"type": "disabled"} + assert "extra_body" not in captured + + +def test_thinking_shortcut_merges_with_when_thinking_enabled(monkeypatch): + """thinking shortcut should be merged into when_thinking_enabled when both are provided.""" + thinking_settings = {"type": "enabled", "budget_tokens": 8000} + wte = {"max_tokens": 16000} + cfg = _make_app_config( + [ + _make_model( + "merge-model", + use="langchain_anthropic:ChatAnthropic", + supports_thinking=True, + thinking=thinking_settings, + when_thinking_enabled=wte, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="merge-model", thinking_enabled=True) + + # Both the thinking shortcut and when_thinking_enabled settings should be applied + assert captured.get("thinking") == thinking_settings + assert captured.get("max_tokens") == 16000 + + +def test_thinking_shortcut_not_leaked_into_model_when_disabled(monkeypatch): + """thinking shortcut must not be passed raw to the model constructor (excluded from model_dump).""" + thinking_settings = {"type": "enabled", "budget_tokens": 8000} + cfg = _make_app_config( + [ + _make_model( + "no-leak", + use="langchain_anthropic:ChatAnthropic", + supports_thinking=True, + supports_reasoning_effort=False, + thinking=thinking_settings, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="no-leak", thinking_enabled=False) + + # The disable path should have set thinking to disabled (not the raw enabled shortcut) + assert captured.get("thinking") == {"type": "disabled"} diff --git a/config.example.yaml b/config.example.yaml index fabc861..4429f1f 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -27,7 +27,7 @@ models: # extra_body: # thinking: # type: enabled - + # Example: OpenAI model # - name: gpt-4 # display_name: GPT-4 @@ -46,6 +46,9 @@ models: # api_key: $ANTHROPIC_API_KEY # max_tokens: 8192 # supports_vision: true # Enable vision support for view_image tool + # when_thinking_enabled: + # thinking: + # type: enabled # Example: Google Gemini model # - name: gemini-2.5-pro