From a138d5388ab807b85d6f03c20d2ba59a764d84cf Mon Sep 17 00:00:00 2001 From: Zhiyunyao Date: Mon, 2 Mar 2026 20:49:41 +0800 Subject: [PATCH] feat: add reasoning_effort configuration support for Doubao/GPT-5 models (#947) * feat: Add reasoning effort configuration support * Add `reasoning_effort` parameter to model config and agent initialization * Support reasoning effort levels (minimal/low/medium/high) for Doubao/GPT-5 models * Add UI controls in input box for reasoning effort selection * Update doubao-seed-1.8 example config with reasoning effort support Fixes & Cleanup: * Ensure UTF-8 encoding for file operations * Remove unused imports * fix: set reasoning_effort to None for unsupported models * fix: unit test error * Update frontend/src/components/workspace/input-box.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Willem Jiang --- .gitignore | 1 + backend/src/agents/lead_agent/agent.py | 7 +- backend/src/client.py | 2 + backend/src/config/app_config.py | 2 +- backend/src/config/extensions_config.py | 2 +- backend/src/config/model_config.py | 1 + backend/src/gateway/routers/models.py | 3 + backend/src/models/factory.py | 6 + backend/src/reflection/resolvers.py | 3 - backend/tests/test_client.py | 5 + .../tests/test_lead_agent_model_resolution.py | 3 +- config.example.yaml | 25 ++-- .../app/workspace/chats/[thread_id]/page.tsx | 8 +- .../src/components/workspace/input-box.tsx | 140 +++++++++++++++++- .../workspace/messages/message-list.tsx | 6 +- frontend/src/core/i18n/locales/en-US.ts | 9 ++ frontend/src/core/i18n/locales/types.ts | 9 ++ frontend/src/core/i18n/locales/zh-CN.ts | 9 ++ frontend/src/core/models/types.ts | 1 + frontend/src/core/settings/local.ts | 2 + frontend/src/core/threads/types.ts | 1 + 21 files changed, 212 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index 1ec225f..abfa399 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ extensions_config.json # IDE .idea/ +.vscode/ # Coverage report coverage.xml diff --git a/backend/src/agents/lead_agent/agent.py b/backend/src/agents/lead_agent/agent.py index 7b4028d..2212e2e 100644 --- a/backend/src/agents/lead_agent/agent.py +++ b/backend/src/agents/lead_agent/agent.py @@ -256,6 +256,7 @@ def make_lead_agent(config: RunnableConfig): from src.tools import get_available_tools thinking_enabled = config.get("configurable", {}).get("thinking_enabled", True) + reasoning_effort = config.get("configurable", {}).get("reasoning_effort", None) requested_model_name = config.get("configurable", {}).get("model_name") or config.get("configurable", {}).get("model") model_name = _resolve_model_name(requested_model_name) if model_name is None: @@ -274,8 +275,9 @@ def make_lead_agent(config: RunnableConfig): thinking_enabled = False logger.info( - "thinking_enabled: %s, model_name: %s, is_plan_mode: %s, subagent_enabled: %s, max_concurrent_subagents: %s", + "thinking_enabled: %s, reasoning_effort: %s, model_name: %s, is_plan_mode: %s, subagent_enabled: %s, max_concurrent_subagents: %s", thinking_enabled, + reasoning_effort, model_name, is_plan_mode, subagent_enabled, @@ -289,13 +291,14 @@ def make_lead_agent(config: RunnableConfig): { "model_name": model_name or "default", "thinking_enabled": thinking_enabled, + "reasoning_effort": reasoning_effort, "is_plan_mode": is_plan_mode, "subagent_enabled": subagent_enabled, } ) return create_agent( - model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled), + model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort), tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled), middleware=_build_middlewares(config, model_name=model_name), system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents), diff --git a/backend/src/client.py b/backend/src/client.py index 6639eab..010cbd7 100644 --- a/backend/src/client.py +++ b/backend/src/client.py @@ -400,6 +400,7 @@ class DeerFlowClient: "display_name": getattr(model, "display_name", None), "description": getattr(model, "description", None), "supports_thinking": getattr(model, "supports_thinking", False), + "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False), } for model in self._app_config.models ] @@ -458,6 +459,7 @@ class DeerFlowClient: "display_name": getattr(model, "display_name", None), "description": getattr(model, "description", None), "supports_thinking": getattr(model, "supports_thinking", False), + "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False), } # ------------------------------------------------------------------ diff --git a/backend/src/config/app_config.py b/backend/src/config/app_config.py index 3c219d9..2232115 100644 --- a/backend/src/config/app_config.py +++ b/backend/src/config/app_config.py @@ -72,7 +72,7 @@ class AppConfig(BaseModel): AppConfig: The loaded config. """ resolved_path = cls.resolve_config_path(config_path) - with open(resolved_path) as f: + with open(resolved_path, encoding="utf-8") as f: config_data = yaml.safe_load(f) config_data = cls.resolve_env_variables(config_data) diff --git a/backend/src/config/extensions_config.py b/backend/src/config/extensions_config.py index be32925..134b4fe 100644 --- a/backend/src/config/extensions_config.py +++ b/backend/src/config/extensions_config.py @@ -133,7 +133,7 @@ class ExtensionsConfig(BaseModel): # Return empty config if extensions config file is not found return cls(mcp_servers={}, skills={}) - with open(resolved_path) as f: + with open(resolved_path, encoding="utf-8") as f: config_data = json.load(f) cls.resolve_env_variables(config_data) diff --git a/backend/src/config/model_config.py b/backend/src/config/model_config.py index 277de2e..2efb4a0 100644 --- a/backend/src/config/model_config.py +++ b/backend/src/config/model_config.py @@ -14,6 +14,7 @@ class ModelConfig(BaseModel): model: str = Field(..., description="Model name") model_config = ConfigDict(extra="allow") supports_thinking: bool = Field(default_factory=lambda: False, description="Whether the model supports thinking") + supports_reasoning_effort: bool = Field(default_factory=lambda: False, description="Whether the model supports reasoning effort") when_thinking_enabled: dict | None = Field( default_factory=lambda: None, description="Extra settings to be passed to the model when thinking is enabled", diff --git a/backend/src/gateway/routers/models.py b/backend/src/gateway/routers/models.py index 39d0229..e158e7b 100644 --- a/backend/src/gateway/routers/models.py +++ b/backend/src/gateway/routers/models.py @@ -13,6 +13,7 @@ class ModelResponse(BaseModel): display_name: str | None = Field(None, description="Human-readable name") description: str | None = Field(None, description="Model description") supports_thinking: bool = Field(default=False, description="Whether model supports thinking mode") + supports_reasoning_effort: bool = Field(default=False, description="Whether model supports reasoning effort") class ModelsListResponse(BaseModel): @@ -63,6 +64,7 @@ async def list_models() -> ModelsListResponse: display_name=model.display_name, description=model.description, supports_thinking=model.supports_thinking, + supports_reasoning_effort=model.supports_reasoning_effort, ) for model in config.models ] @@ -107,4 +109,5 @@ async def get_model(model_name: str) -> ModelResponse: display_name=model.display_name, description=model.description, supports_thinking=model.supports_thinking, + supports_reasoning_effort=model.supports_reasoning_effort, ) diff --git a/backend/src/models/factory.py b/backend/src/models/factory.py index f705e92..dcd5f02 100644 --- a/backend/src/models/factory.py +++ b/backend/src/models/factory.py @@ -32,6 +32,7 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * "display_name", "description", "supports_thinking", + "supports_reasoning_effort", "when_thinking_enabled", "supports_vision", }, @@ -40,6 +41,11 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * if not model_config.supports_thinking: raise ValueError(f"Model {name} does not support thinking. Set `supports_thinking` to true in the `config.yaml` to enable thinking.") from None model_settings_from_config.update(model_config.when_thinking_enabled) + if not thinking_enabled and model_config.when_thinking_enabled and model_config.when_thinking_enabled.get("extra_body", {}).get("thinking", {}).get("type"): + kwargs.update({"extra_body": {"thinking": {"type": "disabled"}}}) + kwargs.update({"reasoning_effort": "minimal"}) + if not model_config.supports_reasoning_effort: + kwargs.update({"reasoning_effort": None}) model_instance = model_class(**kwargs, **model_settings_from_config) if is_tracing_enabled(): diff --git a/backend/src/reflection/resolvers.py b/backend/src/reflection/resolvers.py index 9f17a7d..e940c6c 100644 --- a/backend/src/reflection/resolvers.py +++ b/backend/src/reflection/resolvers.py @@ -1,7 +1,4 @@ from importlib import import_module -from typing import TypeVar - -T = TypeVar("T") def resolve_variable[T]( diff --git a/backend/tests/test_client.py b/backend/tests/test_client.py index cb30d4e..68b79d6 100644 --- a/backend/tests/test_client.py +++ b/backend/tests/test_client.py @@ -25,6 +25,8 @@ def mock_app_config(): """Provide a minimal AppConfig mock.""" model = MagicMock() model.name = "test-model" + model.supports_thinking = False + model.supports_reasoning_effort = False model.model_dump.return_value = {"name": "test-model", "use": "langchain_openai:ChatOpenAI"} config = MagicMock() @@ -379,6 +381,7 @@ class TestGetModel: model_cfg.display_name = "Test Model" model_cfg.description = "A test model" model_cfg.supports_thinking = True + model_cfg.supports_reasoning_effort = True client._app_config.get_model_config.return_value = model_cfg result = client.get_model("test-model") @@ -387,6 +390,7 @@ class TestGetModel: "display_name": "Test Model", "description": "A test model", "supports_thinking": True, + "supports_reasoning_effort": True, } def test_not_found(self, client): @@ -928,6 +932,7 @@ class TestScenarioConfigManagement: model_cfg.display_name = None model_cfg.description = None model_cfg.supports_thinking = False + model_cfg.supports_reasoning_effort = False client._app_config.get_model_config.return_value = model_cfg detail = client.get_model(model_name) assert detail["name"] == model_name diff --git a/backend/tests/test_lead_agent_model_resolution.py b/backend/tests/test_lead_agent_model_resolution.py index b79829f..0d7d77a 100644 --- a/backend/tests/test_lead_agent_model_resolution.py +++ b/backend/tests/test_lead_agent_model_resolution.py @@ -84,9 +84,10 @@ def test_make_lead_agent_disables_thinking_when_model_does_not_support_it(monkey captured: dict[str, object] = {} - def _fake_create_chat_model(*, name, thinking_enabled): + def _fake_create_chat_model(*, name, thinking_enabled, reasoning_effort=None): captured["name"] = name captured["thinking_enabled"] = thinking_enabled + captured["reasoning_effort"] = reasoning_effort return object() monkeypatch.setattr(lead_agent_module, "create_chat_model", _fake_create_chat_model) diff --git a/config.example.yaml b/config.example.yaml index b59d2a3..c64d37e 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -65,18 +65,19 @@ models: # type: enabled # Example: Volcengine (Doubao) model - # - name: doubao-seed-1.8 - # display_name: Doubao 1.8 (Thinking) - # use: langchain_deepseek:ChatDeepSeek - # model: ep-m-20260106111913-xxxxx - # api_base: https://ark.cn-beijing.volces.com/api/v3 - # api_key: $VOLCENGINE_API_KEY - # supports_thinking: true - # supports_vision: false # Check your specific model's capabilities - # when_thinking_enabled: - # extra_body: - # thinking: - # type: enabled + - name: doubao-seed-1.8 + display_name: Doubao-Seed-1.8 + use: src.models.patched_deepseek:PatchedChatDeepSeek + model: doubao-seed-1-8-251228 + api_base: https://ark.cn-beijing.volces.com/api/v3 + api_key: $VOLCENGINE_API_KEY + supports_thinking: true + supports_vision: true + supports_reasoning_effort: true + when_thinking_enabled: + extra_body: + thinking: + type: enabled # Example: Kimi K2.5 model # - name: kimi-k2.5 diff --git a/frontend/src/app/workspace/chats/[thread_id]/page.tsx b/frontend/src/app/workspace/chats/[thread_id]/page.tsx index f5faa32..d414c31 100644 --- a/frontend/src/app/workspace/chats/[thread_id]/page.tsx +++ b/frontend/src/app/workspace/chats/[thread_id]/page.tsx @@ -177,6 +177,7 @@ export default function ChatPage() { is_plan_mode: settings.context.mode === "pro" || settings.context.mode === "ultra", subagent_enabled: settings.context.mode === "ultra", + reasoning_effort: settings.context.reasoning_effort, }, afterSubmit() { router.push(pathOfThread(threadId!)); @@ -236,10 +237,9 @@ export default function ChatPage() { className={cn("size-full", !isNewThread && "pt-10")} threadId={threadId} thread={thread} - messagesOverride={ - !thread.isLoading && finalState?.messages - ? (finalState.messages as Message[]) - : undefined + messages={ + (finalState?.messages as Message[]) + ?? thread.messages } paddingBottom={todoListCollapsed ? 160 : 280} /> diff --git a/frontend/src/components/workspace/input-box.tsx b/frontend/src/components/workspace/input-box.tsx index 217f1f5..5f997a1 100644 --- a/frontend/src/components/workspace/input-box.tsx +++ b/frontend/src/components/workspace/input-box.tsx @@ -106,6 +106,7 @@ export function InputBox({ "thread_id" | "is_plan_mode" | "thinking_enabled" | "subagent_enabled" > & { mode: "flash" | "thinking" | "pro" | "ultra" | undefined; + reasoning_effort?: "minimal" | "low" | "medium" | "high"; }; extraHeader?: React.ReactNode; isNewThread?: boolean; @@ -116,6 +117,7 @@ export function InputBox({ "thread_id" | "is_plan_mode" | "thinking_enabled" | "subagent_enabled" > & { mode: "flash" | "thinking" | "pro" | "ultra" | undefined; + reasoning_effort?: "minimal" | "low" | "medium" | "high"; }, ) => void; onSubmit?: (message: PromptInputMessage) => void; @@ -159,6 +161,11 @@ export function InputBox({ [selectedModel], ); + const supportReasoningEffort = useMemo( + () => selectedModel?.supports_reasoning_effort ?? false, + [selectedModel], + ); + const handleModelSelect = useCallback( (model_name: string) => { const model = models.find((m) => m.name === model_name); @@ -169,6 +176,7 @@ export function InputBox({ ...context, model_name, mode: getResolvedMode(context.mode, model.supports_thinking ?? false), + reasoning_effort: context.reasoning_effort, }); setModelDialogOpen(false); }, @@ -180,10 +188,22 @@ export function InputBox({ onContextChange?.({ ...context, mode: getResolvedMode(mode, supportThinking), + reasoning_effort: mode === "ultra" ? "high" : mode === "pro" ? "medium" : mode === "thinking" ? "low" : "minimal", }); }, [onContextChange, context, supportThinking], ); + + const handleReasoningEffortSelect = useCallback( + (effort: "minimal" | "low" | "medium" | "high") => { + onContextChange?.({ + ...context, + reasoning_effort: effort, + }); + }, + [onContextChange, context], + ); + const handleSubmit = useCallback( async (message: PromptInputMessage) => { if (status === "streaming") { @@ -244,9 +264,9 @@ export function InputBox({ {t.inputBox.flashMode} @@ -327,7 +347,7 @@ export function InputBox({ className={cn( "mr-2 size-4", context.mode === "thinking" && - "text-accent-foreground", + "text-accent-foreground", )} /> {t.inputBox.reasoningMode} @@ -409,6 +429,116 @@ export function InputBox({ + {supportReasoningEffort && context.mode !== "flash" && ( + + +
+ {t.inputBox.reasoningEffort}: + {context.reasoning_effort === "minimal" && " " + t.inputBox.reasoningEffortMinimal} + {context.reasoning_effort === "low" && " " + t.inputBox.reasoningEffortLow} + {context.reasoning_effort === "medium" && " " + t.inputBox.reasoningEffortMedium} + {context.reasoning_effort === "high" && " " + t.inputBox.reasoningEffortHigh} +
+
+ + + + {t.inputBox.reasoningEffort} + + + handleReasoningEffortSelect("minimal")} + > +
+
+ {t.inputBox.reasoningEffortMinimal} +
+
+ {t.inputBox.reasoningEffortMinimalDescription} +
+
+ {context.reasoning_effort === "minimal" ? ( + + ) : ( +
+ )} + + handleReasoningEffortSelect("low")} + > +
+
+ {t.inputBox.reasoningEffortLow} +
+
+ {t.inputBox.reasoningEffortLowDescription} +
+
+ {context.reasoning_effort === "low" ? ( + + ) : ( +
+ )} + + handleReasoningEffortSelect("medium")} + > +
+
+ {t.inputBox.reasoningEffortMedium} +
+
+ {t.inputBox.reasoningEffortMediumDescription} +
+
+ {context.reasoning_effort === "medium" || !context.reasoning_effort ? ( + + ) : ( +
+ )} + + handleReasoningEffortSelect("high")} + > +
+
+ {t.inputBox.reasoningEffortHigh} +
+
+ {t.inputBox.reasoningEffortHighDescription} +
+
+ {context.reasoning_effort === "high" ? ( + + ) : ( +
+ )} + + + + + + )} ; - /** When set (e.g. from onFinish), use instead of thread.messages so SSE end shows complete state. */ - messagesOverride?: Message[]; + messages: Message[]; paddingBottom?: number; }) { const { t } = useI18n(); const rehypePlugins = useRehypeSplitWordsIntoSpans(thread.isLoading); const updateSubtask = useUpdateSubtask(); - const messages = messagesOverride ?? thread.messages; if (thread.isThreadLoading) { return ; } diff --git a/frontend/src/core/i18n/locales/en-US.ts b/frontend/src/core/i18n/locales/en-US.ts index 1bc60a9..8c40930 100644 --- a/frontend/src/core/i18n/locales/en-US.ts +++ b/frontend/src/core/i18n/locales/en-US.ts @@ -82,6 +82,15 @@ export const enUS: Translations = { ultraMode: "Ultra", ultraModeDescription: "Pro mode with subagents to divide work; best for complex multi-step tasks", + reasoningEffort: "Reasoning Effort", + reasoningEffortMinimal: "Minimal", + reasoningEffortMinimalDescription: "Retrieval + Direct Output", + reasoningEffortLow: "Low", + reasoningEffortLowDescription: "Simple Logic Check + Shallow Deduction", + reasoningEffortMedium: "Medium", + reasoningEffortMediumDescription: "Multi-layer Logic Analysis + Basic Verification", + reasoningEffortHigh: "High", + reasoningEffortHighDescription: "Full-dimensional Logic Deduction + Multi-path Verification + Backward Check", searchModels: "Search models...", surpriseMe: "Surprise", surpriseMePrompt: "Surprise me", diff --git a/frontend/src/core/i18n/locales/types.ts b/frontend/src/core/i18n/locales/types.ts index 7213efa..cfc362d 100644 --- a/frontend/src/core/i18n/locales/types.ts +++ b/frontend/src/core/i18n/locales/types.ts @@ -64,6 +64,15 @@ export interface Translations { proModeDescription: string; ultraMode: string; ultraModeDescription: string; + reasoningEffort: string; + reasoningEffortMinimal: string; + reasoningEffortMinimalDescription: string; + reasoningEffortLow: string; + reasoningEffortLowDescription: string; + reasoningEffortMedium: string; + reasoningEffortMediumDescription: string; + reasoningEffortHigh: string; + reasoningEffortHighDescription: string; searchModels: string; surpriseMe: string; surpriseMePrompt: string; diff --git a/frontend/src/core/i18n/locales/zh-CN.ts b/frontend/src/core/i18n/locales/zh-CN.ts index a3d399b..6ec3bde 100644 --- a/frontend/src/core/i18n/locales/zh-CN.ts +++ b/frontend/src/core/i18n/locales/zh-CN.ts @@ -80,6 +80,15 @@ export const zhCN: Translations = { ultraMode: "Ultra", ultraModeDescription: "继承自 Pro 模式,可调用子代理分工协作,适合复杂多步骤任务,能力最强", + reasoningEffort: "推理深度", + reasoningEffortMinimal: "最低", + reasoningEffortMinimalDescription: "检索 + 直接输出", + reasoningEffortLow: "低", + reasoningEffortLowDescription: "简单逻辑校验 + 浅层推演", + reasoningEffortMedium: "中", + reasoningEffortMediumDescription: "多层逻辑分析 + 基础验证", + reasoningEffortHigh: "高", + reasoningEffortHighDescription: "全维度逻辑推演 + 多路径验证 + 反推校验", searchModels: "搜索模型...", surpriseMe: "小惊喜", surpriseMePrompt: "给我一个小惊喜吧", diff --git a/frontend/src/core/models/types.ts b/frontend/src/core/models/types.ts index f923d2d..19404ae 100644 --- a/frontend/src/core/models/types.ts +++ b/frontend/src/core/models/types.ts @@ -4,4 +4,5 @@ export interface Model { display_name: string; description?: string | null; supports_thinking?: boolean; + supports_reasoning_effort?: boolean; } diff --git a/frontend/src/core/settings/local.ts b/frontend/src/core/settings/local.ts index 9bdcf32..d560e7b 100644 --- a/frontend/src/core/settings/local.ts +++ b/frontend/src/core/settings/local.ts @@ -7,6 +7,7 @@ export const DEFAULT_LOCAL_SETTINGS: LocalSettings = { context: { model_name: undefined, mode: undefined, + reasoning_effort: undefined, }, layout: { sidebar_collapsed: false, @@ -24,6 +25,7 @@ export interface LocalSettings { "thread_id" | "is_plan_mode" | "thinking_enabled" | "subagent_enabled" > & { mode: "flash" | "thinking" | "pro" | "ultra" | undefined; + reasoning_effort?: "minimal" | "low" | "medium" | "high"; }; layout: { sidebar_collapsed: boolean; diff --git a/frontend/src/core/threads/types.ts b/frontend/src/core/threads/types.ts index 106ef8a..76d8cde 100644 --- a/frontend/src/core/threads/types.ts +++ b/frontend/src/core/threads/types.ts @@ -18,4 +18,5 @@ export interface AgentThreadContext extends Record { thinking_enabled: boolean; is_plan_mode: boolean; subagent_enabled: boolean; + reasoning_effort?: "minimal" | "low" | "medium" | "high"; }