feat: add Claude Code OAuth and Codex CLI as LLM providers (#1166)

* feat: add Claude Code OAuth and Codex CLI providers

Port of bytedance/deer-flow#1136 from @solanian's feat/cli-oauth-providers branch.

Carries the feature forward on top of current main without the original CLA-blocked commit metadata, while preserving attribution in the commit message for review.

* fix: harden CLI credential loading

Align Codex auth loading with the current ~/.codex/auth.json shape, make Docker credential mounts directory-based to avoid broken file binds on hosts without exported credential files, and add focused loader tests.
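
A minimal sketch of the loading order this implies, assuming the loader honors a CODEX_AUTH_PATH override before falling back to ~/.codex/auth.json and that CodexCliCredential carries access_token, account_id, and source fields (shapes and names taken from the tests further down; the dataclass here is a local stand-in, not the real class):

    import json
    import os
    from dataclasses import dataclass
    from pathlib import Path

    @dataclass
    class CodexCliCredential:
        access_token: str
        account_id: str = ""
        source: str = "codex-cli"

    def load_codex_cli_credential() -> CodexCliCredential | None:
        # CODEX_AUTH_PATH overrides the default ~/.codex/auth.json location.
        auth_path = Path(os.environ.get("CODEX_AUTH_PATH", Path.home() / ".codex" / "auth.json"))
        if not auth_path.is_file():
            return None
        try:
            data = json.loads(auth_path.read_text())
        except (OSError, json.JSONDecodeError):
            return None
        # The current CLI nests tokens under a "tokens" key; legacy files kept
        # access_token at the top level, so fall back to the document root.
        tokens = data.get("tokens", data)
        access_token = tokens.get("access_token", "")
        if not access_token:
            return None
        return CodexCliCredential(
            access_token=access_token,
            account_id=tokens.get("account_id", ""),
            source="codex-cli",
        )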

* refactor: tighten codex auth typing

Replace the temporary Any return type in CodexChatModel._load_codex_auth with the concrete CodexCliCredential type after the credential loader was stabilized.
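
For illustration only, the tightened signature could read as below; the delegation to load_codex_cli_credential is an assumption, since the commit above only describes the return-type change:

    from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli_credential

    class CodexChatModel:  # illustrative stub; the real class is a full chat model implementation
        def _load_codex_auth(self) -> CodexCliCredential | None:
            # Concrete credential type instead of Any; None means "no usable credential found".
            return load_codex_cli_credential()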

* fix: load Claude Code OAuth from Keychain

Match Claude Code's macOS storage strategy more closely by checking the Keychain-backed credentials store before falling back to ~/.claude/.credentials.json. Keep explicit file overrides and add focused tests for the Keychain path.
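
A hedged sketch of that lookup order on macOS; the Keychain service name below is an assumption, the helper names are illustrative, and the env-var/explicit-path overrides exercised by the tests are omitted for brevity:

    import json
    import subprocess
    from pathlib import Path

    def _read_keychain_payload() -> dict | None:
        # `security find-generic-password -s <service> -w` prints the stored secret.
        try:
            raw = subprocess.run(
                ["security", "find-generic-password", "-s", "Claude Code-credentials", "-w"],
                capture_output=True,
                text=True,
                check=True,
            ).stdout.strip()
            return json.loads(raw)
        except (OSError, subprocess.CalledProcessError, json.JSONDecodeError):
            return None

    def _read_credentials_file() -> dict | None:
        cred_path = Path.home() / ".claude" / ".credentials.json"
        if not cred_path.is_file():
            return None
        try:
            return json.loads(cred_path.read_text())
        except (OSError, json.JSONDecodeError):
            return None

    def load_claude_oauth_payload() -> dict | None:
        # Keychain first, then the on-disk credentials file.
        return _read_keychain_payload() or _read_credentials_file()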

* fix: require explicit Claude OAuth handoff

* style: format thread hooks reasoning request

* docs: document CLI-backed auth providers

* fix: address provider review feedback

* fix: harden provider edge cases

* Fix deferred tools, Codex message normalization, and local sandbox paths

* chore: narrow PR scope to OAuth providers

* chore: remove unrelated frontend changes

* chore: reapply OAuth branch frontend scope cleanup

* fix: preserve upload guards with reasoning effort wiring
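
The reasoning-effort mapping exercised by the factory tests later in this diff can be restated as a small helper; this is an illustrative summary of the tested behavior, not the factory's actual code (the same tests also check that unsupported kwargs such as max_tokens are dropped before the Codex provider is constructed):

    def resolve_codex_reasoning_effort(thinking_enabled: bool, reasoning_effort: str | None) -> str:
        # Thinking disabled -> "none"; an explicit value wins; otherwise default to "medium".
        if not thinking_enabled:
            return "none"
        return reasoning_effort or "medium"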

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Purricane authored on 2026-03-22 07:39:50 -07:00, committed by GitHub
Parent: e119dc74ae
Commit: 835ba041f8
12 changed files with 1546 additions and 0 deletions

View File

@@ -0,0 +1,151 @@
from __future__ import annotations
import json
import pytest
from langchain_core.messages import HumanMessage, SystemMessage
from deerflow.models.claude_provider import ClaudeChatModel
from deerflow.models.credential_loader import CodexCliCredential
from deerflow.models.openai_codex_provider import CodexChatModel
def test_codex_provider_rejects_non_positive_retry_attempts():
with pytest.raises(ValueError, match="retry_max_attempts must be >= 1"):
CodexChatModel(retry_max_attempts=0)
def test_codex_provider_requires_credentials(monkeypatch):
monkeypatch.setattr(CodexChatModel, "_load_codex_auth", lambda self: None)
with pytest.raises(ValueError, match="Codex CLI credential not found"):
CodexChatModel()
def test_codex_provider_concatenates_multiple_system_messages(monkeypatch):
monkeypatch.setattr(
CodexChatModel,
"_load_codex_auth",
lambda self: CodexCliCredential(access_token="token", account_id="acct"),
)
model = CodexChatModel()
instructions, input_items = model._convert_messages(
[
SystemMessage(content="First system prompt."),
SystemMessage(content="Second system prompt."),
HumanMessage(content="Hello"),
]
)
assert instructions == "First system prompt.\n\nSecond system prompt."
assert input_items == [{"role": "user", "content": "Hello"}]
def test_codex_provider_flattens_structured_text_blocks(monkeypatch):
monkeypatch.setattr(
CodexChatModel,
"_load_codex_auth",
lambda self: CodexCliCredential(access_token="token", account_id="acct"),
)
model = CodexChatModel()
instructions, input_items = model._convert_messages(
[
HumanMessage(content=[{"type": "text", "text": "Hello from blocks"}]),
]
)
assert instructions == "You are a helpful assistant."
assert input_items == [{"role": "user", "content": "Hello from blocks"}]
def test_claude_provider_rejects_non_positive_retry_attempts():
with pytest.raises(ValueError, match="retry_max_attempts must be >= 1"):
ClaudeChatModel(model="claude-sonnet-4-6", retry_max_attempts=0)
def test_codex_provider_skips_terminal_sse_markers(monkeypatch):
monkeypatch.setattr(
CodexChatModel,
"_load_codex_auth",
lambda self: CodexCliCredential(access_token="token", account_id="acct"),
)
model = CodexChatModel()
assert model._parse_sse_data_line("data: [DONE]") is None
assert model._parse_sse_data_line("event: response.completed") is None
def test_codex_provider_skips_non_json_sse_frames(monkeypatch):
monkeypatch.setattr(
CodexChatModel,
"_load_codex_auth",
lambda self: CodexCliCredential(access_token="token", account_id="acct"),
)
model = CodexChatModel()
assert model._parse_sse_data_line("data: not-json") is None
def test_codex_provider_marks_invalid_tool_call_arguments(monkeypatch):
monkeypatch.setattr(
CodexChatModel,
"_load_codex_auth",
lambda self: CodexCliCredential(access_token="token", account_id="acct"),
)
model = CodexChatModel()
result = model._parse_response(
{
"model": "gpt-5.4",
"output": [
{
"type": "function_call",
"name": "bash",
"arguments": "{invalid",
"call_id": "tc-1",
}
],
"usage": {},
}
)
message = result.generations[0].message
assert message.tool_calls == []
assert len(message.invalid_tool_calls) == 1
assert message.invalid_tool_calls[0]["type"] == "invalid_tool_call"
assert message.invalid_tool_calls[0]["name"] == "bash"
assert message.invalid_tool_calls[0]["args"] == "{invalid"
assert message.invalid_tool_calls[0]["id"] == "tc-1"
assert "Failed to parse tool arguments" in message.invalid_tool_calls[0]["error"]
def test_codex_provider_parses_valid_tool_arguments(monkeypatch):
monkeypatch.setattr(
CodexChatModel,
"_load_codex_auth",
lambda self: CodexCliCredential(access_token="token", account_id="acct"),
)
model = CodexChatModel()
result = model._parse_response(
{
"model": "gpt-5.4",
"output": [
{
"type": "function_call",
"name": "bash",
"arguments": json.dumps({"cmd": "pwd"}),
"call_id": "tc-1",
}
],
"usage": {},
}
)
assert result.generations[0].message.tool_calls == [
{"name": "bash", "args": {"cmd": "pwd"}, "id": "tc-1", "type": "tool_call"}
]

View File

@@ -0,0 +1,156 @@
import json
import os
from deerflow.models.credential_loader import (
load_claude_code_credential,
load_codex_cli_credential,
)
def _clear_claude_code_env(monkeypatch) -> None:
for env_var in (
"CLAUDE_CODE_OAUTH_TOKEN",
"ANTHROPIC_AUTH_TOKEN",
"CLAUDE_CODE_OAUTH_TOKEN_FILE_DESCRIPTOR",
"CLAUDE_CODE_CREDENTIALS_PATH",
):
monkeypatch.delenv(env_var, raising=False)
def test_load_claude_code_credential_from_direct_env(monkeypatch):
_clear_claude_code_env(monkeypatch)
monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", " sk-ant-oat01-env ")
cred = load_claude_code_credential()
assert cred is not None
assert cred.access_token == "sk-ant-oat01-env"
assert cred.refresh_token == ""
assert cred.source == "claude-cli-env"
def test_load_claude_code_credential_from_anthropic_auth_env(monkeypatch):
_clear_claude_code_env(monkeypatch)
monkeypatch.setenv("ANTHROPIC_AUTH_TOKEN", "sk-ant-oat01-anthropic-auth")
cred = load_claude_code_credential()
assert cred is not None
assert cred.access_token == "sk-ant-oat01-anthropic-auth"
assert cred.source == "claude-cli-env"
def test_load_claude_code_credential_from_file_descriptor(monkeypatch):
_clear_claude_code_env(monkeypatch)
read_fd, write_fd = os.pipe()
try:
os.write(write_fd, b"sk-ant-oat01-fd")
os.close(write_fd)
monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN_FILE_DESCRIPTOR", str(read_fd))
cred = load_claude_code_credential()
finally:
os.close(read_fd)
assert cred is not None
assert cred.access_token == "sk-ant-oat01-fd"
assert cred.refresh_token == ""
assert cred.source == "claude-cli-fd"
def test_load_claude_code_credential_from_override_path(tmp_path, monkeypatch):
_clear_claude_code_env(monkeypatch)
cred_path = tmp_path / "claude-credentials.json"
cred_path.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "sk-ant-oat01-test",
"refreshToken": "sk-ant-ort01-test",
"expiresAt": 4_102_444_800_000,
}
}
)
)
monkeypatch.setenv("CLAUDE_CODE_CREDENTIALS_PATH", str(cred_path))
cred = load_claude_code_credential()
assert cred is not None
assert cred.access_token == "sk-ant-oat01-test"
assert cred.refresh_token == "sk-ant-ort01-test"
assert cred.source == "claude-cli-file"
def test_load_claude_code_credential_ignores_directory_path(tmp_path, monkeypatch):
_clear_claude_code_env(monkeypatch)
cred_dir = tmp_path / "claude-creds-dir"
cred_dir.mkdir()
monkeypatch.setenv("CLAUDE_CODE_CREDENTIALS_PATH", str(cred_dir))
assert load_claude_code_credential() is None
def test_load_claude_code_credential_falls_back_to_default_file_when_override_is_invalid(tmp_path, monkeypatch):
_clear_claude_code_env(monkeypatch)
monkeypatch.setenv("HOME", str(tmp_path))
cred_dir = tmp_path / "claude-creds-dir"
cred_dir.mkdir()
monkeypatch.setenv("CLAUDE_CODE_CREDENTIALS_PATH", str(cred_dir))
default_path = tmp_path / ".claude" / ".credentials.json"
default_path.parent.mkdir()
default_path.write_text(
json.dumps(
{
"claudeAiOauth": {
"accessToken": "sk-ant-oat01-default",
"refreshToken": "sk-ant-ort01-default",
"expiresAt": 4_102_444_800_000,
}
}
)
)
cred = load_claude_code_credential()
assert cred is not None
assert cred.access_token == "sk-ant-oat01-default"
assert cred.refresh_token == "sk-ant-ort01-default"
assert cred.source == "claude-cli-file"
def test_load_codex_cli_credential_supports_nested_tokens_shape(tmp_path, monkeypatch):
auth_path = tmp_path / "auth.json"
auth_path.write_text(
json.dumps(
{
"tokens": {
"access_token": "codex-access-token",
"account_id": "acct_123",
}
}
)
)
monkeypatch.setenv("CODEX_AUTH_PATH", str(auth_path))
cred = load_codex_cli_credential()
assert cred is not None
assert cred.access_token == "codex-access-token"
assert cred.account_id == "acct_123"
assert cred.source == "codex-cli"
def test_load_codex_cli_credential_supports_legacy_top_level_shape(tmp_path, monkeypatch):
auth_path = tmp_path / "auth.json"
auth_path.write_text(json.dumps({"access_token": "legacy-access-token"}))
monkeypatch.setenv("CODEX_AUTH_PATH", str(auth_path))
cred = load_codex_cli_credential()
assert cred is not None
assert cred.access_token == "legacy-access-token"
assert cred.account_id == ""

View File

@@ -9,6 +9,7 @@ from deerflow.config.app_config import AppConfig
from deerflow.config.model_config import ModelConfig
from deerflow.config.sandbox_config import SandboxConfig
from deerflow.models import factory as factory_module
from deerflow.models import openai_codex_provider as codex_provider_module
# ---------------------------------------------------------------------------
# Helpers
@@ -30,6 +31,7 @@ def _make_model(
supports_reasoning_effort: bool = False,
when_thinking_enabled: dict | None = None,
thinking: dict | None = None,
max_tokens: int | None = None,
) -> ModelConfig:
return ModelConfig(
name=name,
@@ -37,6 +39,7 @@ def _make_model(
description=None,
use=use,
model=name,
max_tokens=max_tokens,
supports_thinking=supports_thinking,
supports_reasoning_effort=supports_reasoning_effort,
when_thinking_enabled=when_thinking_enabled,
@@ -500,6 +503,96 @@ def test_openai_compatible_provider_multiple_models(monkeypatch):
assert captured.get("model") == "MiniMax-M2.5-highspeed"
# ---------------------------------------------------------------------------
# Codex provider reasoning_effort mapping
# ---------------------------------------------------------------------------
class FakeCodexChatModel(FakeChatModel):
pass
def test_codex_provider_disables_reasoning_when_thinking_disabled(monkeypatch):
cfg = _make_app_config(
[
_make_model(
"codex",
use="deerflow.models.openai_codex_provider:CodexChatModel",
supports_thinking=True,
supports_reasoning_effort=True,
)
]
)
_patch_factory(monkeypatch, cfg, model_class=FakeCodexChatModel)
monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
FakeChatModel.captured_kwargs = {}
factory_module.create_chat_model(name="codex", thinking_enabled=False)
assert FakeChatModel.captured_kwargs.get("reasoning_effort") == "none"
def test_codex_provider_preserves_explicit_reasoning_effort(monkeypatch):
cfg = _make_app_config(
[
_make_model(
"codex",
use="deerflow.models.openai_codex_provider:CodexChatModel",
supports_thinking=True,
supports_reasoning_effort=True,
)
]
)
_patch_factory(monkeypatch, cfg, model_class=FakeCodexChatModel)
monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
FakeChatModel.captured_kwargs = {}
factory_module.create_chat_model(name="codex", thinking_enabled=True, reasoning_effort="high")
assert FakeChatModel.captured_kwargs.get("reasoning_effort") == "high"
def test_codex_provider_defaults_reasoning_effort_to_medium(monkeypatch):
cfg = _make_app_config(
[
_make_model(
"codex",
use="deerflow.models.openai_codex_provider:CodexChatModel",
supports_thinking=True,
supports_reasoning_effort=True,
)
]
)
_patch_factory(monkeypatch, cfg, model_class=FakeCodexChatModel)
monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
FakeChatModel.captured_kwargs = {}
factory_module.create_chat_model(name="codex", thinking_enabled=True)
assert FakeChatModel.captured_kwargs.get("reasoning_effort") == "medium"
def test_codex_provider_strips_unsupported_max_tokens(monkeypatch):
cfg = _make_app_config(
[
_make_model(
"codex",
use="deerflow.models.openai_codex_provider:CodexChatModel",
supports_thinking=True,
supports_reasoning_effort=True,
max_tokens=4096,
)
]
)
_patch_factory(monkeypatch, cfg, model_class=FakeCodexChatModel)
monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
FakeChatModel.captured_kwargs = {}
factory_module.create_chat_model(name="codex", thinking_enabled=True)
assert "max_tokens" not in FakeChatModel.captured_kwargs
def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch):
model = ModelConfig(
name="gpt-5-responses",