mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-08 00:10:21 +08:00
feat: Add llms to support the latest Open Source SOTA models (#497)
* fix: update README and configuration guide for new model support and reasoning capabilities * fix: format code for consistency in agent and node files * fix: update test cases for environment variable handling in llm configuration * fix: refactor message chunk conversion functions for improved clarity and maintainability * refactor: remove enable_thinking parameter from LLM configuration functions * chore: update agent-LLM mapping for consistency * chore: update LLM configuration handling for improved clarity * test: add unit tests for Dashscope message chunk conversion and LLM configuration * test: add unit tests for message chunk conversion in Dashscope * test: add unit tests for message chunk conversion in Dashscope * chore: remove unused imports from test_dashscope.py --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -193,7 +193,7 @@ DeerFlow support private knowledgebase such as ragflow and vikingdb, so that you
|
||||
|
||||
- 🤖 **LLM Integration**
|
||||
- It supports the integration of most models through [litellm](https://docs.litellm.ai/docs/providers).
|
||||
- Support for open source models like Qwen
|
||||
- Support for open source models like Qwen; see the [configuration guide](docs/configuration_guide.md) for details.
|
||||
- OpenAI-compatible API interface
|
||||
- Multi-tier LLM system for different task complexities
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ In DeerFlow, we currently only support non-reasoning models. This means models l
|
||||
|
||||
### Supported Models
|
||||
|
||||
`doubao-1.5-pro-32k-250115`, `gpt-4o`, `qwen-max-latest`, `gemini-2.0-flash`, `deepseek-v3`, and theoretically any other non-reasoning chat models that implement the OpenAI API specification.
|
||||
`doubao-1.5-pro-32k-250115`, `gpt-4o`, `qwen-max-latest`, `qwen3-235b-a22b`, `qwen3-coder`, `gemini-2.0-flash`, `deepseek-v3`, and theoretically any other non-reasoning chat models that implement the OpenAI API specification.
|
||||
|
||||
> [!NOTE]
|
||||
> The Deep Research process requires the model to have a **longer context window**, which is not supported by all models.
|
||||
@@ -57,7 +57,47 @@ BASIC_MODEL:
|
||||
model: "gemini-2.0-flash"
|
||||
api_key: YOUR_API_KEY
|
||||
```
|
||||
The following is an example `conf.yaml` configuration that uses the best open-source, OpenAI-compatible models:
|
||||
```yaml
|
||||
# Use latest deepseek-v3 to handle basic tasks, the open source SOTA model for basic tasks
|
||||
BASIC_MODEL:
|
||||
base_url: https://api.deepseek.com
|
||||
model: "deepseek-v3"
|
||||
api_key: YOUR_API_KEY
|
||||
temperature: 0.6
|
||||
top_p: 0.90
|
||||
# Use qwen3-235b-a22b to handle reasoning tasks, the open source SOTA model for reasoning
|
||||
REASONING_MODEL:
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
model: "qwen3-235b-a22b-thinking-2507"
|
||||
api_key: YOUR_API_KEY
|
||||
temperature: 0.6
|
||||
top_p: 0.90
|
||||
# Use qwen3-coder-480b-a35b-instruct to handle coding tasks, the open source SOTA model for coding
|
||||
CODE_MODEL:
|
||||
base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
|
||||
model: "qwen3-coder-480b-a35b-instruct"
|
||||
api_key: YOUR_API_KEY
|
||||
temperature: 0.6
|
||||
top_p: 0.90
|
||||
```
|
||||
In addition, you need to set the `AGENT_LLM_MAP` in `src/config/agents.py` to use the correct model for each agent. For example:
|
||||
|
||||
```python
|
||||
# Define agent-LLM mapping
|
||||
AGENT_LLM_MAP: dict[str, LLMType] = {
|
||||
"coordinator": "reasoning",
|
||||
"planner": "reasoning",
|
||||
"researcher": "reasoning",
|
||||
"coder": "basic",
|
||||
"reporter": "basic",
|
||||
"podcast_script_writer": "basic",
|
||||
"ppt_composer": "basic",
|
||||
"prose_writer": "basic",
|
||||
"prompt_enhancer": "basic",
|
||||
}
|
||||
|
||||
```
|
||||
### How to use models with self-signed SSL certificates?
|
||||
|
||||
If your LLM server uses self-signed SSL certificates, you can disable SSL certificate verification by adding the `verify_ssl: false` parameter to your model configuration:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
from typing import Literal
|
||||
|
||||
# Define available LLM types
|
||||
LLMType = Literal["basic", "reasoning", "vision"]
|
||||
LLMType = Literal["basic", "reasoning", "vision", "code"]
|
||||
|
||||
# Define agent-LLM mapping
|
||||
AGENT_LLM_MAP: dict[str, LLMType] = {
|
||||
|
||||
@@ -13,6 +13,7 @@ from typing import get_args
|
||||
|
||||
from src.config import load_yaml_config
|
||||
from src.config.agents import LLMType
|
||||
from src.llms.providers.dashscope import ChatDashscope
|
||||
|
||||
# Cache for LLM instances
|
||||
_llm_cache: dict[LLMType, BaseChatModel] = {}
|
||||
@@ -29,6 +30,7 @@ def _get_llm_type_config_keys() -> dict[str, str]:
|
||||
"reasoning": "REASONING_MODEL",
|
||||
"basic": "BASIC_MODEL",
|
||||
"vision": "VISION_MODEL",
|
||||
"code": "CODE_MODEL",
|
||||
}
|
||||
|
||||
|
||||
@@ -72,9 +74,6 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatMod
|
||||
if "max_retries" not in merged_conf:
|
||||
merged_conf["max_retries"] = 3
|
||||
|
||||
if llm_type == "reasoning":
|
||||
merged_conf["api_base"] = merged_conf.pop("base_url", None)
|
||||
|
||||
# Handle SSL verification settings
|
||||
verify_ssl = merged_conf.pop("verify_ssl", True)
|
||||
|
||||
@@ -87,15 +86,23 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatMod
|
||||
|
||||
if "azure_endpoint" in merged_conf or os.getenv("AZURE_OPENAI_ENDPOINT"):
|
||||
return AzureChatOpenAI(**merged_conf)
|
||||
|
||||
# Check if base_url is dashscope endpoint
|
||||
if "base_url" in merged_conf and "dashscope." in merged_conf["base_url"]:
|
||||
if llm_type == "reasoning":
|
||||
merged_conf["extra_body"] = {"enable_thinking": True}
|
||||
else:
|
||||
merged_conf["extra_body"] = {"enable_thinking": False}
|
||||
return ChatDashscope(**merged_conf)
|
||||
|
||||
if llm_type == "reasoning":
|
||||
merged_conf["api_base"] = merged_conf.pop("base_url", None)
|
||||
return ChatDeepSeek(**merged_conf)
|
||||
else:
|
||||
return ChatOpenAI(**merged_conf)
|
||||
|
||||
|
||||
def get_llm_by_type(
|
||||
llm_type: LLMType,
|
||||
) -> BaseChatModel:
|
||||
def get_llm_by_type(llm_type: LLMType) -> BaseChatModel:
|
||||
"""
|
||||
Get LLM instance by type. Returns cached instance if available.
|
||||
"""
|
||||
|
||||
321
src/llms/providers/dashscope.py
Normal file
321
src/llms/providers/dashscope.py
Normal file
@@ -0,0 +1,321 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
# Standard library imports
|
||||
from typing import Any, Dict, Iterator, List, Mapping, Optional, Type, Union, cast
|
||||
|
||||
# Third-party imports
|
||||
import openai
|
||||
from langchain_core.callbacks import CallbackManagerForLLMRun
|
||||
from langchain_core.messages import (
|
||||
AIMessageChunk,
|
||||
BaseMessage,
|
||||
BaseMessageChunk,
|
||||
ChatMessageChunk,
|
||||
FunctionMessageChunk,
|
||||
HumanMessageChunk,
|
||||
SystemMessageChunk,
|
||||
ToolMessageChunk,
|
||||
)
|
||||
from langchain_core.messages.ai import UsageMetadata
|
||||
from langchain_core.messages.tool import tool_call_chunk
|
||||
from langchain_core.outputs import ChatGenerationChunk, ChatResult
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain_openai.chat_models.base import (
|
||||
_create_usage_metadata,
|
||||
_handle_openai_bad_request,
|
||||
warnings,
|
||||
)
|
||||
|
||||
|
||||
def _convert_delta_to_message_chunk(
    delta_dict: Mapping[str, Any], default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
    """Build the message chunk that corresponds to one streamed delta.

    The chunk type is picked from the delta's ``role``; each role check is
    paired with a check of ``default_class`` so that a delta without a role
    still maps to the class used for the previous chunk. Assistant chunks also
    carry any ``function_call``, ``tool_calls`` and ``reasoning_content``
    present in the delta.

    Args:
        delta_dict: The ``delta`` mapping of a streamed choice.
        default_class: Chunk class to use when the role does not decide it.

    Returns:
        BaseMessageChunk: The chunk built from the delta.
    """
    chunk_id = delta_dict.get("id")
    role = cast(str, delta_dict.get("role", ""))
    text = cast(str, delta_dict.get("content") or "")
    extras: Dict[str, Any] = {}

    # Legacy function call: work on a copy and turn a null name into "".
    legacy_call = delta_dict.get("function_call")
    if legacy_call:
        legacy_call = dict(legacy_call)
        if legacy_call.get("name", "") is None:
            legacy_call["name"] = ""
        extras["function_call"] = legacy_call

    # Tool calls: the raw list is always forwarded; parsed chunks are only
    # kept when every entry could be processed.
    parsed_tool_calls = []
    raw_calls = delta_dict.get("tool_calls")
    if raw_calls:
        extras["tool_calls"] = raw_calls
        try:
            collected = []
            for raw_call in raw_calls:
                function_part = raw_call.get("function")
                if not function_part:
                    # Entries without a function payload yield no chunk.
                    continue
                collected.append(
                    tool_call_chunk(
                        name=function_part.get("name"),
                        args=function_part.get("arguments"),
                        id=raw_call.get("id"),
                        index=raw_call.get("index", 0),
                    )
                )
            parsed_tool_calls = collected
        except (KeyError, TypeError):
            # Best effort: on malformed entries the chunk list stays empty.
            pass

    # The checks below are order-sensitive: the first matching pair wins.
    if role == "user" or default_class == HumanMessageChunk:
        return HumanMessageChunk(content=text, id=chunk_id)

    if role == "assistant" or default_class == AIMessageChunk:
        reasoning = delta_dict.get("reasoning_content")
        if reasoning:
            extras["reasoning_content"] = reasoning
        return AIMessageChunk(
            content=text,
            additional_kwargs=extras,
            id=chunk_id,
            tool_call_chunks=parsed_tool_calls,  # type: ignore[arg-type]
        )

    if role in ("system", "developer") or default_class == SystemMessageChunk:
        if role == "developer":
            # Replaces (not extends) whatever was collected above.
            extras = {"__openai_role__": "developer"}
        return SystemMessageChunk(
            content=text, id=chunk_id, additional_kwargs=extras
        )

    if role == "function" or default_class == FunctionMessageChunk:
        return FunctionMessageChunk(
            content=text, name=delta_dict.get("name", ""), id=chunk_id
        )

    if role == "tool" or default_class == ToolMessageChunk:
        return ToolMessageChunk(
            content=text,
            tool_call_id=delta_dict.get("tool_call_id", ""),
            id=chunk_id,
        )

    if role or default_class == ChatMessageChunk:
        return ChatMessageChunk(content=text, role=role, id=chunk_id)

    return default_class(content=text, id=chunk_id)  # type: ignore
|
||||
|
||||
|
||||
def _convert_chunk_to_generation_chunk(
    chunk: Dict[str, Any],
    default_chunk_class: Type[BaseMessageChunk],
    base_generation_info: Optional[Dict[str, Any]],
) -> Optional[ChatGenerationChunk]:
    """Turn one raw streaming chunk into a ``ChatGenerationChunk``.

    Args:
        chunk: Streaming chunk as a plain dictionary.
        default_chunk_class: Message chunk class used when the delta carries
            no role, and for the empty-choices placeholder.
        base_generation_info: Extra generation info to copy into the result.

    Returns:
        Optional[ChatGenerationChunk]: The converted chunk, or ``None`` for
        ``content.delta`` events and for choices without a ``delta``.
    """
    # Events of this type come from beta.chat.completions.stream and are skipped.
    if chunk.get("type") == "content.delta":
        return None

    raw_usage = chunk.get("usage")

    # Regular chunks hold "choices" at the top level; the beta stream format
    # nests them under "chunk".
    choices = chunk.get("choices", [])
    if not choices:
        choices = chunk.get("chunk", {}).get("choices", [])

    usage_metadata: Optional[UsageMetadata] = None
    if raw_usage:
        usage_metadata = _create_usage_metadata(raw_usage)

    if not choices:
        # No choices: emit an empty message that only carries the usage data.
        placeholder = default_chunk_class(content="", usage_metadata=usage_metadata)
        return ChatGenerationChunk(message=placeholder)

    first_choice = choices[0]
    delta = first_choice.get("delta")
    if delta is None:
        return None

    message = _convert_delta_to_message_chunk(delta, default_chunk_class)

    if base_generation_info:
        info = dict(base_generation_info)
    else:
        info = {}

    # Copy over whichever optional details are present (falsy values are dropped).
    optional_details = (
        ("finish_reason", first_choice.get("finish_reason")),
        ("model_name", chunk.get("model")),
        ("system_fingerprint", chunk.get("system_fingerprint")),
        ("logprobs", first_choice.get("logprobs")),
    )
    for info_key, value in optional_details:
        if value:
            info[info_key] = value

    # Usage metadata is only attached to AI message chunks.
    if usage_metadata and isinstance(message, AIMessageChunk):
        message.usage_metadata = usage_metadata

    return ChatGenerationChunk(message=message, generation_info=info or None)
|
||||
|
||||
|
||||
class ChatDashscope(ChatOpenAI):
    """ChatOpenAI subclass that preserves ``reasoning_content``.

    Some OpenAI-compatible endpoints return a ``reasoning_content`` field next
    to the regular message content. This class copies that field into
    ``additional_kwargs["reasoning_content"]`` for non-streaming results
    (``_create_chat_result``) and for synchronous streaming (``_stream``).

    NOTE(review): only the synchronous ``_stream`` is overridden here; whether
    the inherited async streaming path keeps ``reasoning_content`` should be
    confirmed.
    """

    def _create_chat_result(
        self,
        response: Union[Dict[str, Any], openai.BaseModel],
        generation_info: Optional[Dict[str, Any]] = None,
    ) -> ChatResult:
        """Create a chat result and attach the reasoning content, if any.

        Args:
            response: The API response, either a raw dict or an
                ``openai.BaseModel`` instance.
            generation_info: Additional generation information passed through
                to the parent implementation.

        Returns:
            ChatResult: The parent's result; when ``response`` is a model
            object whose first choice has a non-empty
            ``message.reasoning_content``, that value is stored in the first
            generation's ``additional_kwargs["reasoning_content"]``.
        """
        chat_result = super()._create_chat_result(response, generation_info)

        # Raw dict responses are returned exactly as the parent built them.
        if not isinstance(response, openai.BaseModel):
            return chat_result

        try:
            if (
                hasattr(response, "choices")
                and response.choices
                and hasattr(response.choices[0], "message")
                and hasattr(response.choices[0].message, "reasoning_content")
            ):
                reasoning_content = response.choices[0].message.reasoning_content
                if reasoning_content and chat_result.generations:
                    chat_result.generations[0].message.additional_kwargs[
                        "reasoning_content"
                    ] = reasoning_content
        except (IndexError, AttributeError):
            # Best effort: a response with an unexpected shape simply yields
            # no reasoning content.
            pass

        return chat_result

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream chat completion chunks, keeping ``reasoning_content``.

        Args:
            messages: Messages to send to the model.
            stop: Optional list of stop sequences.
            run_manager: Optional callback manager; receives every new chunk.
            **kwargs: Additional keyword arguments for the request payload.

        Yields:
            ChatGenerationChunk: One chunk per streamed event that is not
            skipped, plus the final completion when ``response_format`` is set
            and the stream exposes ``get_final_completion``.

        Raises:
            openai.BadRequestError: Passed through
                ``_handle_openai_bad_request`` when the API rejects the
                request, whether at request creation or while streaming.
        """
        kwargs["stream"] = True
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        default_chunk_class: Type[BaseMessageChunk] = AIMessageChunk
        base_generation_info: Dict[str, Any] = {}

        try:
            # The request is created inside the try block so that a
            # BadRequestError raised when the request is sent is handled the
            # same way as one raised while iterating the stream.
            if "response_format" in payload:
                if self.include_response_headers:
                    warnings.warn(
                        "Cannot currently include response headers when response_format is "
                        "specified."
                    )
                # The beta streaming helper does not take the "stream" flag.
                payload.pop("stream")
                response_stream = self.root_client.beta.chat.completions.stream(
                    **payload
                )
                context_manager = response_stream
            else:
                if self.include_response_headers:
                    raw_response = self.client.with_raw_response.create(**payload)
                    response = raw_response.parse()
                    base_generation_info = {"headers": dict(raw_response.headers)}
                else:
                    response = self.client.create(**payload)
                context_manager = response

            with context_manager as response:
                is_first_chunk = True
                for chunk in response:
                    # Model objects are converted to plain dictionaries.
                    if not isinstance(chunk, dict):
                        chunk = chunk.model_dump()

                    # Base info (e.g. headers) is attached to the first
                    # emitted chunk only.
                    generation_chunk = _convert_chunk_to_generation_chunk(
                        chunk,
                        default_chunk_class,
                        base_generation_info if is_first_chunk else {},
                    )

                    if generation_chunk is None:
                        continue

                    # Later deltas may omit the role; reuse the last class.
                    default_chunk_class = generation_chunk.message.__class__

                    logprobs = (generation_chunk.generation_info or {}).get(
                        "logprobs"
                    )
                    if run_manager:
                        run_manager.on_llm_new_token(
                            generation_chunk.text,
                            chunk=generation_chunk,
                            logprobs=logprobs,
                        )

                    is_first_chunk = False
                    yield generation_chunk

        except openai.BadRequestError as e:
            _handle_openai_bad_request(e)

        # For response_format requests, also emit the final completion.
        if hasattr(response, "get_final_completion") and "response_format" in payload:
            try:
                final_completion = response.get_final_completion()
                generation_chunk = self._get_generation_chunk_from_completion(
                    final_completion
                )
                if run_manager:
                    run_manager.on_llm_new_token(
                        generation_chunk.text, chunk=generation_chunk
                    )
                yield generation_chunk
            except AttributeError:
                # hasattr() above already guards the method lookup; this also
                # suppresses any AttributeError raised while building or
                # emitting the final chunk.
                pass
|
||||
307
tests/unit/llms/test_dashscope.py
Normal file
307
tests/unit/llms/test_dashscope.py
Normal file
@@ -0,0 +1,307 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.messages import (
|
||||
AIMessageChunk,
|
||||
HumanMessageChunk,
|
||||
SystemMessageChunk,
|
||||
FunctionMessageChunk,
|
||||
ToolMessageChunk,
|
||||
)
|
||||
|
||||
from src.llms import llm as llm_module
|
||||
from langchain_core.messages import ChatMessageChunk
|
||||
from src.llms.providers import dashscope as dashscope_module
|
||||
|
||||
from src.llms.providers.dashscope import (
|
||||
ChatDashscope,
|
||||
_convert_delta_to_message_chunk,
|
||||
_convert_chunk_to_generation_chunk,
|
||||
)
|
||||
|
||||
|
||||
class DummyChatDashscope:
    """Test double that records the keyword arguments it was built with."""

    def __init__(self, **init_kwargs):
        # Exposed as ``kwargs`` so tests can inspect the constructor arguments.
        self.kwargs = init_kwargs
|
||||
|
||||
|
||||
@pytest.fixture
def dashscope_conf():
    """Configuration with a basic and a reasoning model on a Dashscope URL."""
    basic_model = {
        "api_key": "k",
        "base_url": "https://dashscope.aliyuncs.com/v1",
        "model": "qwen3-235b-a22b-instruct-2507",
    }
    reasoning_model = {
        "api_key": "rk",
        "base_url": "https://dashscope.aliyuncs.com/v1",
        "model": "qwen3-235b-a22b-thinking-2507",
    }
    return {"BASIC_MODEL": basic_model, "REASONING_MODEL": reasoning_model}
|
||||
|
||||
|
||||
def test_convert_delta_to_message_chunk_roles_and_extras():
    """Each known role maps to its chunk class; assistant keeps the extras."""
    # Assistant delta carrying reasoning content and one tool call.
    assistant_delta = {
        "role": "assistant",
        "content": "Hello",
        "reasoning_content": "Think...",
        "tool_calls": [
            {
                "id": "call_1",
                "index": 0,
                "function": {"name": "lookup", "arguments": '{\\"q\\":\\"x\\"}'},
            }
        ],
    }
    assistant_msg = _convert_delta_to_message_chunk(assistant_delta, AIMessageChunk)
    assert isinstance(assistant_msg, AIMessageChunk)
    assert assistant_msg.content == "Hello"
    assert assistant_msg.additional_kwargs.get("reasoning_content") == "Think..."
    # The tool call must have produced at least one tool call chunk.
    assert getattr(assistant_msg, "tool_call_chunks", None)

    # Human, system, function and tool deltas, in that order.
    simple_cases = [
        ({"role": "user", "content": "Hi"}, HumanMessageChunk),
        ({"role": "system", "content": "Rules"}, SystemMessageChunk),
        ({"role": "function", "name": "f", "content": "{}"}, FunctionMessageChunk),
        ({"role": "tool", "tool_call_id": "t1", "content": "ok"}, ToolMessageChunk),
    ]
    for delta, chunk_cls in simple_cases:
        converted = _convert_delta_to_message_chunk(delta, chunk_cls)
        assert isinstance(converted, chunk_cls)
|
||||
|
||||
|
||||
def test_convert_chunk_to_generation_chunk_skip_and_usage():
    """content.delta events are skipped; regular chunks keep usage and info."""
    skipped = _convert_chunk_to_generation_chunk(
        {"type": "content.delta"}, AIMessageChunk, None
    )
    assert skipped is None

    # A complete chunk: delta, finish reason, model info and token usage.
    model_name = "qwen3-235b-a22b-instruct-2507"
    full_chunk = {
        "choices": [
            {
                "delta": {"role": "assistant", "content": "Hi"},
                "finish_reason": "stop",
            }
        ],
        "model": model_name,
        "system_fingerprint": "fp",
        "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
    }
    result = _convert_chunk_to_generation_chunk(full_chunk, AIMessageChunk, None)
    assert result is not None
    assert isinstance(result.message, AIMessageChunk)
    assert result.message.content == "Hi"
    # Usage metadata is attached to the AI message itself.
    assert getattr(result.message, "usage_metadata", None) is not None

    info = result.generation_info
    assert info.get("finish_reason") == "stop"
    assert info.get("model_name") == model_name
    assert info.get("system_fingerprint") == "fp"
|
||||
|
||||
|
||||
def test_llm_selects_dashscope_and_sets_enable_thinking(monkeypatch, dashscope_conf):
    """Dashscope URLs select ChatDashscope and set ``enable_thinking`` by type."""
    # A dummy class captures the keyword arguments used for construction.
    monkeypatch.setattr(llm_module, "ChatDashscope", DummyChatDashscope)

    # basic -> enable_thinking False
    inst = llm_module._create_llm_use_conf("basic", dashscope_conf)
    assert isinstance(inst, DummyChatDashscope)
    assert inst.kwargs["extra_body"]["enable_thinking"] is False
    # Substring test: ``in`` also accepts a match at index 0, unlike
    # ``find(...) > 0``.
    assert "dashscope." in inst.kwargs["base_url"]

    # reasoning -> enable_thinking True
    inst2 = llm_module._create_llm_use_conf("reasoning", dashscope_conf)
    assert isinstance(inst2, DummyChatDashscope)
    assert inst2.kwargs["extra_body"]["enable_thinking"] is True
|
||||
|
||||
|
||||
def test_llm_verify_ssl_false_adds_http_clients(monkeypatch, dashscope_conf):
    """With ``verify_ssl`` off, both HTTP clients are passed to the model."""
    monkeypatch.setattr(llm_module, "ChatDashscope", DummyChatDashscope)

    # Copy the fixture data and switch SSL verification off for the basic model.
    conf_without_ssl = {
        **dashscope_conf,
        "BASIC_MODEL": {**dashscope_conf["BASIC_MODEL"], "verify_ssl": False},
    }

    created = llm_module._create_llm_use_conf("basic", conf_without_ssl)
    for client_key in ("http_client", "http_async_client"):
        assert client_key in created.kwargs
|
||||
|
||||
|
||||
def test_convert_delta_to_message_chunk_developer_and_function_call_and_tool_calls():
    """Covers the developer role, a null function name and partial tool calls."""
    # The developer role becomes a system chunk tagged with __openai_role__.
    developer_msg = _convert_delta_to_message_chunk(
        {"role": "developer", "content": "dev rules"}, SystemMessageChunk
    )
    assert isinstance(developer_msg, SystemMessageChunk)
    assert developer_msg.additional_kwargs.get("__openai_role__") == "developer"

    # A function_call whose name is None gets an empty-string name.
    function_call_msg = _convert_delta_to_message_chunk(
        {"role": "assistant", "function_call": {"name": None, "arguments": "{}"}},
        AIMessageChunk,
    )
    assert isinstance(function_call_msg, AIMessageChunk)
    assert function_call_msg.additional_kwargs["function_call"]["name"] == ""

    # Two tool calls, the second without a "function" key: no crash, one chunk.
    valid_call = {"id": "t1", "index": 0, "function": {"name": "f", "arguments": "{}"}}
    call_without_function = {"id": "t2", "index": 1}
    tool_calls_msg = _convert_delta_to_message_chunk(
        {"role": "assistant", "tool_calls": [valid_call, call_without_function]},
        AIMessageChunk,
    )
    assert isinstance(tool_calls_msg, AIMessageChunk)
    # The raw tool calls are copied as-is.
    assert tool_calls_msg.additional_kwargs["tool_calls"][0]["id"] == "t1"
    # Only the valid entry produces a tool call chunk.
    assert tool_calls_msg.tool_call_chunks and len(tool_calls_msg.tool_call_chunks) == 1
|
||||
|
||||
|
||||
def test_convert_delta_to_message_chunk_default_class_and_unknown_role():
    """A missing role uses the default class; an unknown role keeps its name."""
    # No role and a human default class -> HumanMessageChunk.
    without_role = _convert_delta_to_message_chunk(
        {"content": "hey"}, HumanMessageChunk
    )
    assert isinstance(without_role, HumanMessageChunk)

    # Unknown role -> ChatMessageChunk carrying that role.
    unknown_role = _convert_delta_to_message_chunk(
        {"role": "observer", "content": "hmm"}, ChatMessageChunk
    )
    assert isinstance(unknown_role, ChatMessageChunk)
    assert unknown_role.role == "observer"
|
||||
|
||||
|
||||
def test_convert_chunk_to_generation_chunk_empty_choices_and_usage():
    """An empty choices list yields an empty AI message that carries usage."""
    usage_only_chunk = {
        "choices": [],
        "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
    }
    result = _convert_chunk_to_generation_chunk(usage_only_chunk, AIMessageChunk, None)
    assert result is not None

    message = result.message
    assert isinstance(message, AIMessageChunk)
    assert message.content == ""
    assert getattr(message, "usage_metadata", None) is not None
    # No generation info is set on the placeholder chunk.
    assert result.generation_info is None
|
||||
|
||||
|
||||
def test_convert_chunk_to_generation_chunk_includes_base_info_and_logprobs():
    """Base generation info and log probabilities end up in generation_info."""
    choice = {
        "delta": {"role": "assistant", "content": "T"},
        "logprobs": {"content": [{"token": "T", "logprob": -0.1}]},
    }
    headers_info = {"headers": {"a": "b"}}

    result = _convert_chunk_to_generation_chunk(
        {"choices": [choice]}, AIMessageChunk, headers_info
    )
    assert result is not None
    assert result.message.content == "T"

    info = result.generation_info
    assert info.get("headers") == {"a": "b"}
    assert "logprobs" in info
|
||||
|
||||
|
||||
def test_convert_chunk_to_generation_chunk_beta_stream_format():
    """Choices nested under "chunk" (beta stream format) are still converted."""
    expected_text = "From beta stream format"
    nested_choice = {"delta": {"role": "assistant", "content": expected_text}}
    beta_chunk = {"chunk": {"choices": [nested_choice]}}

    result = _convert_chunk_to_generation_chunk(beta_chunk, AIMessageChunk, None)
    assert result is not None
    assert result.message.content == expected_text
|
||||
|
||||
|
||||
def test_chatdashscope_create_chat_result_adds_reasoning_content(monkeypatch):
    """Reasoning content of a model response is copied into additional_kwargs."""

    # Minimal stand-ins for what the parent's _create_chat_result returns.
    class StubMessage:
        def __init__(self):
            self.additional_kwargs = {}

    class StubGeneration:
        def __init__(self):
            self.message = StubMessage()

    class StubChatResult:
        def __init__(self):
            self.generations = [StubGeneration()]

    def parent_create_chat_result(self, response, generation_info=None):
        return StubChatResult()

    # Replace the parent implementation with the stub factory.
    monkeypatch.setattr(
        dashscope_module.ChatOpenAI, "_create_chat_result", parent_create_chat_result
    )

    # Swap openai.BaseModel so a plain Python class passes the isinstance check.
    class StubBaseModel:
        pass

    monkeypatch.setattr(dashscope_module.openai, "BaseModel", StubBaseModel)

    # Fake response: choices[0].message.reasoning_content == "Reasoning...".
    class ReasoningMessage:
        def __init__(self, reasoning):
            self.reasoning_content = reasoning

    class ReasoningChoice:
        def __init__(self, reasoning):
            self.message = ReasoningMessage(reasoning)

    class ReasoningResponse(StubBaseModel):
        def __init__(self):
            self.choices = [ReasoningChoice("Reasoning...")]

    model = ChatDashscope(model="dummy", api_key="k")
    chat_result = model._create_chat_result(ReasoningResponse())

    stored = chat_result.generations[0].message.additional_kwargs.get(
        "reasoning_content"
    )
    assert stored == "Reasoning..."
|
||||
|
||||
|
||||
def test_chatdashscope_create_chat_result_dict_passthrough(monkeypatch):
    """A raw dict response is returned without any reasoning content added."""

    # Minimal stand-ins for what the parent's _create_chat_result returns.
    class StubMessage:
        def __init__(self):
            self.additional_kwargs = {}

    class StubGeneration:
        def __init__(self):
            self.message = StubMessage()

    class StubChatResult:
        def __init__(self):
            self.generations = [StubGeneration()]

    def parent_create_chat_result(self, response, generation_info=None):
        return StubChatResult()

    monkeypatch.setattr(
        dashscope_module.ChatOpenAI, "_create_chat_result", parent_create_chat_result
    )

    model = ChatDashscope(model="dummy", api_key="k")
    chat_result = model._create_chat_result({"raw": "dict"})

    # Dict responses must not get a reasoning_content entry.
    first_message = chat_result.generations[0].message
    assert "reasoning_content" not in first_message.additional_kwargs
|
||||
Reference in New Issue
Block a user