mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
feat: Implement Milvus retriever for RAG (#516)
* feat: Implement MilvusRetriever with embedding model and resource management * chore: Update configuration and loader files for consistency * chore: Clean up test_milvus.py for improved readability and organization * feat: Add tests for DashscopeEmbeddings query and document embedding methods * feat: Add tests for embedding model initialization and example file loading in MilvusProvider * chore: Remove unused imports and clean up test_milvus.py for better readability * chore: Clean up test_milvus.py for improved readability and organization * chore: Clean up test_milvus.py for improved readability and organization * fix: replace print statements with logging in recursion limit function * Implement feature X to enhance user experience and optimize performance * refactor: clean up unused imports and comments in AboutTab component * Implement feature X to enhance user experience and fix bug Y in module Z --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
23
.env.example
23
.env.example
@@ -41,6 +41,29 @@ TAVILY_API_KEY=tvly-xxx
|
||||
# RAGFLOW_RETRIEVAL_SIZE=10
|
||||
# RAGFLOW_CROSS_LANGUAGES=English,Chinese,Spanish,French,German,Japanese,Korean # Optional. To use RAGFlow's cross-language search, please separate each language with a single comma
|
||||
|
||||
|
||||
# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start )
|
||||
# RAG_PROVIDER=milvus
|
||||
# MILVUS_URI=<endpoint_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_USER=<username_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_PASSWORD=<password_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_COLLECTION=documents
|
||||
# MILVUS_EMBEDDING_PROVIDER=openai # supported values: openai, dashscope
|
||||
# MILVUS_EMBEDDING_BASE_URL=
|
||||
# MILVUS_EMBEDDING_MODEL=
|
||||
# MILVUS_EMBEDDING_API_KEY=
|
||||
# MILVUS_AUTO_LOAD_EXAMPLES=true
|
||||
|
||||
# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux)
|
||||
# RAG_PROVIDER=milvus
|
||||
# MILVUS_URI=./milvus_demo.db
|
||||
# MILVUS_COLLECTION=documents
|
||||
# MILVUS_EMBEDDING_PROVIDER=openai # supported values: openai, dashscope
|
||||
# MILVUS_EMBEDDING_BASE_URL=
|
||||
# MILVUS_EMBEDDING_MODEL=
|
||||
# MILVUS_EMBEDDING_API_KEY=
|
||||
# MILVUS_AUTO_LOAD_EXAMPLES=true
|
||||
|
||||
# Optional, volcengine TTS for generating podcast
|
||||
VOLCENGINE_TTS_APPID=xxx
|
||||
VOLCENGINE_TTS_ACCESS_TOKEN=xxx
|
||||
|
||||
@@ -179,4 +179,40 @@ SEARCH_ENGINE:
|
||||
exclude_domains:
|
||||
- unreliable-site.com
|
||||
- spam-domain.net
|
||||
```
|
||||
|
||||
## RAG (Retrieval-Augmented Generation) Configuration
|
||||
|
||||
DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables.
|
||||
|
||||
### Supported RAG Providers
|
||||
|
||||
- **RAGFlow**: Document retrieval using RAGFlow API
|
||||
- **VikingDB Knowledge Base**: ByteDance's VikingDB knowledge base service
|
||||
- **Milvus**: Open-source vector database for similarity search
|
||||
|
||||
### Milvus Configuration
|
||||
|
||||
To use Milvus as your RAG provider, set the following environment variables:
|
||||
|
||||
```bash
|
||||
# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start )
|
||||
RAG_PROVIDER=milvus
|
||||
MILVUS_URI=<endpoint_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
MILVUS_USER=<username_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
MILVUS_PASSWORD=<password_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
MILVUS_COLLECTION=documents
|
||||
MILVUS_EMBEDDING_PROVIDER=openai
|
||||
MILVUS_EMBEDDING_BASE_URL=
|
||||
MILVUS_EMBEDDING_MODEL=
|
||||
MILVUS_EMBEDDING_API_KEY=
|
||||
|
||||
# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux)
|
||||
RAG_PROVIDER=milvus
|
||||
MILVUS_URI=./milvus_demo.db
|
||||
MILVUS_COLLECTION=documents
|
||||
MILVUS_EMBEDDING_PROVIDER=openai
|
||||
MILVUS_EMBEDDING_BASE_URL=
|
||||
MILVUS_EMBEDDING_MODEL=
|
||||
MILVUS_EMBEDDING_API_KEY=
|
||||
```
|
||||
|
||||
@@ -37,6 +37,8 @@ dependencies = [
|
||||
"langchain-tavily<0.3",
|
||||
"langgraph-checkpoint-mongodb>=0.1.4",
|
||||
"langgraph-checkpoint-postgres==2.0.21",
|
||||
"pymilvus>=2.3.0",
|
||||
"langchain-milvus>=0.2.1",
|
||||
"psycopg[binary]>=3.2.9",
|
||||
]
|
||||
|
||||
|
||||
@@ -10,36 +10,10 @@ from langchain_core.runnables import RunnableConfig
|
||||
|
||||
from src.config.report_style import ReportStyle
|
||||
from src.rag.retriever import Resource
|
||||
from src.config.loader import get_str_env, get_int_env, get_bool_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lower-cased spellings that count as "true" for boolean environment flags.
_TRUTHY = {"1", "true", "yes", "y", "on"}


def get_bool_env(name: str, default: bool = False) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` if the variable is unset; otherwise True only when the
        trimmed, lower-cased value is one of the entries in ``_TRUTHY``.
    """
    raw = os.environ.get(name)
    return default if raw is None else str(raw).strip().lower() in _TRUTHY
|
||||
|
||||
|
||||
def get_str_env(name: str, default: str = "") -> str:
    """Read a string setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned as-is (not stripped) when the variable is unset.

    Returns:
        The variable's value with surrounding whitespace removed, or ``default``.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return str(raw).strip()
|
||||
|
||||
|
||||
def get_int_env(name: str, default: int = 0) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or not an integer.

    Returns:
        The parsed integer, or ``default``. A value that cannot be parsed is
        reported through the module logger before falling back.
    """
    raw = os.environ.get(name)
    if raw is not None:
        try:
            return int(raw.strip())
        except ValueError:
            logger.warning(
                f"Invalid integer value for {name}: {raw}. Using default {default}."
            )
    return default
|
||||
|
||||
|
||||
def get_recursion_limit(default: int = 25) -> int:
|
||||
"""Get the recursion limit from environment variable or use default.
|
||||
|
||||
@@ -7,6 +7,29 @@ from typing import Any, Dict
|
||||
import yaml
|
||||
|
||||
|
||||
def get_bool_env(name: str, default: bool = False) -> bool:
    """Interpret an environment variable as a boolean flag.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is not set.

    Returns:
        True when the trimmed, lower-cased value is one of
        ``1``, ``true``, ``yes``, ``y`` or ``on``; False for any other value
        that is set; ``default`` when the variable is unset.
    """
    raw = os.getenv(name)
    if raw is not None:
        return str(raw).strip().lower() in {"1", "true", "yes", "y", "on"}
    return default
|
||||
|
||||
|
||||
def get_str_env(name: str, default: str = "") -> str:
    """Return an environment variable as a whitespace-trimmed string.

    Args:
        name: Environment variable to read.
        default: Returned unchanged when the variable is unset.

    Returns:
        The stripped value if the variable is set (possibly an empty string),
        otherwise ``default``.
    """
    raw = os.getenv(name)
    if raw is not None:
        return str(raw).strip()
    return default
|
||||
|
||||
|
||||
def get_int_env(name: str, default: int = 0) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or not an integer.

    Returns:
        The parsed integer, or ``default`` when the variable is missing or
        cannot be parsed.
    """
    # Imported locally so this helper does not depend on a module-level logger.
    import logging

    val = os.getenv(name)
    if val is None:
        return default
    try:
        return int(val.strip())
    except ValueError:
        # Report through logging rather than print() so the message respects
        # the application's log configuration and never pollutes stdout.
        logging.getLogger(__name__).warning(
            "Invalid integer value for %s: %s. Using default %s.", name, val, default
        )
        return default
|
||||
|
||||
|
||||
def replace_env_vars(value: str) -> str:
|
||||
"""Replace environment variables in string values."""
|
||||
if not isinstance(value, str):
|
||||
|
||||
@@ -24,6 +24,7 @@ SELECTED_SEARCH_ENGINE = os.getenv("SEARCH_API", SearchEngine.TAVILY.value)
|
||||
class RAGProvider(enum.Enum):
    """Identifiers of the supported RAG backends.

    Each member's value is the string that the ``RAG_PROVIDER`` setting is
    compared against when a retriever is selected.
    """

    RAGFLOW = "ragflow"
    VIKINGDB_KNOWLEDGE_BASE = "vikingdb_knowledge_base"
    MILVUS = "milvus"
|
||||
|
||||
|
||||
SELECTED_RAG_PROVIDER = os.getenv("RAG_PROVIDER")
|
||||
|
||||
@@ -10,7 +10,7 @@ import psycopg
|
||||
from psycopg.rows import dict_row
|
||||
from pymongo import MongoClient
|
||||
from langgraph.store.memory import InMemoryStore
|
||||
from src.config.configuration import get_bool_env, get_str_env
|
||||
from src.config.loader import get_bool_env, get_str_env
|
||||
|
||||
|
||||
class ChatStreamManager:
|
||||
|
||||
@@ -5,6 +5,7 @@ from src.config.tools import SELECTED_RAG_PROVIDER, RAGProvider
|
||||
from src.rag.ragflow import RAGFlowProvider
|
||||
from src.rag.retriever import Retriever
|
||||
from src.rag.vikingdb_knowledge_base import VikingDBKnowledgeBaseProvider
|
||||
from src.rag.milvus import MilvusProvider
|
||||
|
||||
|
||||
def build_retriever() -> Retriever | None:
|
||||
@@ -12,6 +13,8 @@ def build_retriever() -> Retriever | None:
|
||||
return RAGFlowProvider()
|
||||
elif SELECTED_RAG_PROVIDER == RAGProvider.VIKINGDB_KNOWLEDGE_BASE.value:
|
||||
return VikingDBKnowledgeBaseProvider()
|
||||
elif SELECTED_RAG_PROVIDER == RAGProvider.MILVUS.value:
|
||||
return MilvusProvider()
|
||||
elif SELECTED_RAG_PROVIDER:
|
||||
raise ValueError(f"Unsupported RAG provider: {SELECTED_RAG_PROVIDER}")
|
||||
return None
|
||||
|
||||
785
src/rag/milvus.py
Normal file
785
src/rag/milvus.py
Normal file
@@ -0,0 +1,785 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
|
||||
|
||||
from langchain_milvus.vectorstores import Milvus as LangchainMilvus
|
||||
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from openai import OpenAI
|
||||
from src.rag.retriever import Chunk, Document, Resource, Retriever
|
||||
from src.config.loader import get_bool_env, get_str_env, get_int_env
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DashscopeEmbeddings:
    """Embeddings wrapper that talks to an OpenAI-compatible endpoint.

    Exposes the ``embed_query`` / ``embed_documents`` pair so it can be used
    wherever a LangChain-style embedding object is expected.

    Keyword arguments read by the constructor: ``api_key``, ``base_url``,
    ``model`` and ``encoding_format`` (default ``"float"``). Any other keyword
    is ignored.
    """

    def __init__(self, **kwargs: Any) -> None:
        api_key = kwargs.get("api_key", "")
        base_url = kwargs.get("base_url", "")
        self._client: OpenAI = OpenAI(api_key=api_key, base_url=base_url)
        self._model: str = kwargs.get("model", "")
        self._encoding_format: str = kwargs.get("encoding_format", "float")
        # NOTE(review): a "dimensions" keyword, if supplied by the caller, is
        # not forwarded to the API here - confirm the model's default output
        # size matches the collection's vector dimension.

    def _embed(self, texts: Sequence[str]) -> List[List[float]]:
        """Call the embeddings API once for ``texts`` and return one vector per item."""
        payload: List[str] = []
        for item in texts:
            # Non-string items are coerced rather than rejected.
            payload.append(item if isinstance(item, str) else str(item))
        if len(payload) == 0:
            # Nothing to embed: skip the network round-trip entirely.
            return []
        response = self._client.embeddings.create(
            model=self._model,
            input=payload,
            encoding_format=self._encoding_format,
        )
        return [entry.embedding for entry in response.data]

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding of a single text (empty list if none came back)."""
        vectors = self._embed([text])
        if vectors:
            return vectors[0]
        return []

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return embeddings for several texts, in input order."""
        return self._embed(texts)
|
||||
|
||||
|
||||
class MilvusRetriever(Retriever):
|
||||
"""Retriever implementation backed by a Milvus vector store.
|
||||
Responsibilities:
|
||||
* Initialize / lazily connect to Milvus (local Lite or remote server).
|
||||
* Provide methods for inserting content chunks & querying similarity.
|
||||
* Optionally surface example markdown resources found in the project.
|
||||
Environment variables (selected):
|
||||
MILVUS_URI: Connection URI or local *.db path for Milvus Lite.
|
||||
MILVUS_COLLECTION: Target collection name (default: documents).
|
||||
MILVUS_TOP_K: Result set size (default: 10).
|
||||
MILVUS_EMBEDDING_PROVIDER: openai | dashscope (default: openai).
|
||||
MILVUS_EMBEDDING_MODEL: Embedding model name.
|
||||
MILVUS_EMBEDDING_DIM: Override embedding dimensionality.
|
||||
MILVUS_AUTO_LOAD_EXAMPLES: Load example *.md files if true.
|
||||
MILVUS_EXAMPLES_DIR: Folder containing example markdown files.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
    """Read Milvus and embedding settings from the environment.

    No connection is opened here: ``self.client`` starts as ``None`` and is
    created later by ``_connect``. The embedding model object is built
    immediately via ``_init_embedding_model``.

    Raises:
        ValueError: If ``MILVUS_EMBEDDING_PROVIDER`` names an unsupported
            provider (raised by ``_init_embedding_model``).
    """
    # --- Connection / collection configuration ---
    self.uri: str = get_str_env("MILVUS_URI", "http://localhost:19530")
    self.user: str = get_str_env("MILVUS_USER")
    self.password: str = get_str_env("MILVUS_PASSWORD")
    self.collection_name: str = get_str_env("MILVUS_COLLECTION", "documents")

    # --- Search configuration ---
    # Parse with the shared integer helper instead of str.isdigit(): isdigit()
    # accepts characters such as superscript digits that int() rejects, and it
    # also lets "0" through, which would request an empty result set.
    top_k = get_int_env("MILVUS_TOP_K", 10)
    self.top_k: int = top_k if top_k > 0 else 10

    # --- Vector field names ---
    self.vector_field: str = get_str_env("MILVUS_VECTOR_FIELD", "embedding")
    self.id_field: str = get_str_env("MILVUS_ID_FIELD", "id")
    self.content_field: str = get_str_env("MILVUS_CONTENT_FIELD", "content")
    self.title_field: str = get_str_env("MILVUS_TITLE_FIELD", "title")
    self.url_field: str = get_str_env("MILVUS_URL_FIELD", "url")
    self.metadata_field: str = get_str_env("MILVUS_METADATA_FIELD", "metadata")

    # --- Embedding configuration ---
    # embedding_model holds the model *name* at this point; it is replaced by
    # the embedding object in _init_embedding_model() below, so the dimension
    # lookup must happen first.
    self.embedding_model = get_str_env("MILVUS_EMBEDDING_MODEL")
    self.embedding_api_key = get_str_env("MILVUS_EMBEDDING_API_KEY")
    self.embedding_base_url = get_str_env("MILVUS_EMBEDDING_BASE_URL")
    self.embedding_dim: int = self._get_embedding_dimension(self.embedding_model)
    self.embedding_provider = get_str_env("MILVUS_EMBEDDING_PROVIDER", "openai")

    # --- Examples / auto-load configuration ---
    self.auto_load_examples: bool = get_bool_env("MILVUS_AUTO_LOAD_EXAMPLES", True)
    self.examples_dir: str = get_str_env("MILVUS_EXAMPLES_DIR", "examples")
    # Maximum number of characters per stored chunk.
    self.chunk_size: int = get_int_env("MILVUS_CHUNK_SIZE", 4000)

    # --- Embedding model initialization ---
    self._init_embedding_model()

    # Client (MilvusClient or LangchainMilvus) created lazily
    self.client: Any = None
|
||||
|
||||
def _init_embedding_model(self) -> None:
|
||||
"""Initialize the embedding model based on configuration."""
|
||||
kwargs = {
|
||||
"api_key": self.embedding_api_key,
|
||||
"model": self.embedding_model,
|
||||
"base_url": self.embedding_base_url,
|
||||
"encoding_format": "float",
|
||||
"dimensions": self.embedding_dim,
|
||||
}
|
||||
if self.embedding_provider.lower() == "openai":
|
||||
self.embedding_model = OpenAIEmbeddings(**kwargs)
|
||||
elif self.embedding_provider.lower() == "dashscope":
|
||||
self.embedding_model = DashscopeEmbeddings(**kwargs)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported embedding provider: {self.embedding_provider}. "
|
||||
"Supported providers: openai,dashscope"
|
||||
)
|
||||
|
||||
def _get_embedding_dimension(self, model_name: str) -> int:
|
||||
"""Return embedding dimension for the supplied model name."""
|
||||
# Common OpenAI embedding model dimensions
|
||||
embedding_dims = {
|
||||
"text-embedding-ada-002": 1536,
|
||||
"text-embedding-v4": 2048,
|
||||
}
|
||||
|
||||
# Check if user has explicitly set the dimension
|
||||
explicit_dim = get_int_env("MILVUS_EMBEDDING_DIM", 0)
|
||||
if explicit_dim > 0:
|
||||
return explicit_dim
|
||||
# Return the dimension for the specified model
|
||||
return embedding_dims.get(model_name, 1536) # Default to 1536
|
||||
|
||||
def _create_collection_schema(self) -> CollectionSchema:
    """Build the Milvus ``CollectionSchema`` used for new collections.

    The schema declares five fixed fields (primary key, vector, content,
    title, url) named after the configured ``*_field`` attributes. No explicit
    metadata field is declared; dynamic fields are enabled so extra keys passed
    at insert time can be stored alongside the fixed ones.
    """
    primary_key = FieldSchema(
        name=self.id_field,
        dtype=DataType.VARCHAR,
        max_length=512,
        is_primary=True,
        auto_id=False,
    )
    vector = FieldSchema(
        name=self.vector_field,
        dtype=DataType.FLOAT_VECTOR,
        dim=self.embedding_dim,
    )
    content = FieldSchema(
        name=self.content_field, dtype=DataType.VARCHAR, max_length=65535
    )
    title = FieldSchema(name=self.title_field, dtype=DataType.VARCHAR, max_length=512)
    url = FieldSchema(name=self.url_field, dtype=DataType.VARCHAR, max_length=1024)

    return CollectionSchema(
        fields=[primary_key, vector, content, title, url],
        description=f"Collection for DeerFlow RAG documents: {self.collection_name}",
        enable_dynamic_field=True,  # Allow additional dynamic metadata fields
    )
|
||||
|
||||
def _ensure_collection_exists(self) -> None:
|
||||
"""Ensure the configured collection exists (create if missing).
|
||||
For Milvus Lite we create the collection manually; for the remote
|
||||
(LangChain) client we rely on LangChain's internal logic.
|
||||
"""
|
||||
if self._is_milvus_lite():
|
||||
# For Milvus Lite, use MilvusClient
|
||||
try:
|
||||
# Check if collection exists
|
||||
collections = self.client.list_collections()
|
||||
if self.collection_name not in collections:
|
||||
# Create collection
|
||||
schema = self._create_collection_schema()
|
||||
self.client.create_collection(
|
||||
collection_name=self.collection_name,
|
||||
schema=schema,
|
||||
index_params={
|
||||
"field_name": self.vector_field,
|
||||
"index_type": "IVF_FLAT",
|
||||
"metric_type": "IP",
|
||||
"params": {"nlist": 1024},
|
||||
},
|
||||
)
|
||||
logger.info("Created Milvus collection: %s", self.collection_name)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning("Could not ensure collection exists: %s", e)
|
||||
else:
|
||||
# For LangChain Milvus, collection creation is handled automatically
|
||||
logger.warning(
|
||||
"Could not ensure collection exists: %s", self.collection_name
|
||||
)
|
||||
|
||||
def _load_example_files(self) -> None:
    """Load example markdown files into the collection.

    Every ``*.md`` file directly inside the examples directory (resolved
    relative to the project root, three levels above this file) is split into
    chunks and inserted. A file is skipped when the collection already holds
    its document id, either as a single record or as ``<doc_id>_chunk_<n>``
    records, so repeated calls do not duplicate multi-chunk files.

    Errors are logged and never raised: a failure on one file does not stop
    the remaining files from loading.
    """
    try:
        project_root = Path(__file__).parent.parent.parent  # Go up to project root
        examples_path = project_root / self.examples_dir

        if not examples_path.exists():
            logger.info("Examples directory not found: %s", examples_path)
            return

        logger.info("Loading example files from: %s", examples_path)

        md_files = list(examples_path.glob("*.md"))
        if not md_files:
            logger.info("No markdown files found in examples directory")
            return

        existing_docs = self._get_existing_document_ids()
        loaded_count = 0
        for md_file in md_files:
            doc_id = self._generate_doc_id(md_file)

            # Files that were split are stored only under chunk ids, so the
            # bare doc_id alone would never be found for them.
            chunk_prefix = f"{doc_id}_chunk_"
            already_loaded = doc_id in existing_docs or any(
                existing.startswith(chunk_prefix) for existing in existing_docs
            )
            if already_loaded:
                continue

            try:
                content = md_file.read_text(encoding="utf-8")
                title = self._extract_title_from_markdown(content, md_file.name)

                chunks = self._split_content(content)

                for i, chunk in enumerate(chunks):
                    chunk_id = f"{doc_id}_chunk_{i}" if len(chunks) > 1 else doc_id
                    self._insert_document_chunk(
                        doc_id=chunk_id,
                        content=chunk,
                        title=title,
                        url=f"milvus://{self.collection_name}/{md_file.name}",
                        metadata={"source": "examples", "file": md_file.name},
                    )

                loaded_count += 1
                logger.debug("Loaded example markdown: %s", md_file.name)

            except Exception as e:
                logger.warning("Error loading %s: %s", md_file.name, e)

        logger.info("Successfully loaded %d example files into Milvus", loaded_count)

    except Exception as e:
        logger.error("Error loading example files: %s", e)
|
||||
|
||||
def _generate_doc_id(self, file_path: Path) -> str:
|
||||
"""Return a stable identifier derived from name, size & mtime hash."""
|
||||
# Use file name and size for a simple but effective ID
|
||||
file_stat = file_path.stat()
|
||||
content_hash = hashlib.md5(
|
||||
f"{file_path.name}_{file_stat.st_size}_{file_stat.st_mtime}".encode()
|
||||
).hexdigest()[:8]
|
||||
return f"example_{file_path.stem}_{content_hash}"
|
||||
|
||||
def _extract_title_from_markdown(self, content: str, filename: str) -> str:
|
||||
"""Extract the first level-1 heading; else derive from file name."""
|
||||
lines = content.split("\n")
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line.startswith("# "):
|
||||
return line[2:].strip()
|
||||
|
||||
# Fallback to filename without extension
|
||||
return filename.replace(".md", "").replace("_", " ").title()
|
||||
|
||||
def _split_content(self, content: str) -> List[str]:
|
||||
"""Split long markdown text into paragraph-based chunks."""
|
||||
if len(content) <= self.chunk_size:
|
||||
return [content]
|
||||
|
||||
chunks = []
|
||||
paragraphs = content.split("\n\n")
|
||||
current_chunk = ""
|
||||
|
||||
for paragraph in paragraphs:
|
||||
if len(current_chunk) + len(paragraph) <= self.chunk_size:
|
||||
current_chunk += paragraph + "\n\n"
|
||||
else:
|
||||
if current_chunk:
|
||||
chunks.append(current_chunk.strip())
|
||||
current_chunk = paragraph + "\n\n"
|
||||
|
||||
if current_chunk:
|
||||
chunks.append(current_chunk.strip())
|
||||
|
||||
return chunks
|
||||
|
||||
def _get_existing_document_ids(self) -> Set[str]:
|
||||
"""Return set of existing document identifiers in the collection."""
|
||||
try:
|
||||
if self._is_milvus_lite():
|
||||
results = self.client.query(
|
||||
collection_name=self.collection_name,
|
||||
filter="",
|
||||
output_fields=[self.id_field],
|
||||
limit=10000,
|
||||
)
|
||||
return {
|
||||
result.get(self.id_field, "")
|
||||
for result in results
|
||||
if result.get(self.id_field)
|
||||
}
|
||||
else:
|
||||
# For LangChain Milvus, we can't easily query all IDs
|
||||
# Return empty set to allow re-insertion (LangChain will handle duplicates)
|
||||
return set()
|
||||
except Exception:
|
||||
return set()
|
||||
|
||||
def _insert_document_chunk(
|
||||
self, doc_id: str, content: str, title: str, url: str, metadata: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Insert a single content chunk into Milvus."""
|
||||
try:
|
||||
# Generate embedding
|
||||
embedding = self._get_embedding(content)
|
||||
|
||||
if self._is_milvus_lite():
|
||||
# For Milvus Lite, use MilvusClient
|
||||
data = [
|
||||
{
|
||||
self.id_field: doc_id,
|
||||
self.vector_field: embedding,
|
||||
self.content_field: content,
|
||||
self.title_field: title,
|
||||
self.url_field: url,
|
||||
**metadata, # Add metadata fields
|
||||
}
|
||||
]
|
||||
self.client.insert(collection_name=self.collection_name, data=data)
|
||||
else:
|
||||
# For LangChain Milvus, use add_texts
|
||||
self.client.add_texts(
|
||||
texts=[content],
|
||||
metadatas=[
|
||||
{
|
||||
self.id_field: doc_id,
|
||||
self.title_field: title,
|
||||
self.url_field: url,
|
||||
**metadata,
|
||||
}
|
||||
],
|
||||
)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to insert document chunk: {str(e)}")
|
||||
|
||||
def _connect(self) -> None:
|
||||
"""Create the underlying Milvus client (idempotent)."""
|
||||
try:
|
||||
# Check if using Milvus Lite (file-based) vs server-based Milvus
|
||||
if self._is_milvus_lite():
|
||||
# Use MilvusClient for Milvus Lite (local file database)
|
||||
self.client = MilvusClient(self.uri)
|
||||
# Ensure collection exists
|
||||
self._ensure_collection_exists()
|
||||
else:
|
||||
connection_args = {
|
||||
"uri": self.uri,
|
||||
}
|
||||
# Add user/password only if provided
|
||||
if self.user:
|
||||
connection_args["user"] = self.user
|
||||
if self.password:
|
||||
connection_args["password"] = self.password
|
||||
|
||||
# Create LangChain client (it will handle collection creation automatically)
|
||||
self.client = LangchainMilvus(
|
||||
embedding_function=self.embedding_model,
|
||||
collection_name=self.collection_name,
|
||||
connection_args=connection_args,
|
||||
# optional (if collection already exists with different schema, be careful)
|
||||
drop_old=False,
|
||||
)
|
||||
except Exception as e:
|
||||
raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
||||
|
||||
def _is_milvus_lite(self) -> bool:
|
||||
"""Return True if the URI points to a local Milvus Lite file.
|
||||
Milvus Lite uses local file paths (often ``*.db``) without an HTTP/HTTPS
|
||||
scheme. We treat any path not containing a protocol and not starting
|
||||
with an HTTP(S) prefix as a Lite instance.
|
||||
"""
|
||||
return self.uri.endswith(".db") or (
|
||||
not self.uri.startswith(("http://", "https://")) and "://" not in self.uri
|
||||
)
|
||||
|
||||
def _get_embedding(self, text: str) -> List[float]:
|
||||
"""Return embedding for a given text."""
|
||||
try:
|
||||
# Validate input
|
||||
if not isinstance(text, str):
|
||||
raise ValueError(f"Text must be a string, got {type(text)}")
|
||||
|
||||
if not text.strip():
|
||||
raise ValueError("Text cannot be empty or only whitespace")
|
||||
# Unified embedding interface (OpenAIEmbeddings or DashscopeEmbeddings wrapper)
|
||||
embeddings = self.embedding_model.embed_query(text=text.strip())
|
||||
|
||||
# Validate output
|
||||
if not isinstance(embeddings, list) or not embeddings:
|
||||
raise ValueError(f"Invalid embedding format: {type(embeddings)}")
|
||||
|
||||
return embeddings
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to generate embedding: {str(e)}")
|
||||
|
||||
def list_resources(self, query: Optional[str] = None) -> List[Resource]:
    """List example documents stored in Milvus as ``Resource`` summaries.

    Strategy:
        1. Milvus Lite: query up to 100 records whose ``source`` is
           ``examples``.
        2. LangChain client: run a similarity search (k=100) restricted to
           the same filter, using ``query`` when given and a zero vector
           otherwise.
        3. If connecting or querying fails, return the local example
           markdown files instead.

    Records resolving to an already-listed URI are reported once, so a file
    stored as several chunks yields a single resource.

    Args:
        query: Optional search text used to order results on the LangChain
            path. Ignored for Milvus Lite.

    Returns:
        List of ``Resource`` objects.
    """
    # Ensure connection established
    if not self.client:
        try:
            self._connect()
        except Exception as e:
            logger.warning(
                "Could not connect to Milvus, falling back to local examples: %s", e
            )
            return self._list_local_markdown_resources()

    resources: List[Resource] = []
    seen_uris = set()
    example_filter = "source == 'examples'"

    def add_resource(fields: Dict[str, Any]) -> None:
        """Append one record as a Resource unless its URI is already listed."""
        fallback_uri = f"milvus://{fields.get(self.id_field, '')}"
        uri = fields.get(self.url_field, "") or fallback_uri
        if uri in seen_uris or fallback_uri in seen_uris:
            return
        seen_uris.add(uri)
        resources.append(
            Resource(
                uri=uri,
                title=fields.get(self.title_field, "")
                or fields.get(self.id_field, "Unnamed"),
                description="Stored Milvus document",
            )
        )

    try:
        if self._is_milvus_lite():
            rows = self.client.query(
                collection_name=self.collection_name,
                filter=example_filter,
                output_fields=[self.id_field, self.title_field, self.url_field],
                limit=100,
            )
            for row in rows:
                add_resource(row)
        else:
            if query and query.strip():
                docs: Iterable[Any] = self.client.similarity_search(
                    query, k=100, expr=example_filter
                )
            else:
                # No text to embed: search with a zero vector so the filter
                # alone decides which documents come back.
                docs = self.client.similarity_search_by_vector(
                    [0.0] * self.embedding_dim, k=100, expr=example_filter
                )
            for doc in docs:
                add_resource(getattr(doc, "metadata", {}) or {})

        logger.info(
            "Succeed listed %d resources from Milvus collection: %s",
            len(resources),
            self.collection_name,
        )
    except Exception as e:
        logger.warning(
            "Failed to query Milvus for resources, falling back to local examples: %s",
            e,
        )
        return self._list_local_markdown_resources()
    return resources
|
||||
|
||||
def _list_local_markdown_resources(self) -> List[Resource]:
    """Describe the example markdown files found on disk as ``Resource`` objects.

    Looks for ``*.md`` files directly inside the examples directory (resolved
    relative to the project root, three levels above this file). Nothing is
    read from or written to Milvus. Files that cannot be processed are
    skipped; a missing directory yields an empty list.
    """
    examples_path = Path(__file__).parent.parent.parent / self.examples_dir
    if not examples_path.exists():
        return []

    found: list[Resource] = []
    for md_file in list(examples_path.glob("*.md")):
        try:
            # Undecodable bytes are dropped rather than failing the listing.
            text = md_file.read_text(encoding="utf-8", errors="ignore")
            found.append(
                Resource(
                    uri=f"milvus://{self.collection_name}/{md_file.name}",
                    title=self._extract_title_from_markdown(text, md_file.name),
                    description="Local markdown example (not yet ingested)",
                )
            )
        except Exception:
            continue
    return found
|
||||
|
||||
def query_relevant_documents(
    self, query: str, resources: Optional[List[Resource]] = None
) -> List[Document]:
    """Perform vector similarity search returning ``Document`` objects.

    Args:
        query: Natural language query string; must be non-blank.
        resources: Optional subset filter. When given, a hit is kept only if
            its url or its (non-empty) id appears inside one of the resource
            URIs.

    Returns:
        One ``Document`` per distinct stored id, each holding a ``Chunk`` for
        every matching hit, in first-seen order.

    Raises:
        RuntimeError: On connection, embedding or search errors. The original
            exception is attached as ``__cause__``.
    """
    resources = resources or []
    documents: Dict[str, Document] = {}

    def is_selected(doc_id: str, url: str) -> bool:
        """Apply the optional resource filter to one hit."""
        if not resources:
            return True
        # An empty id or url must not match: "" is a substring of every URI
        # and would let every hit through the filter.
        return any(
            (url and url in resource.uri) or (doc_id and doc_id in resource.uri)
            for resource in resources
        )

    def collect(doc_id: str, url: str, title: str, content: str, score: Any) -> None:
        """Add one hit to the per-document aggregation."""
        if not is_selected(doc_id, url):
            return
        if doc_id not in documents:
            documents[doc_id] = Document(id=doc_id, url=url, title=title, chunks=[])
        documents[doc_id].chunks.append(Chunk(content=content, similarity=score))

    try:
        if not isinstance(query, str) or not query.strip():
            raise ValueError("Text cannot be empty or only whitespace")

        if not self.client:
            self._connect()

        if self._is_milvus_lite():
            # Only the MilvusClient path needs the vector computed here.
            query_embedding = self._get_embedding(query)
            search_results = self.client.search(
                collection_name=self.collection_name,
                data=[query_embedding],
                anns_field=self.vector_field,
                # MilvusClient.search() names this keyword ``search_params``.
                search_params={"metric_type": "IP", "params": {"nprobe": 10}},
                limit=self.top_k,
                output_fields=[
                    self.id_field,
                    self.content_field,
                    self.title_field,
                    self.url_field,
                ],
            )
            for hits in search_results:
                for hit in hits:
                    entity = hit.get("entity", {})
                    collect(
                        entity.get(self.id_field, ""),
                        entity.get(self.url_field, ""),
                        entity.get(self.title_field, ""),
                        entity.get(self.content_field, ""),
                        hit.get("distance", 0.0),
                    )
        else:
            # The LangChain client embeds the query itself.
            search_results = self.client.similarity_search_with_score(
                query=query, k=self.top_k
            )
            for doc, score in search_results:
                metadata = doc.metadata or {}
                collect(
                    metadata.get(self.id_field, ""),
                    metadata.get(self.url_field, ""),
                    metadata.get(self.title_field, ""),
                    doc.page_content,
                    score,
                )

        return list(documents.values())

    except Exception as e:
        raise RuntimeError(f"Failed to query documents from Milvus: {str(e)}") from e
|
||||
|
||||
def create_collection(self) -> None:
    """Explicit initialisation hook that makes sure the collection is usable.

    Without a client, the work is delegated to ``_connect``. With a client
    already present, the existence check only runs for Milvus Lite.
    """
    if self.client:
        # Already connected: only the Milvus Lite path needs the explicit check.
        if self._is_milvus_lite():
            self._ensure_collection_exists()
        return

    self._connect()
|
||||
|
||||
def load_examples(self, force_reload: bool = False) -> None:
    """Ingest the example markdown files into the store.

    Args:
        force_reload: When true, previously ingested example documents are
            cleared before the files are loaded again.
    """
    # Connect lazily on first use.
    if not self.client:
        self._connect()

    # Optional wipe of earlier example documents.
    if force_reload:
        self._clear_example_documents()

    self._load_example_files()
|
||||
|
||||
def _clear_example_documents(self) -> None:
    """Remove example documents that were ingested earlier (Milvus Lite only).

    Rows whose ``source`` equals ``'examples'`` are looked up (at most 10000)
    and deleted by id. Any failure is logged as a warning, never raised.
    """
    try:
        if not self._is_milvus_lite():
            # For LangChain Milvus, we can't easily delete by metadata
            logger.info(
                "Clearing existing examples not supported for LangChain Milvus client"
            )
            return

        # Query first, then delete by id, rather than deleting by filter.
        matches = self.client.query(
            collection_name=self.collection_name,
            filter="source == 'examples'",
            output_fields=[self.id_field],
            limit=10000,
        )
        if not matches:
            return

        stale_ids = [row[self.id_field] for row in matches]
        self.client.delete(collection_name=self.collection_name, ids=stale_ids)
        logger.info("Cleared %d existing example documents", len(stale_ids))

    except Exception as exc:
        logger.warning("Could not clear existing examples: %s", exc)
|
||||
|
||||
def get_loaded_examples(self) -> List[Dict[str, str]]:
    """List metadata of the example documents already stored.

    Returns:
        One dict per matching row with the keys ``id``, ``title``, ``file``
        and ``url`` (missing values become ``""``). An empty list is returned
        for the LangChain client and whenever an error occurs.
    """
    try:
        if not self.client:
            self._connect()

        if not self._is_milvus_lite():
            # For LangChain Milvus, we can't easily filter by metadata
            logger.info(
                "Getting loaded examples not supported for LangChain Milvus client"
            )
            return []

        wanted_fields = [
            self.id_field,
            self.title_field,
            self.url_field,
            "source",
            "file",
        ]
        rows = self.client.query(
            collection_name=self.collection_name,
            filter="source == 'examples'",
            output_fields=wanted_fields,
            limit=1000,
        )

        return [
            {
                "id": row.get(self.id_field, ""),
                "title": row.get(self.title_field, ""),
                "file": row.get("file", ""),
                "url": row.get(self.url_field, ""),
            }
            for row in rows
        ]

    except Exception as exc:
        logger.error("Error getting loaded examples: %s", exc)
        return []
|
||||
|
||||
def close(self) -> None:
    """Release underlying client resources (idempotent, never raises).

    The client reference is always dropped, even when the client's own
    ``close`` fails, so a broken client is neither kept alive nor closed a
    second time by a later call (for example from ``__del__``).
    """
    # ``getattr`` with a default covers instances where ``client`` was never set.
    client = getattr(self, "client", None)
    if not client:
        return

    try:
        # For Milvus Lite (MilvusClient), close the connection.
        # For LangChain Milvus, no explicit close method is needed.
        if self._is_milvus_lite() and hasattr(client, "close"):
            client.close()
    except Exception:
        # Deliberately best-effort: cleanup errors must not reach the caller.
        pass
    finally:
        self.client = None


def __del__(self) -> None:  # pragma: no cover - best-effort cleanup
    """Best-effort cleanup when instance is garbage collected."""
    self.close()
|
||||
|
||||
|
||||
# Backwards compatibility export (original class name kept for external imports)
|
||||
class MilvusProvider(MilvusRetriever):
    """Original class name, kept as a plain subclass of ``MilvusRetriever``.

    Adds nothing of its own; it exists so external imports of the old name
    keep working.
    """
|
||||
|
||||
|
||||
def load_examples() -> None:
    """Load the example documents into Milvus when configuration asks for it.

    Acts only when ``RAG_PROVIDER`` equals ``"milvus"`` and
    ``MILVUS_AUTO_LOAD_EXAMPLES`` is true. The temporary provider is closed
    before returning, so its client is released deterministically instead of
    waiting for garbage collection.
    """
    auto_load_examples = get_bool_env("MILVUS_AUTO_LOAD_EXAMPLES", False)
    rag_provider = get_str_env("RAG_PROVIDER", "")
    if rag_provider != "milvus" or not auto_load_examples:
        return

    provider = MilvusProvider()
    try:
        provider.load_examples()
    finally:
        # ``close`` is best-effort and swallows its own errors.
        provider.close()
|
||||
@@ -17,7 +17,8 @@ from langgraph.checkpoint.mongodb import AsyncMongoDBSaver
|
||||
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
|
||||
from psycopg_pool import AsyncConnectionPool
|
||||
|
||||
from src.config.configuration import get_recursion_limit, get_bool_env, get_str_env
|
||||
from src.config.configuration import get_recursion_limit
|
||||
from src.config.loader import get_bool_env, get_str_env
|
||||
from src.config.report_style import ReportStyle
|
||||
from src.config.tools import SELECTED_RAG_PROVIDER
|
||||
from src.graph.builder import build_graph_with_memory
|
||||
@@ -27,6 +28,7 @@ from src.ppt.graph.builder import build_graph as build_ppt_graph
|
||||
from src.prompt_enhancer.graph.builder import build_graph as build_prompt_enhancer_graph
|
||||
from src.prose.graph.builder import build_graph as build_prose_graph
|
||||
from src.rag.builder import build_retriever
|
||||
from src.rag.milvus import load_examples
|
||||
from src.rag.retriever import Resource
|
||||
from src.server.chat_request import (
|
||||
ChatRequest,
|
||||
@@ -73,6 +75,10 @@ app.add_middleware(
|
||||
allow_methods=["GET", "POST", "OPTIONS"], # Use the configured list of methods
|
||||
allow_headers=["*"], # Now allow all headers, but can be restricted further
|
||||
)
|
||||
|
||||
# Load examples into Milvus if configured
|
||||
load_examples()
|
||||
|
||||
in_memory_store = InMemoryStore()
|
||||
graph = build_graph_with_memory()
|
||||
|
||||
|
||||
824
tests/unit/rag/test_milvus.py
Normal file
824
tests/unit/rag/test_milvus.py
Normal file
@@ -0,0 +1,824 @@
|
||||
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from __future__ import annotations
|
||||
from uuid import uuid4
|
||||
from types import SimpleNamespace
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
import src.rag.milvus as milvus_mod
|
||||
from src.rag.milvus import MilvusProvider
|
||||
from src.rag.retriever import Resource
|
||||
|
||||
|
||||
class DummyEmbedding:
    """Offline stand-in for an embedding model; always yields a fixed 3-d vector."""

    def __init__(self, **kwargs):
        # Keep constructor arguments around so tests can inspect them.
        self.kwargs = kwargs

    def embed_query(self, text: str):
        return [0.1, 0.2, 0.3]

    def embed_documents(self, texts):
        vectors = []
        for _ in texts:
            vectors.append([0.1, 0.2, 0.3])
        return vectors
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def patch_embeddings(monkeypatch):
    """Autouse fixture: set Milvus env defaults and swap in dummy embedding classes.

    Prevents network / external API usage during ``__init__``.
    """
    env_defaults = {
        "MILVUS_EMBEDDING_PROVIDER": "openai",
        "MILVUS_EMBEDDING_MODEL": "text-embedding-ada-002",
        "MILVUS_COLLECTION": "documents",
        "MILVUS_URI": "./milvus_demo.db",  # default lite
    }
    for key, value in env_defaults.items():
        monkeypatch.setenv(key, value)

    for class_name in ("OpenAIEmbeddings", "DashscopeEmbeddings"):
        monkeypatch.setattr(milvus_mod, class_name, DummyEmbedding)

    yield
|
||||
|
||||
|
||||
@pytest.fixture
def project_root():
    """Directory three levels above the ``milvus`` module file.

    Mirrors the implementation's ``current_file.parent.parent.parent``.
    """
    module_file = Path(milvus_mod.__file__)
    return module_file.parents[2]
|
||||
|
||||
|
||||
def _patch_init(monkeypatch):
    """Replace ``_init_embedding_model`` so it installs a ``DummyEmbedding``."""

    def _install_dummy(self):
        self.embedding_model = DummyEmbedding()

    monkeypatch.setattr(MilvusProvider, "_init_embedding_model", _install_dummy)
|
||||
|
||||
|
||||
def test_list_local_markdown_resources_missing_dir(project_root):
    """A non-existent examples directory yields an empty resource list."""
    provider = MilvusProvider()
    # Random suffix so the directory cannot exist.
    provider.examples_dir = f"missing_examples_{uuid4().hex}"

    assert provider._list_local_markdown_resources() == []
|
||||
|
||||
|
||||
def test_list_local_markdown_resources_populated(project_root):
    """Markdown files are listed with derived titles; other files are ignored.

    The scratch directory is created under ``project_root`` and removed again
    in a ``finally`` block so the test leaves nothing behind.
    """
    import shutil  # local import: only needed for cleanup of the scratch dir

    retriever = MilvusProvider()
    examples_dir = f"examples_test_{uuid4().hex}"
    retriever.examples_dir = examples_dir
    target_dir = project_root / examples_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    try:
        # File with heading
        (target_dir / "file1.md").write_text(
            "# Title One\n\nContent body.", encoding="utf-8"
        )
        # File without heading -> fallback title
        (target_dir / "file_two.md").write_text("No heading here.", encoding="utf-8")
        # Non-markdown file should be ignored
        (target_dir / "ignore.txt").write_text(
            "Should not be picked up.", encoding="utf-8"
        )

        resources = retriever._list_local_markdown_resources()
        # Order not guaranteed; sort by uri for assertions
        resources.sort(key=lambda r: r.uri)

        # Expect two resources
        assert len(resources) == 2
        uris = {r.uri for r in resources}
        assert uris == {
            f"milvus://{retriever.collection_name}/file1.md",
            f"milvus://{retriever.collection_name}/file_two.md",
        }

        res_map = {r.uri: r for r in resources}
        r1 = res_map[f"milvus://{retriever.collection_name}/file1.md"]
        assert isinstance(r1, Resource)
        assert r1.title == "Title One"
        assert r1.description == "Local markdown example (not yet ingested)"

        r2 = res_map[f"milvus://{retriever.collection_name}/file_two.md"]
        # Fallback logic: filename -> "file_two" -> "file two" -> title case -> "File Two"
        assert r2.title == "File Two"
        assert r2.description == "Local markdown example (not yet ingested)"
    finally:
        shutil.rmtree(target_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def test_list_local_markdown_resources_read_error(monkeypatch, project_root):
    """A file whose read fails is skipped; readable files are still listed.

    The scratch directory is created under ``project_root`` and removed again
    in a ``finally`` block so the test leaves nothing behind.
    """
    import shutil  # local import: only needed for cleanup of the scratch dir

    retriever = MilvusProvider()
    examples_dir = f"examples_error_{uuid4().hex}"
    retriever.examples_dir = examples_dir
    target_dir = project_root / examples_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    try:
        bad_file = target_dir / "bad.md"
        good_file = target_dir / "good.md"
        good_file.write_text("# Good Title\n\nBody.", encoding="utf-8")
        bad_file.write_text("Broken", encoding="utf-8")

        # Patch Path.read_text to raise for bad.md only
        original_read_text = Path.read_text

        def fake_read_text(self, *args, **kwargs):
            if self == bad_file:
                raise OSError("Cannot read file")
            return original_read_text(self, *args, **kwargs)

        monkeypatch.setattr(Path, "read_text", fake_read_text)

        resources = retriever._list_local_markdown_resources()
        # Only good.md should appear
        assert len(resources) == 1
        r = resources[0]
        assert r.title == "Good Title"
        assert r.uri == f"milvus://{retriever.collection_name}/good.md"
    finally:
        shutil.rmtree(target_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def test_create_collection_schema_fields(monkeypatch):
    """The generated schema carries the core fields and allows dynamic fields."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    schema = provider._create_collection_schema()
    present = {field.name for field in schema.fields}

    # Core fields must be present
    required = {provider.id_field, provider.vector_field, provider.content_field}
    assert required.issubset(present)
    # Dynamic field enabled for extra metadata
    assert schema.enable_dynamic_field is True
|
||||
|
||||
|
||||
def test_generate_doc_id_stable(monkeypatch, tmp_path):
    """Two id computations for the same untouched file agree."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    sample = tmp_path / "example.md"
    sample.write_text("# Title\nBody", encoding="utf-8")

    first_id = provider._generate_doc_id(sample)
    second_id = provider._generate_doc_id(sample)
    # deterministic given unchanged file metadata
    assert first_id == second_id
|
||||
|
||||
|
||||
def test_extract_title_from_markdown(monkeypatch):
    """Title comes from the heading when present, otherwise from the filename."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    from_heading = provider._extract_title_from_markdown("# Heading\nBody", "ignored.md")
    assert from_heading == "Heading"

    from_filename = provider._extract_title_from_markdown("Body only", "my_file_name.md")
    assert from_filename == "My File Name"
|
||||
|
||||
|
||||
def test_split_content_chunking(monkeypatch):
    """A small chunk size splits multi-paragraph text into non-empty chunks."""
    monkeypatch.setenv("MILVUS_CHUNK_SIZE", "40")  # small to force split
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    # 3 paragraphs
    text = "Para1 text here.\n\nPara2 second block.\n\nPara3 final."
    pieces = provider._split_content(text)

    assert len(pieces) >= 2  # forced split
    assert all(pieces)  # no empty chunks
|
||||
|
||||
|
||||
def test_get_embedding_invalid_inputs(monkeypatch):
    """Non-string and whitespace-only inputs are rejected with ``RuntimeError``."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    # First a non-string value, then a whitespace-only string.
    for bad_input in (123, " "):
        with pytest.raises(RuntimeError):
            provider._get_embedding(bad_input)  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_list_resources_remote_success_and_dedup(monkeypatch):
    """With a remote URI, hits sharing one id collapse to a single resource."""
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    class DocObj:
        def __init__(self, content: str, meta: dict):
            self.page_content = content
            self.metadata = meta

    def _meta(title):
        # Both hits share id "d1" and url "u1"; only the title differs.
        return {
            retriever.id_field: "d1",
            retriever.title_field: title,
            retriever.url_field: "u1",
        }

    search_calls = []

    class RemoteClient:
        def similarity_search(self, query, k, expr):  # noqa: D401
            search_calls.append(query)
            # Two docs with identical id to test dedup
            return [DocObj("c1", _meta("T1")), DocObj("c1_dup", _meta("T1_dup"))]

    retriever.client = RemoteClient()
    resources = retriever.list_resources("query text")

    assert len(resources) == 1  # dedup applied
    assert resources[0].title.startswith("T1")
    assert len(search_calls) == 1
|
||||
|
||||
|
||||
def test_list_resources_lite_success(monkeypatch):
    """On the Milvus Lite path, each row from ``query`` becomes a resource."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    def _row(doc_id, title, url):
        return {
            retriever.id_field: doc_id,
            retriever.title_field: title,
            retriever.url_field: url,
        }

    class DummyMilvusLite:
        def query(self, collection_name, filter, output_fields, limit):  # noqa: D401
            return [_row("idA", "Alpha", "u://a"), _row("idB", "Beta", "u://b")]

    retriever.client = DummyMilvusLite()
    titles = {res.title for res in retriever.list_resources()}
    assert titles == {"Alpha", "Beta"}
|
||||
|
||||
|
||||
def test_query_relevant_documents_lite_success(monkeypatch):
    """Lite search hits are filtered by resource and keep their distance as score."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    # Provide deterministic embedding output
    retriever.embedding_model.embed_query = lambda text: [0.1, 0.2, 0.3]  # type: ignore

    def _hit(doc_id, content, title, url, distance):
        return {
            "entity": {
                retriever.id_field: doc_id,
                retriever.content_field: content,
                retriever.title_field: title,
                retriever.url_field: url,
            },
            "distance": distance,
        }

    class DummyMilvusLite:
        def search(
            self, collection_name, data, anns_field, param, limit, output_fields
        ):  # noqa: D401
            # Simulate two result entries for the single query vector
            return [
                [
                    _hit("d1", "c1", "T1", "u1", 0.9),
                    _hit("d2", "c2", "T2", "u2", 0.8),
                ]
            ]

    retriever.client = DummyMilvusLite()

    # Filter for only d2 via resource list
    only_d2 = [Resource(uri="milvus://d2", title="", description="")]
    docs = retriever.query_relevant_documents("question", resources=only_d2)

    assert len(docs) == 1
    assert docs[0].id == "d2"
    assert docs[0].chunks[0].similarity == 0.8
|
||||
|
||||
|
||||
def test_query_relevant_documents_remote_success(monkeypatch):
    """With a remote URI, scored hits are filtered by resource and keep their score."""
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    _patch_init(monkeypatch)
    retriever = MilvusProvider()
    retriever.embedding_model.embed_query = lambda text: [0.1, 0.2, 0.3]  # type: ignore

    class DocObj:
        def __init__(self, content: str, meta: dict):  # noqa: D401
            self.page_content = content
            self.metadata = meta

    def _scored(content, doc_id, title, url, score):
        meta = {
            retriever.id_field: doc_id,
            retriever.title_field: title,
            retriever.url_field: url,
        }
        return (DocObj(content, meta), score)

    class RemoteClient:
        def similarity_search_with_score(self, query, k):  # noqa: D401
            return [
                _scored("c1", "d1", "T1", "u1", 0.7),
                _scored("c2", "d2", "T2", "u2", 0.6),
            ]

    retriever.client = RemoteClient()

    # Filter to only d1
    only_d1 = [Resource(uri="milvus://d1", title="", description="")]
    docs = retriever.query_relevant_documents("q", resources=only_d1)

    assert len(docs) == 1
    assert docs[0].id == "d1"
    assert docs[0].chunks[0].similarity == 0.7
|
||||
|
||||
|
||||
def test_get_embedding_dimension_explicit(monkeypatch):
    """``MILVUS_EMBEDDING_DIM`` is reflected in ``embedding_dim``."""
    monkeypatch.setenv("MILVUS_EMBEDDING_DIM", "777")
    _patch_init(monkeypatch)

    provider = MilvusProvider()
    assert provider.embedding_dim == 777
|
||||
|
||||
|
||||
def test_get_embedding_dimension_unknown_model(monkeypatch):
    """Without an explicit dimension, an unrecognised model name gives 1536."""
    monkeypatch.delenv("MILVUS_EMBEDDING_DIM", raising=False)
    monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "unknown-model-x")
    _patch_init(monkeypatch)

    provider = MilvusProvider()
    # falls back to default 1536
    assert provider.embedding_dim == 1536
|
||||
|
||||
|
||||
def test_is_milvus_lite_variants(monkeypatch):
    """File-like URIs are detected as Milvus Lite; an http URI is not."""
    _patch_init(monkeypatch)

    expectations = [
        ("mydb.db", True),
        ("relative_path_store", True),
        ("http://host:19530", False),
    ]
    for uri, expected in expectations:
        monkeypatch.setenv("MILVUS_URI", uri)
        assert MilvusProvider()._is_milvus_lite() is expected
|
||||
|
||||
|
||||
def test_create_collection_lite(monkeypatch):
    """An empty collection list makes ``_ensure_collection_exists`` create one."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()
    created: dict = {}

    class DummyMilvusLite:
        def list_collections(self):  # noqa: D401
            return []  # empty triggers creation

        def create_collection(
            self, collection_name, schema, index_params
        ):  # noqa: D401
            created.update(name=collection_name, schema=schema, index=index_params)

    retriever.client = DummyMilvusLite()
    retriever._ensure_collection_exists()

    assert created["name"] == retriever.collection_name
|
||||
|
||||
|
||||
def test_ensure_collection_exists_remote(monkeypatch):
    """With a remote URI, ``_ensure_collection_exists`` completes without raising."""
    _patch_init(monkeypatch)
    monkeypatch.setenv("MILVUS_URI", "http://remote:19530")

    provider = MilvusProvider()
    # remote path, nothing thrown; the client is an attribute-less placeholder
    provider.client = SimpleNamespace()
    provider._ensure_collection_exists()
|
||||
|
||||
|
||||
def test_get_existing_document_ids_lite(monkeypatch):
    """Ids are collected from rows that have the id field; other rows are ignored."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    class DummyMilvusLite:
        def query(self, collection_name, filter, output_fields, limit):  # noqa: D401
            rows = [{retriever.id_field: doc_id} for doc_id in ("a", "b")]
            rows.append({"other": "ignored"})
            return rows

    retriever.client = DummyMilvusLite()
    found = retriever._get_existing_document_ids()
    assert found == {"a", "b"}
|
||||
|
||||
|
||||
def test_get_existing_document_ids_remote(monkeypatch):
    """With a remote URI, the set of existing ids is empty."""
    _patch_init(monkeypatch)
    monkeypatch.setenv("MILVUS_URI", "http://x")

    provider = MilvusProvider()
    provider.client = object()

    existing = provider._get_existing_document_ids()
    assert existing == set()
|
||||
|
||||
|
||||
def test_insert_document_chunk_lite_and_error(monkeypatch):
    """Lite insert forwards the doc id; an embedding failure surfaces as ``RuntimeError``."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    payloads = []

    class DummyMilvusLite:
        def insert(self, collection_name, data):  # noqa: D401
            payloads.append(data)

    retriever.client = DummyMilvusLite()
    retriever._insert_document_chunk(
        doc_id="id1", content="hello", title="T", url="u", metadata={"m": 1}
    )
    latest = payloads[-1]
    assert latest[0][retriever.id_field] == "id1"

    # error path: patch embedding to raise
    def bad_embed(text):  # noqa: D401
        raise RuntimeError("boom")

    retriever.embedding_model.embed_query = bad_embed  # type: ignore[attr-defined]
    with pytest.raises(RuntimeError):
        retriever._insert_document_chunk(
            doc_id="id2", content="err", title="T", url="u", metadata={}
        )
|
||||
|
||||
|
||||
def test_insert_document_chunk_remote(monkeypatch):
    """With a remote URI, the chunk goes through ``add_texts`` with the id in its metadata."""
    _patch_init(monkeypatch)
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    retriever = MilvusProvider()

    added = {}

    class RemoteClient:
        def add_texts(self, texts, metadatas):  # noqa: D401
            added.update(texts=texts, meta=metadatas)

    retriever.client = RemoteClient()
    retriever._insert_document_chunk(
        doc_id="idx", content="ct", title="Title", url="urlx", metadata={"k": 2}
    )

    first_meta = added["meta"][0]
    assert first_meta[retriever.id_field] == "idx"
|
||||
|
||||
|
||||
def test_connect_lite_and_error(monkeypatch):
    """``_connect`` builds a ``MilvusClient``; a constructor failure becomes ``ConnectionError``."""

    # patch MilvusClient to a dummy
    class FakeMilvusClient:
        def __init__(self, uri):  # noqa: D401
            self.uri = uri

        def list_collections(self):  # noqa: D401
            return []

        def create_collection(self, **kwargs):  # noqa: D401
            pass

    monkeypatch.setattr(milvus_mod, "MilvusClient", FakeMilvusClient)
    _patch_init(monkeypatch)

    healthy = MilvusProvider()
    healthy._connect()
    assert isinstance(healthy.client, FakeMilvusClient)

    # error path: patch MilvusClient to raise
    class BadClient:
        def __init__(self, uri):  # noqa: D401
            raise RuntimeError("fail connect")

    monkeypatch.setattr(milvus_mod, "MilvusClient", BadClient)

    failing = MilvusProvider()
    with pytest.raises(ConnectionError):
        failing._connect()
|
||||
|
||||
|
||||
def test_connect_remote(monkeypatch):
    """With a remote URI, ``_connect`` passes the collection name to ``LangchainMilvus``."""
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    _patch_init(monkeypatch)

    ctor_kwargs = {}

    class FakeLangchainMilvus:
        def __init__(self, **kwargs):  # noqa: D401
            ctor_kwargs.update(kwargs)

    monkeypatch.setattr(milvus_mod, "LangchainMilvus", FakeLangchainMilvus)

    provider = MilvusProvider()
    provider._connect()

    assert ctor_kwargs["collection_name"] == provider.collection_name
|
||||
|
||||
|
||||
def test_list_resources_remote_failure(monkeypatch):
    """A failing remote search falls back to the (empty) local listing."""
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    # Local fallback listing is stubbed to return nothing.
    def _no_local_resources():
        return []

    monkeypatch.setattr(
        retriever, "_list_local_markdown_resources", _no_local_resources
    )

    # patch client to raise inside similarity_search to trigger fallback path
    class BadClient:
        def similarity_search(self, *args, **kwargs):  # noqa: D401
            raise RuntimeError("fail")

    retriever.client = BadClient()

    # Should fallback to [] without raising
    listed = retriever.list_resources()
    assert listed == []
|
||||
|
||||
|
||||
def test_list_local_markdown_resources_empty(monkeypatch):
    """Pointing ``examples_dir`` at ``nonexistent_dir`` gives no resources."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    absent = "nonexistent_dir"
    monkeypatch.setenv("MILVUS_EXAMPLES_DIR", absent)
    provider.examples_dir = absent

    assert provider._list_local_markdown_resources() == []
|
||||
|
||||
|
||||
def test_query_relevant_documents_error(monkeypatch):
    """An embedding failure during a query surfaces as ``RuntimeError``."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    def _failing_embed(text):
        raise RuntimeError("embed fail")

    provider.embedding_model.embed_query = _failing_embed  # type: ignore

    with pytest.raises(RuntimeError):
        provider.query_relevant_documents("q")
|
||||
|
||||
|
||||
def test_create_collection_when_client_exists(monkeypatch):
    """``create_collection`` is expected not to raise when a client is already set."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()
    provider.client = SimpleNamespace(closed=False)

    # remote vs lite path difference handled by _is_milvus_lite
    provider.create_collection()  # should no-op gracefully
|
||||
|
||||
|
||||
def test_load_examples_force_reload(monkeypatch):
    """``force_reload=True`` runs both the clear step and the load step."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()
    retriever.client = SimpleNamespace()

    called = {"clear": 0, "load": 0}

    def _mark_cleared():
        called["clear"] = 1

    def _mark_loaded():
        called["load"] = 1

    monkeypatch.setattr(retriever, "_clear_example_documents", _mark_cleared)
    monkeypatch.setattr(retriever, "_load_example_files", _mark_loaded)

    retriever.load_examples(force_reload=True)

    assert called == {"clear": 1, "load": 1}
|
||||
|
||||
|
||||
def test_clear_example_documents_remote(monkeypatch):
    """With a remote URI, clearing examples does not raise."""
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    _patch_init(monkeypatch)

    provider = MilvusProvider()
    provider.client = SimpleNamespace()

    # Should just log and not raise
    provider._clear_example_documents()
|
||||
|
||||
|
||||
def test_clear_example_documents_lite(monkeypatch):
    """On the lite path, the ids returned by ``query`` are handed to ``delete``."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    deleted = {}

    class DummyMilvusLite:
        def query(self, **kwargs):  # noqa: D401
            return [{retriever.id_field: doc_id} for doc_id in ("ex1", "ex2")]

        def delete(self, collection_name, ids):  # noqa: D401
            deleted["ids"] = ids

    retriever.client = DummyMilvusLite()
    retriever._clear_example_documents()

    assert deleted["ids"] == ["ex1", "ex2"]
|
||||
|
||||
|
||||
def test_get_loaded_examples_lite_and_error(monkeypatch):
    """Lite rows are mapped to example dicts; a failing query yields ``[]``."""
    _patch_init(monkeypatch)
    retriever = MilvusProvider()

    class DummyMilvusLite:
        def query(self, **kwargs):  # noqa: D401
            row = {
                retriever.id_field: "id1",
                retriever.title_field: "T1",
                retriever.url_field: "u1",
                "file": "f1",
            }
            return [row]

    retriever.client = DummyMilvusLite()
    examples = retriever.get_loaded_examples()
    assert examples[0]["id"] == "id1"

    # error path
    class BadClient:
        def query(self, **kwargs):  # noqa: D401
            raise RuntimeError("fail")

    retriever.client = BadClient()
    assert retriever.get_loaded_examples() == []
|
||||
|
||||
|
||||
def test_get_loaded_examples_remote(monkeypatch):
    """With a remote URI, no loaded examples are reported."""
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    _patch_init(monkeypatch)

    provider = MilvusProvider()
    provider.client = SimpleNamespace()

    assert provider.get_loaded_examples() == []
|
||||
|
||||
|
||||
def test_close_lite_and_remote(monkeypatch):
    """``close`` calls the lite client's ``close`` once and tolerates a remote client."""
    _patch_init(monkeypatch)
    lite_provider = MilvusProvider()
    close_calls = []

    class DummyMilvusLite:
        def close(self):  # noqa: D401
            close_calls.append(1)

        def list_collections(self):  # noqa: D401
            return []

        def create_collection(self, **kwargs):  # noqa: D401
            pass

    lite_provider.client = DummyMilvusLite()
    lite_provider.close()
    assert len(close_calls) == 1

    # remote path: no close attr usage expected
    monkeypatch.setenv("MILVUS_URI", "http://remote")
    remote_provider = MilvusProvider()
    remote_provider.client = SimpleNamespace()
    remote_provider.close()  # should not raise
|
||||
|
||||
|
||||
def test_get_embedding_invalid_output(monkeypatch):
    """An empty vector from the embedding model is rejected with ``RuntimeError``."""
    _patch_init(monkeypatch)
    provider = MilvusProvider()

    # patch embedding model to return invalid output (empty list)
    def _empty_embedding(text):
        return []

    provider.embedding_model.embed_query = _empty_embedding  # type: ignore

    with pytest.raises(RuntimeError):
        provider._get_embedding("text")
|
||||
|
||||
|
||||
def test_dashscope_embeddings_empty_inputs_short_circuit(monkeypatch):
    """Embedding an empty document list returns ``[]`` without touching the client.

    NOTE(review): the autouse ``patch_embeddings`` fixture replaces
    ``milvus_mod.DashscopeEmbeddings`` with ``DummyEmbedding``, so this test
    likely exercises the dummy rather than the real class - confirm.
    """

    class _ExplodingEmbeddings:
        def create(self, *a, **k):
            raise AssertionError("Should not be called for empty input")

    class FailingClient:
        embeddings = _ExplodingEmbeddings()

    # Swap _client so that any call to ``create`` fails loudly.
    emb = milvus_mod.DashscopeEmbeddings(model="m")
    emb._client = FailingClient()  # type: ignore

    assert emb.embed_documents([]) == []
|
||||
|
||||
|
||||
# Tests for _init_embedding_model provider selection logic
|
||||
def test_init_embedding_model_openai(monkeypatch):
    """Provider ``openai`` instantiates ``OpenAIEmbeddings`` with the expected kwargs."""
    monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "openai")
    monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "text-embedding-ada-002")

    seen_kwargs = {}

    class CapturingOpenAI:
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    monkeypatch.setattr(milvus_mod, "OpenAIEmbeddings", CapturingOpenAI)

    prov = MilvusProvider()
    assert isinstance(prov.embedding_model, CapturingOpenAI)

    # kwargs forwarded
    expected = {
        "model": "text-embedding-ada-002",
        "encoding_format": "float",
        "dimensions": prov.embedding_dim,
    }
    for key, value in expected.items():
        assert seen_kwargs[key] == value
|
||||
|
||||
|
||||
def test_init_embedding_model_dashscope(monkeypatch):
    """Provider ``dashscope`` instantiates ``DashscopeEmbeddings`` with the expected kwargs."""
    monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "dashscope")
    monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "text-embedding-ada-002")

    seen_kwargs = {}

    class CapturingDashscope:
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    monkeypatch.setattr(milvus_mod, "DashscopeEmbeddings", CapturingDashscope)

    prov = MilvusProvider()
    assert isinstance(prov.embedding_model, CapturingDashscope)

    expected = {
        "model": "text-embedding-ada-002",
        "encoding_format": "float",
        "dimensions": prov.embedding_dim,
    }
    for key, value in expected.items():
        assert seen_kwargs[key] == value
|
||||
|
||||
|
||||
def test_init_embedding_model_invalid_provider(monkeypatch):
    """An unrecognised embedding provider makes construction raise ``ValueError``."""
    monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "not_a_provider")

    with pytest.raises(ValueError):
        MilvusProvider()
|
||||
|
||||
|
||||
def test_load_example_files_directory_missing(monkeypatch):
    """With a missing examples directory, no chunk insertion is attempted.

    The insertion stub counts its invocations, so the final assertion detects
    any call. A stub that only raises can be masked if the code under test
    handles exceptions (its error handling is not visible from this test).
    """
    _patch_init(monkeypatch)
    missing_dir = "examples_dir_does_not_exist_xyz"
    monkeypatch.setenv("MILVUS_EXAMPLES_DIR", missing_dir)
    retriever = MilvusProvider()
    retriever.examples_dir = missing_dir

    called = {"insert": 0}

    def record_insert(*args, **kwargs):
        called["insert"] += 1

    monkeypatch.setattr(retriever, "_insert_document_chunk", record_insert)

    retriever._load_example_files()

    assert called["insert"] == 0  # no insertion attempted
|
||||
|
||||
|
||||
def test_load_example_files_loads_and_skips_existing(monkeypatch):
    """Already-indexed files are skipped; new multi-chunk files get ``_chunk_N`` ids.

    Two markdown files are written to a scratch examples directory. The first
    is reported as already existing, the second is split into two chunks, so
    exactly two inserts (both for ``file2.md``) are expected.
    """
    import shutil  # local import: only needed for fixture cleanup

    _patch_init(monkeypatch)
    project_root = Path(milvus_mod.__file__).parent.parent.parent
    examples_dir_name = "examples_test_load_skip"
    examples_path = project_root / examples_dir_name
    examples_path.mkdir(exist_ok=True)

    try:
        file1 = examples_path / "file1.md"
        file2 = examples_path / "file2.md"
        file1.write_text("# Title One\nContent A", encoding="utf-8")
        file2.write_text("# Title Two\nContent B", encoding="utf-8")

        monkeypatch.setenv("MILVUS_EXAMPLES_DIR", examples_dir_name)
        retriever = MilvusProvider()
        retriever.examples_dir = examples_dir_name

        # Compute doc ids using real method
        doc_id_file1 = retriever._generate_doc_id(file1)
        doc_id_file2 = retriever._generate_doc_id(file2)

        # Existing docs contains file1 so it is skipped
        monkeypatch.setattr(
            retriever, "_get_existing_document_ids", lambda: {doc_id_file1}
        )
        # Force two chunks for any file to test suffix logic
        monkeypatch.setattr(
            retriever, "_split_content", lambda content: ["part1", "part2"]
        )

        calls = []

        def record_insert(doc_id, content, title, url, metadata):
            calls.append(
                {
                    "doc_id": doc_id,
                    "content": content,
                    "title": title,
                    "url": url,
                    "metadata": metadata,
                }
            )

        monkeypatch.setattr(retriever, "_insert_document_chunk", record_insert)

        retriever._load_example_files()

        # Only file2 processed -> two chunk inserts
        assert len(calls) == 2
        expected_ids = {f"{doc_id_file2}_chunk_0", f"{doc_id_file2}_chunk_1"}
        assert {c["doc_id"] for c in calls} == expected_ids
        assert all(c["metadata"]["file"] == "file2.md" for c in calls)
        assert all(c["metadata"]["source"] == "examples" for c in calls)
        assert all(c["title"] == "Title Two" for c in calls)
    finally:
        # Remove the scratch directory so the repository tree is left clean
        # and stale files cannot influence later runs.
        shutil.rmtree(examples_path, ignore_errors=True)
|
||||
|
||||
|
||||
def test_load_example_files_single_chunk_no_suffix(monkeypatch):
    """A file that yields a single chunk is inserted under its plain doc id.

    The splitter is stubbed to return one chunk, and the captured insert must
    use the base document id without any ``_chunk_`` suffix.
    """
    import shutil  # local import: only needed for fixture cleanup

    _patch_init(monkeypatch)
    project_root = Path(milvus_mod.__file__).parent.parent.parent
    examples_dir_name = "examples_test_single_chunk"
    examples_path = project_root / examples_dir_name
    examples_path.mkdir(exist_ok=True)

    try:
        file_single = examples_path / "single.md"
        file_single.write_text(
            "# Single Title\nOnly one small paragraph.", encoding="utf-8"
        )

        monkeypatch.setenv("MILVUS_EXAMPLES_DIR", examples_dir_name)
        retriever = MilvusProvider()
        retriever.examples_dir = examples_dir_name

        base_doc_id = retriever._generate_doc_id(file_single)

        # Nothing is indexed yet, and every file splits into exactly one chunk.
        monkeypatch.setattr(retriever, "_get_existing_document_ids", lambda: set())
        monkeypatch.setattr(retriever, "_split_content", lambda content: ["onlychunk"])

        captured = {}

        def capture(doc_id, content, title, url, metadata):
            captured["doc_id"] = doc_id
            captured["title"] = title
            captured["metadata"] = metadata

        monkeypatch.setattr(retriever, "_insert_document_chunk", capture)

        retriever._load_example_files()

        assert captured["doc_id"] == base_doc_id  # no _chunk_ suffix
        assert captured["title"] == "Single Title"
        assert captured["metadata"]["file"] == "single.md"
        assert captured["metadata"]["source"] == "examples"
    finally:
        # Remove the scratch directory so the repository tree is left clean
        # and stale files cannot influence later runs.
        shutil.rmtree(examples_path, ignore_errors=True)
|
||||
154
uv.lock
generated
154
uv.lock
generated
@@ -1,5 +1,5 @@
|
||||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.12"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.13'",
|
||||
@@ -397,6 +397,7 @@ dependencies = [
|
||||
{ name = "langchain-deepseek" },
|
||||
{ name = "langchain-experimental" },
|
||||
{ name = "langchain-mcp-adapters" },
|
||||
{ name = "langchain-milvus" },
|
||||
{ name = "langchain-openai" },
|
||||
{ name = "langchain-tavily" },
|
||||
{ name = "langgraph" },
|
||||
@@ -408,6 +409,7 @@ dependencies = [
|
||||
{ name = "numpy" },
|
||||
{ name = "pandas" },
|
||||
{ name = "psycopg", extra = ["binary"] },
|
||||
{ name = "pymilvus" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "readabilipy" },
|
||||
{ name = "socksio" },
|
||||
@@ -445,6 +447,7 @@ requires-dist = [
|
||||
{ name = "langchain-deepseek", specifier = ">=0.1.3" },
|
||||
{ name = "langchain-experimental", specifier = ">=0.3.4" },
|
||||
{ name = "langchain-mcp-adapters", specifier = ">=0.0.9" },
|
||||
{ name = "langchain-milvus", specifier = ">=0.2.1" },
|
||||
{ name = "langchain-openai", specifier = ">=0.3.8" },
|
||||
{ name = "langchain-tavily", specifier = "<0.3" },
|
||||
{ name = "langgraph", specifier = ">=0.3.5" },
|
||||
@@ -458,6 +461,7 @@ requires-dist = [
|
||||
{ name = "numpy", specifier = ">=2.2.3" },
|
||||
{ name = "pandas", specifier = ">=2.2.3" },
|
||||
{ name = "psycopg", extras = ["binary"], specifier = ">=3.2.9" },
|
||||
{ name = "pymilvus", specifier = ">=2.3.0" },
|
||||
{ name = "pytest", marker = "extra == 'test'", specifier = ">=7.4.0" },
|
||||
{ name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=1.0.0" },
|
||||
{ name = "pytest-cov", marker = "extra == 'test'", specifier = ">=4.1.0" },
|
||||
@@ -639,6 +643,34 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112, upload-time = "2024-09-20T17:09:28.753Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grpcio"
|
||||
version = "1.74.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/38/b4/35feb8f7cab7239c5b94bd2db71abb3d6adb5f335ad8f131abb6060840b6/grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1", size = 12756048, upload-time = "2025-07-24T18:54:23.039Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/5d/e504d5d5c4469823504f65687d6c8fb97b7f7bf0b34873b7598f1df24630/grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8", size = 5445551, upload-time = "2025-07-24T18:53:23.641Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/43/01/730e37056f96f2f6ce9f17999af1556df62ee8dab7fa48bceeaab5fd3008/grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6", size = 10979810, upload-time = "2025-07-24T18:53:25.349Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/79/3d/09fd100473ea5c47083889ca47ffd356576173ec134312f6aa0e13111dee/grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5", size = 5941946, upload-time = "2025-07-24T18:53:27.387Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/99/12d2cca0a63c874c6d3d195629dcd85cdf5d6f98a30d8db44271f8a97b93/grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49", size = 6621763, upload-time = "2025-07-24T18:53:29.193Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/2c/930b0e7a2f1029bbc193443c7bc4dc2a46fedb0203c8793dcd97081f1520/grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7", size = 6180664, upload-time = "2025-07-24T18:53:30.823Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/d5/ff8a2442180ad0867717e670f5ec42bfd8d38b92158ad6bcd864e6d4b1ed/grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3", size = 6301083, upload-time = "2025-07-24T18:53:32.454Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b0/ba/b361d390451a37ca118e4ec7dccec690422e05bc85fba2ec72b06cefec9f/grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707", size = 6994132, upload-time = "2025-07-24T18:53:34.506Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3b/0c/3a5fa47d2437a44ced74141795ac0251bbddeae74bf81df3447edd767d27/grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b", size = 6489616, upload-time = "2025-07-24T18:53:36.217Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/95/ab64703b436d99dc5217228babc76047d60e9ad14df129e307b5fec81fd0/grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c", size = 3807083, upload-time = "2025-07-24T18:53:37.911Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/59/900aa2445891fc47a33f7d2f76e00ca5d6ae6584b20d19af9c06fa09bf9a/grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc", size = 4490123, upload-time = "2025-07-24T18:53:39.528Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/d8/1004a5f468715221450e66b051c839c2ce9a985aa3ee427422061fcbb6aa/grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89", size = 5449488, upload-time = "2025-07-24T18:53:41.174Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/94/0e/33731a03f63740d7743dced423846c831d8e6da808fcd02821a4416df7fa/grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01", size = 10974059, upload-time = "2025-07-24T18:53:43.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0d/c6/3d2c14d87771a421205bdca991467cfe473ee4c6a1231c1ede5248c62ab8/grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e", size = 5945647, upload-time = "2025-07-24T18:53:45.269Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/83/5a354c8aaff58594eef7fffebae41a0f8995a6258bbc6809b800c33d4c13/grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91", size = 6626101, upload-time = "2025-07-24T18:53:47.015Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/ca/4fdc7bf59bf6994aa45cbd4ef1055cd65e2884de6113dbd49f75498ddb08/grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249", size = 6182562, upload-time = "2025-07-24T18:53:48.967Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/48/2869e5b2c1922583686f7ae674937986807c2f676d08be70d0a541316270/grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362", size = 6303425, upload-time = "2025-07-24T18:53:50.847Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/0e/bac93147b9a164f759497bc6913e74af1cb632c733c7af62c0336782bd38/grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f", size = 6996533, upload-time = "2025-07-24T18:53:52.747Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/35/9f6b2503c1fd86d068b46818bbd7329db26a87cdd8c01e0d1a9abea1104c/grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20", size = 6491489, upload-time = "2025-07-24T18:53:55.06Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/75/33/a04e99be2a82c4cbc4039eb3a76f6c3632932b9d5d295221389d10ac9ca7/grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa", size = 3805811, upload-time = "2025-07-24T18:53:56.798Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/80/de3eb55eb581815342d097214bed4c59e806b05f1b3110df03b2280d6dfd/grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24", size = 4489214, upload-time = "2025-07-24T18:53:59.771Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h11"
|
||||
version = "0.14.0"
|
||||
@@ -984,6 +1016,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/eb/9e98822d3db22beff44449a8f61fca208d4f59d592a7ce67ce4c400b8f8f/langchain_mcp_adapters-0.1.9-py3-none-any.whl", hash = "sha256:fd131009c60c9e5a864f96576bbe757fc1809abd604891cb2e5d6e8aebd6975c", size = 15300, upload-time = "2025-07-09T15:56:13.316Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langchain-milvus"
|
||||
version = "0.2.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "langchain-core" },
|
||||
{ name = "pymilvus" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3e/dd/5e8b7f6f17da0e54205956feab3f7856cb7dc821dbe817f2990aa028e4cc/langchain_milvus-0.2.1.tar.gz", hash = "sha256:6e60e43959464ae2be9dadceb4fab6b3ddcec5bb1f2d29e898924f1c2651baf1", size = 32639, upload-time = "2025-06-28T09:59:53.826Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/60/b1/54e176cc8ac80df9a2c4ee9f726d6383fcf9818317c68532cfc90fa91b6c/langchain_milvus-0.2.1-py3-none-any.whl", hash = "sha256:faabf4685c15ef9651605172427073d6ffc52c0f36f3b88842977db883062c99", size = 36110, upload-time = "2025-06-28T09:59:52.965Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langchain-mongodb"
|
||||
version = "0.6.2"
|
||||
@@ -1370,6 +1415,20 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/cf/3fd38cfe43962452e4bfadc6966b2ea0afaf8e0286cb3991c247c8c33ebd/mcp-1.12.2-py3-none-any.whl", hash = "sha256:b86d584bb60193a42bd78aef01882c5c42d614e416cbf0480149839377ab5a5f", size = 158473, upload-time = "2025-07-24T18:29:03.419Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "milvus-lite"
|
||||
version = "2.5.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "tqdm" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/b2/acc5024c8e8b6a0b034670b8e8af306ebd633ede777dcbf557eac4785937/milvus_lite-2.5.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6b014453200ba977be37ba660cb2d021030375fa6a35bc53c2e1d92980a0c512", size = 27934713, upload-time = "2025-06-30T04:23:37.028Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/2e/746f5bb1d6facd1e73eb4af6dd5efda11125b0f29d7908a097485ca6cad9/milvus_lite-2.5.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a2e031088bf308afe5f8567850412d618cfb05a65238ed1a6117f60decccc95a", size = 24421451, upload-time = "2025-06-30T04:23:51.747Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2e/cf/3d1fee5c16c7661cf53977067a34820f7269ed8ba99fe9cf35efc1700866/milvus_lite-2.5.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:a13277e9bacc6933dea172e42231f7e6135bd3bdb073dd2688ee180418abd8d9", size = 45337093, upload-time = "2025-06-30T04:24:06.706Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/82/41d9b80f09b82e066894d9b508af07b7b0fa325ce0322980674de49106a0/milvus_lite-2.5.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25ce13f4b8d46876dd2b7ac8563d7d8306da7ff3999bb0d14b116b30f71d706c", size = 55263911, upload-time = "2025-06-30T04:24:19.434Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mirakuru"
|
||||
version = "2.6.1"
|
||||
@@ -1750,6 +1809,20 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/35/6c4c6fc8774a9e3629cd750dc24a7a4fb090a25ccd5c3246d127b70f9e22/propcache-0.3.0-py3-none-any.whl", hash = "sha256:67dda3c7325691c2081510e92c561f465ba61b975f481735aefdfc845d2cd043", size = 12101, upload-time = "2025-02-20T19:03:27.202Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "6.32.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fa/a4/cc17347aa2897568beece2e674674359f911d6fe21b0b8d6268cd42727ac/protobuf-6.32.1.tar.gz", hash = "sha256:ee2469e4a021474ab9baafea6cd070e5bf27c7d29433504ddea1a4ee5850f68d", size = 440635, upload-time = "2025-09-11T21:38:42.935Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c0/98/645183ea03ab3995d29086b8bf4f7562ebd3d10c9a4b14ee3f20d47cfe50/protobuf-6.32.1-cp310-abi3-win32.whl", hash = "sha256:a8a32a84bc9f2aad712041b8b366190f71dde248926da517bde9e832e4412085", size = 424411, upload-time = "2025-09-11T21:38:27.427Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8c/f3/6f58f841f6ebafe076cebeae33fc336e900619d34b1c93e4b5c97a81fdfa/protobuf-6.32.1-cp310-abi3-win_amd64.whl", hash = "sha256:b00a7d8c25fa471f16bc8153d0e53d6c9e827f0953f3c09aaa4331c718cae5e1", size = 435738, upload-time = "2025-09-11T21:38:30.959Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/56/a8a3f4e7190837139e68c7002ec749190a163af3e330f65d90309145a210/protobuf-6.32.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8c7e6eb619ffdf105ee4ab76af5a68b60a9d0f66da3ea12d1640e6d8dab7281", size = 426454, upload-time = "2025-09-11T21:38:34.076Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/be/8dd0a927c559b37d7a6c8ab79034fd167dcc1f851595f2e641ad62be8643/protobuf-6.32.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:2f5b80a49e1eb7b86d85fcd23fe92df154b9730a725c3b38c4e43b9d77018bf4", size = 322874, upload-time = "2025-09-11T21:38:35.509Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/f6/88d77011b605ef979aace37b7703e4eefad066f7e84d935e5a696515c2dd/protobuf-6.32.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:b1864818300c297265c83a4982fd3169f97122c299f56a56e2445c3698d34710", size = 322013, upload-time = "2025-09-11T21:38:37.017Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/b7/15cc7d93443d6c6a84626ae3258a91f4c6ac8c0edd5df35ea7658f71b79c/protobuf-6.32.1-py3-none-any.whl", hash = "sha256:2601b779fc7d32a866c6b4404f9d42a3f67c5b9f3f15b4db3cccabe06b95c346", size = 169289, upload-time = "2025-09-11T21:38:41.234Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
version = "7.0.0"
|
||||
@@ -1908,6 +1981,24 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pymilvus"
|
||||
version = "2.6.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "grpcio" },
|
||||
{ name = "milvus-lite", marker = "sys_platform != 'win32'" },
|
||||
{ name = "pandas" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "setuptools" },
|
||||
{ name = "ujson" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/70/a9/b25af985972082d1bb0b26739fece8cea3f56370733b4b1de690c42a77cc/pymilvus-2.6.1.tar.gz", hash = "sha256:ef1d7f5039719398d131ca80c19e55bc2bccc7ab6609f2cca9a04217dcb0a7fb", size = 1322169, upload-time = "2025-08-29T10:03:50.523Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/1a/8b677e0f4ef683bbfb00d495960573fff0844ed509b3cf0abede79a48e90/pymilvus-2.6.1-py3-none-any.whl", hash = "sha256:e3d76d45ce04d3555a6849645a18a1e2992706e248d5b6dc58a00504d0b60165", size = 254252, upload-time = "2025-08-29T10:03:48.539Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pymongo"
|
||||
version = "4.12.1"
|
||||
@@ -2258,6 +2349,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/49/65/dea992c6a97074f6d8ff9eab34741298cac2ce23e2b6c74fb7d08afdf85c/sentinels-1.1.1-py3-none-any.whl", hash = "sha256:835d3b28f3b47f5284afa4bf2db6e00f2dc5f80f9923d4b7e7aeeeccf6146a11", size = 3744, upload-time = "2025-08-12T07:57:48.858Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "setuptools"
|
||||
version = "80.9.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sgmllib3k"
|
||||
version = "1.0.0"
|
||||
@@ -2474,6 +2574,58 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639", size = 346762, upload-time = "2025-01-21T19:49:37.187Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ujson"
|
||||
version = "5.11.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/43/d9/3f17e3c5773fb4941c68d9a37a47b1a79c9649d6c56aefbed87cc409d18a/ujson-5.11.0.tar.gz", hash = "sha256:e204ae6f909f099ba6b6b942131cee359ddda2b6e4ea39c12eb8b991fe2010e0", size = 7156583, upload-time = "2025-08-20T11:57:02.452Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b9/ef/a9cb1fce38f699123ff012161599fb9f2ff3f8d482b4b18c43a2dc35073f/ujson-5.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7895f0d2d53bd6aea11743bd56e3cb82d729980636cd0ed9b89418bf66591702", size = 55434, upload-time = "2025-08-20T11:55:34.987Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/05/dba51a00eb30bd947791b173766cbed3492269c150a7771d2750000c965f/ujson-5.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12b5e7e22a1fe01058000d1b317d3b65cc3daf61bd2ea7a2b76721fe160fa74d", size = 53190, upload-time = "2025-08-20T11:55:36.384Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/03/3c/fd11a224f73fbffa299fb9644e425f38b38b30231f7923a088dd513aabb4/ujson-5.11.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0180a480a7d099082501cad1fe85252e4d4bf926b40960fb3d9e87a3a6fbbc80", size = 57600, upload-time = "2025-08-20T11:55:37.692Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/55/b9/405103cae24899df688a3431c776e00528bd4799e7d68820e7ebcf824f92/ujson-5.11.0-cp312-cp312-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:fa79fdb47701942c2132a9dd2297a1a85941d966d8c87bfd9e29b0cf423f26cc", size = 59791, upload-time = "2025-08-20T11:55:38.877Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/17/7b/2dcbc2bbfdbf68f2368fb21ab0f6735e872290bb604c75f6e06b81edcb3f/ujson-5.11.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8254e858437c00f17cb72e7a644fc42dad0ebb21ea981b71df6e84b1072aaa7c", size = 57356, upload-time = "2025-08-20T11:55:40.036Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/71/fea2ca18986a366c750767b694430d5ded6b20b6985fddca72f74af38a4c/ujson-5.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1aa8a2ab482f09f6c10fba37112af5f957689a79ea598399c85009f2f29898b5", size = 1036313, upload-time = "2025-08-20T11:55:41.408Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a3/bb/d4220bd7532eac6288d8115db51710fa2d7d271250797b0bfba9f1e755af/ujson-5.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a638425d3c6eed0318df663df44480f4a40dc87cc7c6da44d221418312f6413b", size = 1195782, upload-time = "2025-08-20T11:55:43.357Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/80/47/226e540aa38878ce1194454385701d82df538ccb5ff8db2cf1641dde849a/ujson-5.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7e3cff632c1d78023b15f7e3a81c3745cd3f94c044d1e8fa8efbd6b161997bbc", size = 1088817, upload-time = "2025-08-20T11:55:45.262Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7e/81/546042f0b23c9040d61d46ea5ca76f0cc5e0d399180ddfb2ae976ebff5b5/ujson-5.11.0-cp312-cp312-win32.whl", hash = "sha256:be6b0eaf92cae8cdee4d4c9e074bde43ef1c590ed5ba037ea26c9632fb479c88", size = 39757, upload-time = "2025-08-20T11:55:46.522Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/1b/27c05dc8c9728f44875d74b5bfa948ce91f6c33349232619279f35c6e817/ujson-5.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:b7b136cc6abc7619124fd897ef75f8e63105298b5ca9bdf43ebd0e1fa0ee105f", size = 43859, upload-time = "2025-08-20T11:55:47.987Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/2d/37b6557c97c3409c202c838aa9c960ca3896843b4295c4b7bb2bbd260664/ujson-5.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:6cd2df62f24c506a0ba322d5e4fe4466d47a9467b57e881ee15a31f7ecf68ff6", size = 38361, upload-time = "2025-08-20T11:55:49.122Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/ec/2de9dd371d52c377abc05d2b725645326c4562fc87296a8907c7bcdf2db7/ujson-5.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:109f59885041b14ee9569bf0bb3f98579c3fa0652317b355669939e5fc5ede53", size = 55435, upload-time = "2025-08-20T11:55:50.243Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5b/a4/f611f816eac3a581d8a4372f6967c3ed41eddbae4008d1d77f223f1a4e0a/ujson-5.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a31c6b8004438e8c20fc55ac1c0e07dad42941db24176fe9acf2815971f8e752", size = 53193, upload-time = "2025-08-20T11:55:51.373Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/c5/c161940967184de96f5cbbbcce45b562a4bf851d60f4c677704b1770136d/ujson-5.11.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78c684fb21255b9b90320ba7e199780f653e03f6c2528663768965f4126a5b50", size = 57603, upload-time = "2025-08-20T11:55:52.583Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2b/d6/c7b2444238f5b2e2d0e3dab300b9ddc3606e4b1f0e4bed5a48157cebc792/ujson-5.11.0-cp313-cp313-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:4c9f5d6a27d035dd90a146f7761c2272cf7103de5127c9ab9c4cd39ea61e878a", size = 59794, upload-time = "2025-08-20T11:55:53.69Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fe/a3/292551f936d3d02d9af148f53e1bc04306b00a7cf1fcbb86fa0d1c887242/ujson-5.11.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:837da4d27fed5fdc1b630bd18f519744b23a0b5ada1bbde1a36ba463f2900c03", size = 57363, upload-time = "2025-08-20T11:55:54.843Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/90/a6/82cfa70448831b1a9e73f882225980b5c689bf539ec6400b31656a60ea46/ujson-5.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:787aff4a84da301b7f3bac09bc696e2e5670df829c6f8ecf39916b4e7e24e701", size = 1036311, upload-time = "2025-08-20T11:55:56.197Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/84/5c/96e2266be50f21e9b27acaee8ca8f23ea0b85cb998c33d4f53147687839b/ujson-5.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6dd703c3e86dc6f7044c5ac0b3ae079ed96bf297974598116aa5fb7f655c3a60", size = 1195783, upload-time = "2025-08-20T11:55:58.081Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8d/20/78abe3d808cf3bb3e76f71fca46cd208317bf461c905d79f0d26b9df20f1/ujson-5.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3772e4fe6b0c1e025ba3c50841a0ca4786825a4894c8411bf8d3afe3a8061328", size = 1088822, upload-time = "2025-08-20T11:55:59.469Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d8/50/8856e24bec5e2fc7f775d867aeb7a3f137359356200ac44658f1f2c834b2/ujson-5.11.0-cp313-cp313-win32.whl", hash = "sha256:8fa2af7c1459204b7a42e98263b069bd535ea0cd978b4d6982f35af5a04a4241", size = 39753, upload-time = "2025-08-20T11:56:01.345Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5b/d8/1baee0f4179a4d0f5ce086832147b6cc9b7731c24ca08e14a3fdb8d39c32/ujson-5.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:34032aeca4510a7c7102bd5933f59a37f63891f30a0706fb46487ab6f0edf8f0", size = 43866, upload-time = "2025-08-20T11:56:02.552Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/8c/6d85ef5be82c6d66adced3ec5ef23353ed710a11f70b0b6a836878396334/ujson-5.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:ce076f2df2e1aa62b685086fbad67f2b1d3048369664b4cdccc50707325401f9", size = 38363, upload-time = "2025-08-20T11:56:03.688Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/28/08/4518146f4984d112764b1dfa6fb7bad691c44a401adadaa5e23ccd930053/ujson-5.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:65724738c73645db88f70ba1f2e6fb678f913281804d5da2fd02c8c5839af302", size = 55462, upload-time = "2025-08-20T11:56:04.873Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/29/37/2107b9a62168867a692654d8766b81bd2fd1e1ba13e2ec90555861e02b0c/ujson-5.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29113c003ca33ab71b1b480bde952fbab2a0b6b03a4ee4c3d71687cdcbd1a29d", size = 53246, upload-time = "2025-08-20T11:56:06.054Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/f8/25583c70f83788edbe3ca62ce6c1b79eff465d78dec5eb2b2b56b3e98b33/ujson-5.11.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c44c703842024d796b4c78542a6fcd5c3cb948b9fc2a73ee65b9c86a22ee3638", size = 57631, upload-time = "2025-08-20T11:56:07.374Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/ca/19b3a632933a09d696f10dc1b0dfa1d692e65ad507d12340116ce4f67967/ujson-5.11.0-cp314-cp314-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:e750c436fb90edf85585f5c62a35b35082502383840962c6983403d1bd96a02c", size = 59877, upload-time = "2025-08-20T11:56:08.534Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/55/7a/4572af5324ad4b2bfdd2321e898a527050290147b4ea337a79a0e4e87ec7/ujson-5.11.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f278b31a7c52eb0947b2db55a5133fbc46b6f0ef49972cd1a80843b72e135aba", size = 57363, upload-time = "2025-08-20T11:56:09.758Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7b/71/a2b8c19cf4e1efe53cf439cdf7198ac60ae15471d2f1040b490c1f0f831f/ujson-5.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ab2cb8351d976e788669c8281465d44d4e94413718af497b4e7342d7b2f78018", size = 1036394, upload-time = "2025-08-20T11:56:11.168Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7a/3e/7b98668cba3bb3735929c31b999b374ebc02c19dfa98dfebaeeb5c8597ca/ujson-5.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:090b4d11b380ae25453100b722d0609d5051ffe98f80ec52853ccf8249dfd840", size = 1195837, upload-time = "2025-08-20T11:56:12.6Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/ea/8870f208c20b43571a5c409ebb2fe9b9dba5f494e9e60f9314ac01ea8f78/ujson-5.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:80017e870d882d5517d28995b62e4e518a894f932f1e242cbc802a2fd64d365c", size = 1088837, upload-time = "2025-08-20T11:56:14.15Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/63/b6/c0e6607e37fa47929920a685a968c6b990a802dec65e9c5181e97845985d/ujson-5.11.0-cp314-cp314-win32.whl", hash = "sha256:1d663b96eb34c93392e9caae19c099ec4133ba21654b081956613327f0e973ac", size = 41022, upload-time = "2025-08-20T11:56:15.509Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4e/56/f4fe86b4c9000affd63e9219e59b222dc48b01c534533093e798bf617a7e/ujson-5.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:849e65b696f0d242833f1df4182096cedc50d414215d1371fca85c541fbff629", size = 45111, upload-time = "2025-08-20T11:56:16.597Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/f3/669437f0280308db4783b12a6d88c00730b394327d8334cc7a32ef218e64/ujson-5.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:e73df8648c9470af2b6a6bf5250d4744ad2cf3d774dcf8c6e31f018bdd04d764", size = 39682, upload-time = "2025-08-20T11:56:17.763Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6e/cd/e9809b064a89fe5c4184649adeb13c1b98652db3f8518980b04227358574/ujson-5.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:de6e88f62796372fba1de973c11138f197d3e0e1d80bcb2b8aae1e826096d433", size = 55759, upload-time = "2025-08-20T11:56:18.882Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/be/ae26a6321179ebbb3a2e2685b9007c71bcda41ad7a77bbbe164005e956fc/ujson-5.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:49e56ef8066f11b80d620985ae36869a3ff7e4b74c3b6129182ec5d1df0255f3", size = 53634, upload-time = "2025-08-20T11:56:20.012Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/e9/fb4a220ee6939db099f4cfeeae796ecb91e7584ad4d445d4ca7f994a9135/ujson-5.11.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1a325fd2c3a056cf6c8e023f74a0c478dd282a93141356ae7f16d5309f5ff823", size = 58547, upload-time = "2025-08-20T11:56:21.175Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/f8/fc4b952b8f5fea09ea3397a0bd0ad019e474b204cabcb947cead5d4d1ffc/ujson-5.11.0-cp314-cp314t-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:a0af6574fc1d9d53f4ff371f58c96673e6d988ed2b5bf666a6143c782fa007e9", size = 60489, upload-time = "2025-08-20T11:56:22.342Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2e/e5/af5491dfda4f8b77e24cf3da68ee0d1552f99a13e5c622f4cef1380925c3/ujson-5.11.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10f29e71ecf4ecd93a6610bd8efa8e7b6467454a363c3d6416db65de883eb076", size = 58035, upload-time = "2025-08-20T11:56:23.92Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/09/0945349dd41f25cc8c38d78ace49f14c5052c5bbb7257d2f466fa7bdb533/ujson-5.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1a0a9b76a89827a592656fe12e000cf4f12da9692f51a841a4a07aa4c7ecc41c", size = 1037212, upload-time = "2025-08-20T11:56:25.274Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/49/44/8e04496acb3d5a1cbee3a54828d9652f67a37523efa3d3b18a347339680a/ujson-5.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b16930f6a0753cdc7d637b33b4e8f10d5e351e1fb83872ba6375f1e87be39746", size = 1196500, upload-time = "2025-08-20T11:56:27.517Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/64/ae/4bc825860d679a0f208a19af2f39206dfd804ace2403330fdc3170334a2f/ujson-5.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:04c41afc195fd477a59db3a84d5b83a871bd648ef371cf8c6f43072d89144eef", size = 1089487, upload-time = "2025-08-20T11:56:29.07Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/30/ed/5a057199fb0a5deabe0957073a1c1c1c02a3e99476cd03daee98ea21fa57/ujson-5.11.0-cp314-cp314t-win32.whl", hash = "sha256:aa6d7a5e09217ff93234e050e3e380da62b084e26b9f2e277d2606406a2fc2e5", size = 41859, upload-time = "2025-08-20T11:56:30.495Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/aa/03/b19c6176bdf1dc13ed84b886e99677a52764861b6cc023d5e7b6ebda249d/ujson-5.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:48055e1061c1bb1f79e75b4ac39e821f3f35a9b82de17fce92c3140149009bec", size = 46183, upload-time = "2025-08-20T11:56:31.574Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/ca/a0413a3874b2dc1708b8796ca895bf363292f9c70b2e8ca482b7dbc0259d/ujson-5.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:1194b943e951092db611011cb8dbdb6cf94a3b816ed07906e14d3bc6ce0e90ab", size = 40264, upload-time = "2025-08-20T11:56:32.773Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.3.0"
|
||||
|
||||
@@ -5,7 +5,7 @@ import { MagicWandIcon } from "@radix-ui/react-icons";
|
||||
import { AnimatePresence, motion } from "framer-motion";
|
||||
import { ArrowUp, Lightbulb, X } from "lucide-react";
|
||||
import { useTranslations } from "next-intl";
|
||||
import { useCallback, useMemo, useRef, useState } from "react";
|
||||
import { useCallback, useRef, useState } from "react";
|
||||
|
||||
import { Detective } from "~/components/deer-flow/icons/detective";
|
||||
import MessageInput, {
|
||||
|
||||
@@ -91,6 +91,9 @@ function ActivityListItem({ messageId }: { messageId: string }) {
|
||||
if (message) {
|
||||
if (!message.isStreaming && message.toolCalls?.length) {
|
||||
for (const toolCall of message.toolCalls) {
|
||||
if (toolCall.result?.startsWith("Error")) {
|
||||
return null;
|
||||
}
|
||||
if (toolCall.name === "web_search") {
|
||||
return <WebSearchToolCall key={toolCall.id} toolCall={toolCall} />;
|
||||
} else if (toolCall.name === "crawl_tool") {
|
||||
@@ -111,16 +114,16 @@ function ActivityListItem({ messageId }: { messageId: string }) {
|
||||
const __pageCache = new LRUCache<string, string>({ max: 100 });
|
||||
type SearchResult =
|
||||
| {
|
||||
type: "page";
|
||||
title: string;
|
||||
url: string;
|
||||
content: string;
|
||||
}
|
||||
type: "page";
|
||||
title: string;
|
||||
url: string;
|
||||
content: string;
|
||||
}
|
||||
| {
|
||||
type: "image";
|
||||
image_url: string;
|
||||
image_description: string;
|
||||
};
|
||||
type: "image";
|
||||
image_url: string;
|
||||
image_description: string;
|
||||
};
|
||||
|
||||
function WebSearchToolCall({ toolCall }: { toolCall: ToolCallRuntime }) {
|
||||
const t = useTranslations("chat.research");
|
||||
@@ -317,7 +320,7 @@ function RetrieverToolCall({ toolCall }: { toolCall: ToolCallRuntime }) {
|
||||
/>
|
||||
</li>
|
||||
))}
|
||||
{documents.map((doc, i) => (
|
||||
{documents?.map((doc, i) => (
|
||||
<motion.li
|
||||
key={`search-result-${i}`}
|
||||
className="text-muted-foreground bg-accent flex max-w-40 gap-2 rounded-md px-2 py-1 text-sm"
|
||||
@@ -330,7 +333,7 @@ function RetrieverToolCall({ toolCall }: { toolCall: ToolCallRuntime }) {
|
||||
}}
|
||||
>
|
||||
<FileText size={32} />
|
||||
{doc.title}
|
||||
{doc.title} (chunk-{i},size-{doc.content.length})
|
||||
</motion.li>
|
||||
))}
|
||||
</ul>
|
||||
|
||||
@@ -12,7 +12,7 @@ import type { Tab } from "./types";
|
||||
|
||||
export const AboutTab: Tab = () => {
|
||||
const locale = useLocale();
|
||||
const t = useTranslations("settings.about");
|
||||
//const t = useTranslations("settings.about");
|
||||
|
||||
const aboutContent = locale === "zh" ? aboutZh : aboutEn;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user