diff --git a/.env.example b/.env.example index 41468ab..7bb51a6 100644 --- a/.env.example +++ b/.env.example @@ -41,6 +41,29 @@ TAVILY_API_KEY=tvly-xxx # RAGFLOW_RETRIEVAL_SIZE=10 # RAGFLOW_CROSS_LANGUAGES=English,Chinese,Spanish,French,German,Japanese,Korean # Optional. To use RAGFlow's cross-language search, please separate each language with a single comma + +# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start ) +# RAG_PROVIDER=milvus +# MILVUS_URI= +# MILVUS_USER= +# MILVUS_PASSWORD= +# MILVUS_COLLECTION=documents +# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope +# MILVUS_EMBEDDING_BASE_URL= +# MILVUS_EMBEDDING_MODEL= +# MILVUS_EMBEDDING_API_KEY= +# MILVUS_AUTO_LOAD_EXAMPLES=true + +# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux) +# RAG_PROVIDER=milvus +# MILVUS_URI=./milvus_demo.db +# MILVUS_COLLECTION=documents +# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope +# MILVUS_EMBEDDING_BASE_URL= +# MILVUS_EMBEDDING_MODEL= +# MILVUS_EMBEDDING_API_KEY= +# MILVUS_AUTO_LOAD_EXAMPLES=true + # Optional, volcengine TTS for generating podcast VOLCENGINE_TTS_APPID=xxx VOLCENGINE_TTS_ACCESS_TOKEN=xxx diff --git a/docs/configuration_guide.md b/docs/configuration_guide.md index 65672bc..8233cb5 100644 --- a/docs/configuration_guide.md +++ b/docs/configuration_guide.md @@ -179,4 +179,40 @@ SEARCH_ENGINE: exclude_domains: - unreliable-site.com - spam-domain.net +``` +## RAG (Retrieval-Augmented Generation) Configuration + +DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables. 
+ +### Supported RAG Providers + +- **RAGFlow**: Document retrieval using RAGFlow API +- **VikingDB Knowledge Base**: ByteDance's VikingDB knowledge base service +- **Milvus**: Open-source vector database for similarity search + +### Milvus Configuration + +To use Milvus as your RAG provider, set the following environment variables: + +```bash +# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start ) +RAG_PROVIDER=milvus +MILVUS_URI= +MILVUS_USER= +MILVUS_PASSWORD= +MILVUS_COLLECTION=documents +MILVUS_EMBEDDING_PROVIDER=openai +MILVUS_EMBEDDING_BASE_URL= +MILVUS_EMBEDDING_MODEL= +MILVUS_EMBEDDING_API_KEY= + +# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux) +RAG_PROVIDER=milvus +MILVUS_URI=./milvus_demo.db +MILVUS_COLLECTION=documents +MILVUS_EMBEDDING_PROVIDER=openai +MILVUS_EMBEDDING_BASE_URL= +MILVUS_EMBEDDING_MODEL= +MILVUS_EMBEDDING_API_KEY= +``` diff --git a/pyproject.toml b/pyproject.toml index a64b41d..708ada1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,8 @@ dependencies = [ "langchain-tavily<0.3", "langgraph-checkpoint-mongodb>=0.1.4", "langgraph-checkpoint-postgres==2.0.21", + "pymilvus>=2.3.0", + "langchain-milvus>=0.2.1", "psycopg[binary]>=3.2.9", ] diff --git a/src/config/configuration.py b/src/config/configuration.py index e9ebd96..5c570f4 100644 --- a/src/config/configuration.py +++ b/src/config/configuration.py @@ -10,36 +10,10 @@ from langchain_core.runnables import RunnableConfig from src.config.report_style import ReportStyle from src.rag.retriever import Resource +from src.config.loader import get_str_env, get_int_env, get_bool_env logger = logging.getLogger(__name__) -_TRUTHY = {"1", "true", "yes", "y", "on"} - - -def get_bool_env(name: str, default: bool = False) -> bool: - val = os.getenv(name) - if val is None: - return default - return str(val).strip().lower() in _TRUTHY - - -def get_str_env(name: str, default: str = "") -> str: - val = os.getenv(name) - return 
def get_bool_env(name: str, default: bool = False) -> bool:
    """Read a boolean flag from the environment.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is not set at all.

    Returns:
        ``default`` when the variable is unset. Otherwise ``True`` only if the
        stripped, lower-cased value is one of ``1``, ``true``, ``yes``, ``y``
        or ``on``; every other value (including the empty string) is ``False``.
    """
    val = os.getenv(name)
    if val is None:
        return default
    return val.strip().lower() in {"1", "true", "yes", "y", "on"}


def get_str_env(name: str, default: str = "") -> str:
    """Read a string from the environment with surrounding whitespace removed.

    Args:
        name: Environment variable name.
        default: Value returned (as-is, not stripped) when the variable is unset.

    Returns:
        The stripped value, or ``default`` when the variable is not set.
    """
    val = os.getenv(name)
    return default if val is None else val.strip()


def get_int_env(name: str, default: int = 0) -> int:
    """Read an integer from the environment, falling back on bad input.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is unset or cannot be parsed
            by ``int()`` after stripping whitespace (e.g. ``"abc"`` or ``""``).

    Returns:
        The parsed integer, or ``default``.
    """
    # Imported locally so this helper does not depend on the module's import
    # block; the diagnostic goes through logging instead of stdout so it
    # respects the application's log configuration.
    import logging

    val = os.getenv(name)
    if val is None:
        return default
    try:
        return int(val.strip())
    except ValueError:
        logging.getLogger(__name__).warning(
            "Invalid integer value for %s: %s. Using default %s.",
            name,
            val,
            default,
        )
        return default
class DashscopeEmbeddings:
    """Small embeddings client that talks to an OpenAI-compatible endpoint.

    Provides the ``embed_query`` / ``embed_documents`` pair of methods. Only
    the ``api_key``, ``base_url``, ``model`` and ``encoding_format`` keyword
    arguments are read; any other keyword passed to the constructor is ignored.
    """

    def __init__(self, **kwargs: Any) -> None:
        api_key = kwargs.get("api_key", "")
        base_url = kwargs.get("base_url", "")
        self._client: OpenAI = OpenAI(api_key=api_key, base_url=base_url)
        self._model: str = kwargs.get("model", "")
        self._encoding_format: str = kwargs.get("encoding_format", "float")

    def _embed(self, texts: Sequence[str]) -> List[List[float]]:
        """Embed ``texts`` with a single API call; no call is made when empty."""
        payload: List[str] = []
        for item in texts:
            # Non-string items are coerced so the request body is all text.
            payload.append(item if isinstance(item, str) else str(item))
        if not payload:
            return []

        response = self._client.embeddings.create(
            model=self._model,
            input=payload,
            encoding_format=self._encoding_format,
        )
        vectors: List[List[float]] = []
        for row in response.data:
            vectors.append(row.embedding)
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Return the embedding of one text, or ``[]`` if none came back."""
        vectors = self._embed([text])
        if not vectors:
            return []
        return vectors[0]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per input text (LangChain interface)."""
        return self._embed(texts)
+ * Provide methods for inserting content chunks & querying similarity. + * Optionally surface example markdown resources found in the project. + Environment variables (selected): + MILVUS_URI: Connection URI or local *.db path for Milvus Lite. + MILVUS_COLLECTION: Target collection name (default: documents). + MILVUS_TOP_K: Result set size (default: 10). + MILVUS_EMBEDDING_PROVIDER: openai | dashscope (default: openai). + MILVUS_EMBEDDING_MODEL: Embedding model name. + MILVUS_EMBEDDING_DIM: Override embedding dimensionality. + MILVUS_AUTO_LOAD_EXAMPLES: Load example *.md files if true. + MILVUS_EXAMPLES_DIR: Folder containing example markdown files. + """ + + def __init__(self) -> None: + # --- Connection / collection configuration --- + self.uri: str = get_str_env("MILVUS_URI", "http://localhost:19530") + self.user: str = get_str_env("MILVUS_USER") + self.password: str = get_str_env("MILVUS_PASSWORD") + self.collection_name: str = get_str_env("MILVUS_COLLECTION", "documents") + + # --- Search configuration --- + top_k_raw = get_str_env("MILVUS_TOP_K", "10") + self.top_k: int = int(top_k_raw) if top_k_raw.isdigit() else 10 + + # --- Vector field names --- + self.vector_field: str = get_str_env("MILVUS_VECTOR_FIELD", "embedding") + self.id_field: str = get_str_env("MILVUS_ID_FIELD", "id") + self.content_field: str = get_str_env("MILVUS_CONTENT_FIELD", "content") + self.title_field: str = get_str_env("MILVUS_TITLE_FIELD", "title") + self.url_field: str = get_str_env("MILVUS_URL_FIELD", "url") + self.metadata_field: str = get_str_env("MILVUS_METADATA_FIELD", "metadata") + + # --- Embedding configuration --- + self.embedding_model = get_str_env("MILVUS_EMBEDDING_MODEL") + self.embedding_api_key = get_str_env("MILVUS_EMBEDDING_API_KEY") + self.embedding_base_url = get_str_env("MILVUS_EMBEDDING_BASE_URL") + self.embedding_dim: int = self._get_embedding_dimension(self.embedding_model) + self.embedding_provider = get_str_env("MILVUS_EMBEDDING_PROVIDER", "openai") + + # 
def _get_embedding_dimension(self, model_name: str) -> int:
    """Resolve the vector dimensionality used for the collection schema.

    A positive ``MILVUS_EMBEDDING_DIM`` environment value always wins.
    Otherwise the dimension is looked up by model name, and models that are
    not in the table fall back to 1536.

    Args:
        model_name: Name of the configured embedding model.

    Returns:
        The embedding dimension as an integer.
    """
    override = get_int_env("MILVUS_EMBEDDING_DIM", 0)

    # Dimensions of the embedding models this retriever knows about.
    known_dimensions = {
        "text-embedding-ada-002": 1536,
        "text-embedding-v4": 2048,
    }
    fallback = 1536

    return override if override > 0 else known_dimensions.get(model_name, fallback)
+ """ + fields = [ + FieldSchema( + name=self.id_field, + dtype=DataType.VARCHAR, + max_length=512, + is_primary=True, + auto_id=False, + ), + FieldSchema( + name=self.vector_field, + dtype=DataType.FLOAT_VECTOR, + dim=self.embedding_dim, + ), + FieldSchema( + name=self.content_field, dtype=DataType.VARCHAR, max_length=65535 + ), + FieldSchema(name=self.title_field, dtype=DataType.VARCHAR, max_length=512), + FieldSchema(name=self.url_field, dtype=DataType.VARCHAR, max_length=1024), + ] + + schema = CollectionSchema( + fields=fields, + description=f"Collection for DeerFlow RAG documents: {self.collection_name}", + enable_dynamic_field=True, # Allow additional dynamic metadata fields + ) + return schema + + def _ensure_collection_exists(self) -> None: + """Ensure the configured collection exists (create if missing). + For Milvus Lite we create the collection manually; for the remote + (LangChain) client we rely on LangChain's internal logic. + """ + if self._is_milvus_lite(): + # For Milvus Lite, use MilvusClient + try: + # Check if collection exists + collections = self.client.list_collections() + if self.collection_name not in collections: + # Create collection + schema = self._create_collection_schema() + self.client.create_collection( + collection_name=self.collection_name, + schema=schema, + index_params={ + "field_name": self.vector_field, + "index_type": "IVF_FLAT", + "metric_type": "IP", + "params": {"nlist": 1024}, + }, + ) + logger.info("Created Milvus collection: %s", self.collection_name) + + except Exception as e: + logger.warning("Could not ensure collection exists: %s", e) + else: + # For LangChain Milvus, collection creation is handled automatically + logger.warning( + "Could not ensure collection exists: %s", self.collection_name + ) + + def _load_example_files(self) -> None: + """Load example markdown files into the collection (idempotent). 
+ Each markdown file is split into chunks and inserted only if a chunk + with the derived document id hasn't been previously stored. + """ + try: + # Get the project root directory + current_file = Path(__file__) + project_root = current_file.parent.parent.parent # Go up to project root + examples_path = project_root / self.examples_dir + + if not examples_path.exists(): + logger.info("Examples directory not found: %s", examples_path) + return + + logger.info("Loading example files from: %s", examples_path) + + # Find all markdown files + md_files = list(examples_path.glob("*.md")) + if not md_files: + logger.info("No markdown files found in examples directory") + return + # Check if files are already loaded + existing_docs = self._get_existing_document_ids() + loaded_count = 0 + for md_file in md_files: + doc_id = self._generate_doc_id(md_file) + + # Skip if already loaded + if doc_id in existing_docs: + continue + try: + # Read and process the file + content = md_file.read_text(encoding="utf-8") + title = self._extract_title_from_markdown(content, md_file.name) + + # Split content into chunks if it's too long + chunks = self._split_content(content) + + # Insert each chunk + for i, chunk in enumerate(chunks): + chunk_id = f"{doc_id}_chunk_{i}" if len(chunks) > 1 else doc_id + self._insert_document_chunk( + doc_id=chunk_id, + content=chunk, + title=title, + url=f"milvus://{self.collection_name}/{md_file.name}", + metadata={"source": "examples", "file": md_file.name}, + ) + + loaded_count += 1 + logger.debug("Loaded example markdown: %s", md_file.name) + + except Exception as e: + logger.warning("Error loading %s: %s", md_file.name, e) + + logger.info( + "Successfully loaded %d example files into Milvus", loaded_count + ) + + except Exception as e: + logger.error("Error loading example files: %s", e) + + def _generate_doc_id(self, file_path: Path) -> str: + """Return a stable identifier derived from name, size & mtime hash.""" + # Use file name and size for a simple 
but effective ID + file_stat = file_path.stat() + content_hash = hashlib.md5( + f"{file_path.name}_{file_stat.st_size}_{file_stat.st_mtime}".encode() + ).hexdigest()[:8] + return f"example_{file_path.stem}_{content_hash}" + + def _extract_title_from_markdown(self, content: str, filename: str) -> str: + """Extract the first level-1 heading; else derive from file name.""" + lines = content.split("\n") + for line in lines: + line = line.strip() + if line.startswith("# "): + return line[2:].strip() + + # Fallback to filename without extension + return filename.replace(".md", "").replace("_", " ").title() + + def _split_content(self, content: str) -> List[str]: + """Split long markdown text into paragraph-based chunks.""" + if len(content) <= self.chunk_size: + return [content] + + chunks = [] + paragraphs = content.split("\n\n") + current_chunk = "" + + for paragraph in paragraphs: + if len(current_chunk) + len(paragraph) <= self.chunk_size: + current_chunk += paragraph + "\n\n" + else: + if current_chunk: + chunks.append(current_chunk.strip()) + current_chunk = paragraph + "\n\n" + + if current_chunk: + chunks.append(current_chunk.strip()) + + return chunks + + def _get_existing_document_ids(self) -> Set[str]: + """Return set of existing document identifiers in the collection.""" + try: + if self._is_milvus_lite(): + results = self.client.query( + collection_name=self.collection_name, + filter="", + output_fields=[self.id_field], + limit=10000, + ) + return { + result.get(self.id_field, "") + for result in results + if result.get(self.id_field) + } + else: + # For LangChain Milvus, we can't easily query all IDs + # Return empty set to allow re-insertion (LangChain will handle duplicates) + return set() + except Exception: + return set() + + def _insert_document_chunk( + self, doc_id: str, content: str, title: str, url: str, metadata: Dict[str, Any] + ) -> None: + """Insert a single content chunk into Milvus.""" + try: + # Generate embedding + embedding = 
def _connect(self) -> None:
    """Create the underlying Milvus client and store it on ``self.client``.

    A local Milvus Lite URI gets a ``MilvusClient`` and the collection is
    ensured to exist; any other URI gets a LangChain ``Milvus`` vector store.
    Every call builds a new client, so callers should check ``self.client``
    first if they want to reuse an existing connection.

    Raises:
        ConnectionError: If the client cannot be created. The original
            exception is attached as ``__cause__``.
    """
    try:
        if self._is_milvus_lite():
            # Milvus Lite: plain client on a local file database.
            self.client = MilvusClient(self.uri)
            self._ensure_collection_exists()
        else:
            connection_args = {"uri": self.uri}
            # Credentials are optional; only send the ones that were configured.
            if self.user:
                connection_args["user"] = self.user
            if self.password:
                connection_args["password"] = self.password

            self.client = LangchainMilvus(
                embedding_function=self.embedding_model,
                collection_name=self.collection_name,
                connection_args=connection_args,
                # Never drop an existing collection on connect.
                drop_old=False,
            )
    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise ConnectionError(f"Failed to connect to Milvus: {e}") from e
+ """ + return self.uri.endswith(".db") or ( + not self.uri.startswith(("http://", "https://")) and "://" not in self.uri + ) + + def _get_embedding(self, text: str) -> List[float]: + """Return embedding for a given text.""" + try: + # Validate input + if not isinstance(text, str): + raise ValueError(f"Text must be a string, got {type(text)}") + + if not text.strip(): + raise ValueError("Text cannot be empty or only whitespace") + # Unified embedding interface (OpenAIEmbeddings or DashscopeEmbeddings wrapper) + embeddings = self.embedding_model.embed_query(text=text.strip()) + + # Validate output + if not isinstance(embeddings, list) or not embeddings: + raise ValueError(f"Invalid embedding format: {type(embeddings)}") + + return embeddings + except Exception as e: + raise RuntimeError(f"Failed to generate embedding: {str(e)}") + + def list_resources(self, query: Optional[str] = None) -> List[Resource]: + """List available resource summaries. + + Strategy: + 1. If connected to Milvus Lite: query stored document metadata. + 2. If LangChain client: perform a lightweight similarity search + using either the provided ``query`` or a zero vector to fetch + candidate docs (mocked in tests). + 3. Append local markdown example titles (non-ingested) for user + discoverability. + + Args: + query: Optional search text to bias resource ordering. + + Returns: + List of ``Resource`` objects. + """ + resources: List[Resource] = [] + + # Ensure connection established + if not self.client: + try: + self._connect() + except Exception: + # Fall back to only local examples if connection fails + return self._list_local_markdown_resources() + + try: + if self._is_milvus_lite(): + # Query limited metadata. Empty filter returns up to limit docs. 
+ results = self.client.query( + collection_name=self.collection_name, + filter="source == 'examples'", + output_fields=[self.id_field, self.title_field, self.url_field], + limit=100, + ) + for r in results: + resources.append( + Resource( + uri=r.get(self.url_field, "") + or f"milvus://{r.get(self.id_field,'')}", + title=r.get(self.title_field, "") + or r.get(self.id_field, "Unnamed"), + description="Stored Milvus document", + ) + ) + else: + # Use similarity_search_by_vector for lightweight listing. + # If a query is provided embed it; else use a zero vector. + docs: Iterable[Any] = self.client.similarity_search( + query, k=100, expr="source == 'examples'" # Limit to 100 results + ) + for d in docs: + meta = getattr(d, "metadata", {}) or {} + # check if the resource is in the list of resources + if resources and any( + r.uri == meta.get(self.url_field, "") + or r.uri == f"milvus://{meta.get(self.id_field,'')}" + for r in resources + ): + continue + resources.append( + Resource( + uri=meta.get(self.url_field, "") + or f"milvus://{meta.get(self.id_field,'')}", + title=meta.get(self.title_field, "") + or meta.get(self.id_field, "Unnamed"), + description="Stored Milvus document", + ) + ) + logger.info( + "Succeed listed %d resources from Milvus collection: %s", + len(resources), + self.collection_name, + ) + except Exception: + logger.warning( + "Failed to query Milvus for resources, falling back to local examples." + ) + # Fall back to only local examples if connection fails + return self._list_local_markdown_resources() + return resources + + def _list_local_markdown_resources(self) -> List[Resource]: + """Return local example markdown files as ``Resource`` objects. + + These are surfaced even when not ingested so users can choose to load + them. 
Controlled by directory presence only (lightweight).""" + current_file = Path(__file__) + project_root = current_file.parent.parent.parent # up to project root + examples_path = project_root / self.examples_dir + if not examples_path.exists(): + return [] + + md_files = list(examples_path.glob("*.md")) + resources: list[Resource] = [] + for md_file in md_files: + try: + content = md_file.read_text(encoding="utf-8", errors="ignore") + title = self._extract_title_from_markdown(content, md_file.name) + uri = f"milvus://{self.collection_name}/{md_file.name}" + resources.append( + Resource( + uri=uri, + title=title, + description="Local markdown example (not yet ingested)", + ) + ) + except Exception: + continue + return resources + + def query_relevant_documents( + self, query: str, resources: Optional[List[Resource]] = None + ) -> List[Document]: + """Perform vector similarity search returning rich ``Document`` objects. + + Args: + query: Natural language query string. + resources: Optional subset filter of ``Resource`` objects; if + provided, only documents whose id/url appear in the list will + be included. + + Returns: + List of aggregated ``Document`` objects; each contains one or more + ``Chunk`` instances (one per matched piece of content). + + Raises: + RuntimeError: On underlying search errors. 
+ """ + resources = resources or [] + try: + if not self.client: + self._connect() + + # Get embeddings for the query + query_embedding = self._get_embedding(query) + + # For Milvus Lite, use MilvusClient directly + if self._is_milvus_lite(): + # Perform vector search + search_results = self.client.search( + collection_name=self.collection_name, + data=[query_embedding], + anns_field=self.vector_field, + param={"metric_type": "IP", "params": {"nprobe": 10}}, + limit=self.top_k, + output_fields=[ + self.id_field, + self.content_field, + self.title_field, + self.url_field, + ], + ) + + documents = {} + + for result_list in search_results: + for result in result_list: + entity = result.get("entity", {}) + doc_id = entity.get(self.id_field, "") + content = entity.get(self.content_field, "") + title = entity.get(self.title_field, "") + url = entity.get(self.url_field, "") + score = result.get("distance", 0.0) + + # Skip if resource filtering is requested and this doc is not in the list + if resources: + doc_in_resources = False + for resource in resources: + if ( + url and url in resource.uri + ) or doc_id in resource.uri: + doc_in_resources = True + break + if not doc_in_resources: + continue + + # Create or update document + if doc_id not in documents: + documents[doc_id] = Document( + id=doc_id, url=url, title=title, chunks=[] + ) + + # Add chunk to document + chunk = Chunk(content=content, similarity=score) + documents[doc_id].chunks.append(chunk) + + return list(documents.values()) + + else: + # For LangChain Milvus, use similarity search + search_results = self.client.similarity_search_with_score( + query=query, k=self.top_k + ) + + documents = {} + + for doc, score in search_results: + metadata = doc.metadata or {} + doc_id = metadata.get(self.id_field, "") + title = metadata.get(self.title_field, "") + url = metadata.get(self.url_field, "") + content = doc.page_content + + # Skip if resource filtering is requested and this doc is not in the list + if resources: 
+ doc_in_resources = False + for resource in resources: + if (url and url in resource.uri) or doc_id in resource.uri: + doc_in_resources = True + break + if not doc_in_resources: + continue + + # Create or update document + if doc_id not in documents: + documents[doc_id] = Document( + id=doc_id, url=url, title=title, chunks=[] + ) + + # Add chunk to document + chunk = Chunk(content=content, similarity=score) + documents[doc_id].chunks.append(chunk) + + return list(documents.values()) + + except Exception as e: + raise RuntimeError(f"Failed to query documents from Milvus: {str(e)}") + + def create_collection(self) -> None: + """Public hook ensuring collection exists (explicit initialization).""" + if not self.client: + self._connect() + else: + # If we're using Milvus Lite, ensure collection exists + if self._is_milvus_lite(): + self._ensure_collection_exists() + + def load_examples(self, force_reload: bool = False) -> None: + """Load example markdown files, optionally clearing existing ones. + + Args: + force_reload: If True existing example documents are deleted first. 
+ """ + if not self.client: + self._connect() + + if force_reload: + # Clear existing examples + self._clear_example_documents() + + self._load_example_files() + + def _clear_example_documents(self) -> None: + """Delete previously ingested example documents (Milvus Lite only).""" + try: + if self._is_milvus_lite(): + # For Milvus Lite, delete documents with source='examples' + # Note: Milvus doesn't support direct delete by filter in all versions + # So we'll query and delete by IDs + results = self.client.query( + collection_name=self.collection_name, + filter="source == 'examples'", + output_fields=[self.id_field], + limit=10000, + ) + + if results: + doc_ids = [result[self.id_field] for result in results] + self.client.delete( + collection_name=self.collection_name, ids=doc_ids + ) + logger.info("Cleared %d existing example documents", len(doc_ids)) + else: + # For LangChain Milvus, we can't easily delete by metadata + logger.info( + "Clearing existing examples not supported for LangChain Milvus client" + ) + + except Exception as e: + logger.warning("Could not clear existing examples: %s", e) + + def get_loaded_examples(self) -> List[Dict[str, str]]: + """Return metadata for previously ingested example documents.""" + try: + if not self.client: + self._connect() + + if self._is_milvus_lite(): + results = self.client.query( + collection_name=self.collection_name, + filter="source == 'examples'", + output_fields=[ + self.id_field, + self.title_field, + self.url_field, + "source", + "file", + ], + limit=1000, + ) + + examples = [] + for result in results: + examples.append( + { + "id": result.get(self.id_field, ""), + "title": result.get(self.title_field, ""), + "file": result.get("file", ""), + "url": result.get(self.url_field, ""), + } + ) + + return examples + else: + # For LangChain Milvus, we can't easily filter by metadata + logger.info( + "Getting loaded examples not supported for LangChain Milvus client" + ) + return [] + + except Exception as e: + 
def close(self) -> None:
    """Release the underlying client; safe to call more than once.

    For Milvus Lite the client's ``close()`` is called when it has one; the
    LangChain client is just dropped. Errors raised while closing are
    swallowed on purpose because this also runs from ``__del__``, but
    ``self.client`` is always reset so a broken client is never reused.
    """
    client = getattr(self, "client", None)
    if not client:
        return
    try:
        if self._is_milvus_lite() and hasattr(client, "close"):
            client.close()
    except Exception:
        # Best-effort cleanup: a failing close must not propagate.
        pass
    finally:
        self.client = None
build_graph as build_prose_graph from src.rag.builder import build_retriever +from src.rag.milvus import load_examples from src.rag.retriever import Resource from src.server.chat_request import ( ChatRequest, @@ -73,6 +75,10 @@ app.add_middleware( allow_methods=["GET", "POST", "OPTIONS"], # Use the configured list of methods allow_headers=["*"], # Now allow all headers, but can be restricted further ) + +# Load examples into Milvus if configured +load_examples() + in_memory_store = InMemoryStore() graph = build_graph_with_memory() diff --git a/tests/unit/rag/test_milvus.py b/tests/unit/rag/test_milvus.py new file mode 100644 index 0000000..d55b950 --- /dev/null +++ b/tests/unit/rag/test_milvus.py @@ -0,0 +1,824 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# SPDX-License-Identifier: MIT + +from __future__ import annotations +from uuid import uuid4 +from types import SimpleNamespace +from pathlib import Path +import pytest + +import src.rag.milvus as milvus_mod +from src.rag.milvus import MilvusProvider +from src.rag.retriever import Resource + + +class DummyEmbedding: + + def __init__(self, **kwargs): + self.kwargs = kwargs + + def embed_query(self, text: str): + return [0.1, 0.2, 0.3] + + def embed_documents(self, texts): + return [[0.1, 0.2, 0.3] for _ in texts] + + +@pytest.fixture(autouse=True) +def patch_embeddings(monkeypatch): + # Prevent network / external API usage during __init__ + monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "openai") + monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "text-embedding-ada-002") + monkeypatch.setenv("MILVUS_COLLECTION", "documents") + monkeypatch.setenv("MILVUS_URI", "./milvus_demo.db") # default lite + monkeypatch.setattr(milvus_mod, "OpenAIEmbeddings", DummyEmbedding) + monkeypatch.setattr(milvus_mod, "DashscopeEmbeddings", DummyEmbedding) + yield + + +@pytest.fixture +def project_root(): + # Mirror logic from implementation: current_file.parent.parent.parent + return 
Path(milvus_mod.__file__).parent.parent.parent + + +def _patch_init(monkeypatch): + """Patch retriever initialization to use dummy embedding model.""" + monkeypatch.setattr( + MilvusProvider, + "_init_embedding_model", + lambda self: setattr(self, "embedding_model", DummyEmbedding()), + ) + + +def test_list_local_markdown_resources_missing_dir(project_root): + retriever = MilvusProvider() + # Point to a non-existent examples dir + retriever.examples_dir = f"missing_examples_{uuid4().hex}" + resources = retriever._list_local_markdown_resources() + assert resources == [] + + +def test_list_local_markdown_resources_populated(project_root): + retriever = MilvusProvider() + examples_dir = f"examples_test_{uuid4().hex}" + retriever.examples_dir = examples_dir + target_dir = project_root / examples_dir + target_dir.mkdir(parents=True, exist_ok=True) + + # File with heading + (target_dir / "file1.md").write_text( + "# Title One\n\nContent body.", encoding="utf-8" + ) + # File without heading -> fallback title + (target_dir / "file_two.md").write_text("No heading here.", encoding="utf-8") + # Non-markdown file should be ignored + (target_dir / "ignore.txt").write_text("Should not be picked up.", encoding="utf-8") + + resources = retriever._list_local_markdown_resources() + # Order not guaranteed; sort by uri for assertions + resources.sort(key=lambda r: r.uri) + + # Expect two resources + assert len(resources) == 2 + uris = {r.uri for r in resources} + assert uris == { + f"milvus://{retriever.collection_name}/file1.md", + f"milvus://{retriever.collection_name}/file_two.md", + } + + res_map = {r.uri: r for r in resources} + r1 = res_map[f"milvus://{retriever.collection_name}/file1.md"] + assert isinstance(r1, Resource) + assert r1.title == "Title One" + assert r1.description == "Local markdown example (not yet ingested)" + + r2 = res_map[f"milvus://{retriever.collection_name}/file_two.md"] + # Fallback logic: filename -> "file_two" -> "file two" -> title case -> "File Two" + 
assert r2.title == "File Two" + assert r2.description == "Local markdown example (not yet ingested)" + + +def test_list_local_markdown_resources_read_error(monkeypatch, project_root): + retriever = MilvusProvider() + examples_dir = f"examples_error_{uuid4().hex}" + retriever.examples_dir = examples_dir + target_dir = project_root / examples_dir + target_dir.mkdir(parents=True, exist_ok=True) + + bad_file = target_dir / "bad.md" + good_file = target_dir / "good.md" + good_file.write_text("# Good Title\n\nBody.", encoding="utf-8") + bad_file.write_text("Broken", encoding="utf-8") + + # Patch Path.read_text to raise for bad.md only + original_read_text = Path.read_text + + def fake_read_text(self, *args, **kwargs): + if self == bad_file: + raise OSError("Cannot read file") + return original_read_text(self, *args, **kwargs) + + monkeypatch.setattr(Path, "read_text", fake_read_text) + + resources = retriever._list_local_markdown_resources() + # Only good.md should appear + assert len(resources) == 1 + r = resources[0] + assert r.title == "Good Title" + assert r.uri == f"milvus://{retriever.collection_name}/good.md" + + +def test_create_collection_schema_fields(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + schema = retriever._create_collection_schema() + field_names = {f.name for f in schema.fields} + # Core fields must be present + assert { + retriever.id_field, + retriever.vector_field, + retriever.content_field, + } <= field_names + # Dynamic field enabled for extra metadata + assert schema.enable_dynamic_field is True + + +def test_generate_doc_id_stable(monkeypatch, tmp_path): + _patch_init(monkeypatch) + retriever = MilvusProvider() + test_file = tmp_path / "example.md" + test_file.write_text("# Title\nBody", encoding="utf-8") + doc_id1 = retriever._generate_doc_id(test_file) + doc_id2 = retriever._generate_doc_id(test_file) + assert doc_id1 == doc_id2 # deterministic given unchanged file metadata + + +def 
test_extract_title_from_markdown(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + heading = retriever._extract_title_from_markdown("# Heading\nBody", "ignored.md") + assert heading == "Heading" + fallback = retriever._extract_title_from_markdown("Body only", "my_file_name.md") + assert fallback == "My File Name" + + +def test_split_content_chunking(monkeypatch): + monkeypatch.setenv("MILVUS_CHUNK_SIZE", "40") # small to force split + _patch_init(monkeypatch) + retriever = MilvusProvider() + long_content = ( + "Para1 text here.\n\nPara2 second block.\n\nPara3 final." # 3 paragraphs + ) + chunks = retriever._split_content(long_content) + assert len(chunks) >= 2 # forced split + assert all(chunks) # no empty chunks + + +def test_get_embedding_invalid_inputs(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + # Non-string value + with pytest.raises(RuntimeError): + retriever._get_embedding(123) # type: ignore[arg-type] + # Whitespace only + with pytest.raises(RuntimeError): + retriever._get_embedding(" ") + + +def test_list_resources_remote_success_and_dedup(monkeypatch): + monkeypatch.setenv("MILVUS_URI", "http://remote") + _patch_init(monkeypatch) + retriever = MilvusProvider() + + class DocObj: + def __init__(self, content: str, meta: dict): + self.page_content = content + self.metadata = meta + + calls = {"similarity_search": 0} + + class RemoteClient: + def similarity_search(self, query, k, expr): # noqa: D401 + calls["similarity_search"] += 1 + # Two docs with identical id to test dedup + meta1 = { + retriever.id_field: "d1", + retriever.title_field: "T1", + retriever.url_field: "u1", + } + meta2 = { + retriever.id_field: "d1", + retriever.title_field: "T1_dup", + retriever.url_field: "u1", + } + return [DocObj("c1", meta1), DocObj("c1_dup", meta2)] + + retriever.client = RemoteClient() + resources = retriever.list_resources("query text") + assert len(resources) == 1 # dedup applied + assert 
resources[0].title.startswith("T1") + assert calls["similarity_search"] == 1 + + +def test_list_resources_lite_success(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + + class DummyMilvusLite: + def query(self, collection_name, filter, output_fields, limit): # noqa: D401 + return [ + { + retriever.id_field: "idA", + retriever.title_field: "Alpha", + retriever.url_field: "u://a", + }, + { + retriever.id_field: "idB", + retriever.title_field: "Beta", + retriever.url_field: "u://b", + }, + ] + + retriever.client = DummyMilvusLite() + resources = retriever.list_resources() + assert {r.title for r in resources} == {"Alpha", "Beta"} + + +def test_query_relevant_documents_lite_success(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + + # Provide deterministic embedding output + retriever.embedding_model.embed_query = lambda text: [0.1, 0.2, 0.3] # type: ignore + + class DummyMilvusLite: + def search( + self, collection_name, data, anns_field, param, limit, output_fields + ): # noqa: D401 + # Simulate two result entries + return [ + [ + { + "entity": { + retriever.id_field: "d1", + retriever.content_field: "c1", + retriever.title_field: "T1", + retriever.url_field: "u1", + }, + "distance": 0.9, + }, + { + "entity": { + retriever.id_field: "d2", + retriever.content_field: "c2", + retriever.title_field: "T2", + retriever.url_field: "u2", + }, + "distance": 0.8, + }, + ] + ] + + retriever.client = DummyMilvusLite() + # Filter for only d2 via resource list + docs = retriever.query_relevant_documents( + "question", resources=[Resource(uri="milvus://d2", title="", description="")] + ) + assert len(docs) == 1 and docs[0].id == "d2" and docs[0].chunks[0].similarity == 0.8 + + +def test_query_relevant_documents_remote_success(monkeypatch): + monkeypatch.setenv("MILVUS_URI", "http://remote") + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever.embedding_model.embed_query = lambda text: [0.1, 0.2, 0.3] # type: ignore + 
+ class DocObj: + def __init__(self, content: str, meta: dict): # noqa: D401 + self.page_content = content + self.metadata = meta + + class RemoteClient: + def similarity_search_with_score(self, query, k): # noqa: D401 + return [ + ( + DocObj( + "c1", + { + retriever.id_field: "d1", + retriever.title_field: "T1", + retriever.url_field: "u1", + }, + ), + 0.7, + ), + ( + DocObj( + "c2", + { + retriever.id_field: "d2", + retriever.title_field: "T2", + retriever.url_field: "u2", + }, + ), + 0.6, + ), + ] + + retriever.client = RemoteClient() + # Filter to only d1 + docs = retriever.query_relevant_documents( + "q", resources=[Resource(uri="milvus://d1", title="", description="")] + ) + assert len(docs) == 1 and docs[0].id == "d1" and docs[0].chunks[0].similarity == 0.7 + + +def test_get_embedding_dimension_explicit(monkeypatch): + monkeypatch.setenv("MILVUS_EMBEDDING_DIM", "777") + _patch_init(monkeypatch) + retriever = MilvusProvider() + assert retriever.embedding_dim == 777 + + +def test_get_embedding_dimension_unknown_model(monkeypatch): + monkeypatch.delenv("MILVUS_EMBEDDING_DIM", raising=False) + monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "unknown-model-x") + _patch_init(monkeypatch) + retriever = MilvusProvider() + # falls back to default 1536 + assert retriever.embedding_dim == 1536 + + +def test_is_milvus_lite_variants(monkeypatch): + _patch_init(monkeypatch) + monkeypatch.setenv("MILVUS_URI", "mydb.db") + assert MilvusProvider()._is_milvus_lite() is True + monkeypatch.setenv("MILVUS_URI", "relative_path_store") + assert MilvusProvider()._is_milvus_lite() is True + monkeypatch.setenv("MILVUS_URI", "http://host:19530") + assert MilvusProvider()._is_milvus_lite() is False + + +def test_create_collection_lite(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + created: dict = {} + + class DummyMilvusLite: + def list_collections(self): # noqa: D401 + return [] # empty triggers creation + + def create_collection( + self, collection_name, 
schema, index_params + ): # noqa: D401 + created["name"] = collection_name + created["schema"] = schema + created["index"] = index_params + + retriever.client = DummyMilvusLite() + retriever._ensure_collection_exists() + assert created["name"] == retriever.collection_name + + +def test_ensure_collection_exists_remote(monkeypatch): + _patch_init(monkeypatch) + monkeypatch.setenv("MILVUS_URI", "http://remote:19530") + retriever = MilvusProvider() + # remote path, nothing thrown + retriever.client = SimpleNamespace() + retriever._ensure_collection_exists() + + +def test_get_existing_document_ids_lite(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + + class DummyMilvusLite: + def query(self, collection_name, filter, output_fields, limit): # noqa: D401 + return [ + {retriever.id_field: "a"}, + {retriever.id_field: "b"}, + {"other": "ignored"}, + ] + + retriever.client = DummyMilvusLite() + assert retriever._get_existing_document_ids() == {"a", "b"} + + +def test_get_existing_document_ids_remote(monkeypatch): + _patch_init(monkeypatch) + monkeypatch.setenv("MILVUS_URI", "http://x") + retriever = MilvusProvider() + retriever.client = object() + assert retriever._get_existing_document_ids() == set() + + +def test_insert_document_chunk_lite_and_error(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + + captured = {} + + class DummyMilvusLite: + def insert(self, collection_name, data): # noqa: D401 + captured["data"] = data + + retriever.client = DummyMilvusLite() + retriever._insert_document_chunk( + doc_id="id1", content="hello", title="T", url="u", metadata={"m": 1} + ) + assert captured["data"][0][retriever.id_field] == "id1" + + # error path: patch embedding to raise + def bad_embed(text): # noqa: D401 + raise RuntimeError("boom") + + retriever.embedding_model.embed_query = bad_embed # type: ignore[attr-defined] + with pytest.raises(RuntimeError): + retriever._insert_document_chunk( + doc_id="id2", content="err", 
title="T", url="u", metadata={} + ) + + +def test_insert_document_chunk_remote(monkeypatch): + _patch_init(monkeypatch) + monkeypatch.setenv("MILVUS_URI", "http://remote") + retriever = MilvusProvider() + added = {} + + class RemoteClient: + def add_texts(self, texts, metadatas): # noqa: D401 + added["texts"] = texts + added["meta"] = metadatas + + retriever.client = RemoteClient() + retriever._insert_document_chunk( + doc_id="idx", content="ct", title="Title", url="urlx", metadata={"k": 2} + ) + assert added["meta"][0][retriever.id_field] == "idx" + + +def test_connect_lite_and_error(monkeypatch): + # patch MilvusClient to a dummy + class FakeMilvusClient: + def __init__(self, uri): # noqa: D401 + self.uri = uri + + def list_collections(self): # noqa: D401 + return [] + + def create_collection(self, **kwargs): # noqa: D401 + pass + + monkeypatch.setattr(milvus_mod, "MilvusClient", FakeMilvusClient) + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever._connect() + assert isinstance(retriever.client, FakeMilvusClient) + + # error path: patch MilvusClient to raise + class BadClient: + def __init__(self, uri): # noqa: D401 + raise RuntimeError("fail connect") + + monkeypatch.setattr(milvus_mod, "MilvusClient", BadClient) + retriever2 = MilvusProvider() + with pytest.raises(ConnectionError): + retriever2._connect() + + +def test_connect_remote(monkeypatch): + monkeypatch.setenv("MILVUS_URI", "http://remote") + _patch_init(monkeypatch) + created = {} + + class FakeLangchainMilvus: + def __init__(self, **kwargs): # noqa: D401 + created.update(kwargs) + + monkeypatch.setattr(milvus_mod, "LangchainMilvus", FakeLangchainMilvus) + retriever = MilvusProvider() + retriever._connect() + assert created["collection_name"] == retriever.collection_name + + +def test_list_resources_remote_failure(monkeypatch): + monkeypatch.setenv("MILVUS_URI", "http://remote") + _patch_init(monkeypatch) + retriever = MilvusProvider() + + # Provide minimal working local examples dir 
(none -> returns []) + monkeypatch.setattr(retriever, "_list_local_markdown_resources", lambda: []) + + # patch client to raise inside similarity_search to trigger fallback path + class BadClient: + def similarity_search(self, *args, **kwargs): # noqa: D401 + raise RuntimeError("fail") + + retriever.client = BadClient() + # Should fallback to [] without raising + assert retriever.list_resources() == [] + + +def test_list_local_markdown_resources_empty(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + monkeypatch.setenv("MILVUS_EXAMPLES_DIR", "nonexistent_dir") + retriever.examples_dir = "nonexistent_dir" + assert retriever._list_local_markdown_resources() == [] + + +def test_query_relevant_documents_error(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever.embedding_model.embed_query = lambda text: ( # type: ignore + _ for _ in () + ).throw(RuntimeError("embed fail")) + with pytest.raises(RuntimeError): + retriever.query_relevant_documents("q") + + +def test_create_collection_when_client_exists(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever.client = SimpleNamespace(closed=False) + # remote vs lite path difference handled by _is_milvus_lite + retriever.create_collection() # should no-op gracefully + + +def test_load_examples_force_reload(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever.client = SimpleNamespace() + called = {"clear": 0, "load": 0} + monkeypatch.setattr( + retriever, "_clear_example_documents", lambda: called.__setitem__("clear", 1) + ) + monkeypatch.setattr( + retriever, "_load_example_files", lambda: called.__setitem__("load", 1) + ) + retriever.load_examples(force_reload=True) + assert called == {"clear": 1, "load": 1} + + +def test_clear_example_documents_remote(monkeypatch): + monkeypatch.setenv("MILVUS_URI", "http://remote") + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever.client = SimpleNamespace() 
+ # Should just log and not raise + retriever._clear_example_documents() + + +def test_clear_example_documents_lite(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + deleted = {} + + class DummyMilvusLite: + def query(self, **kwargs): # noqa: D401 + return [ + {retriever.id_field: "ex1"}, + {retriever.id_field: "ex2"}, + ] + + def delete(self, collection_name, ids): # noqa: D401 + deleted["ids"] = ids + + retriever.client = DummyMilvusLite() + retriever._clear_example_documents() + assert deleted["ids"] == ["ex1", "ex2"] + + +def test_get_loaded_examples_lite_and_error(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + + class DummyMilvusLite: + def query(self, **kwargs): # noqa: D401 + return [ + { + retriever.id_field: "id1", + retriever.title_field: "T1", + retriever.url_field: "u1", + "file": "f1", + } + ] + + retriever.client = DummyMilvusLite() + loaded = retriever.get_loaded_examples() + assert loaded[0]["id"] == "id1" + + # error path + class BadClient: + def query(self, **kwargs): # noqa: D401 + raise RuntimeError("fail") + + retriever.client = BadClient() + assert retriever.get_loaded_examples() == [] + + +def test_get_loaded_examples_remote(monkeypatch): + monkeypatch.setenv("MILVUS_URI", "http://remote") + _patch_init(monkeypatch) + retriever = MilvusProvider() + retriever.client = SimpleNamespace() + assert retriever.get_loaded_examples() == [] + + +def test_close_lite_and_remote(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + closed = {"c": 0} + + class DummyMilvusLite: + def close(self): # noqa: D401 + closed["c"] += 1 + + def list_collections(self): # noqa: D401 + return [] + + def create_collection(self, **kwargs): # noqa: D401 + pass + + retriever.client = DummyMilvusLite() + retriever.close() + assert closed["c"] == 1 + + # remote path: no close attr usage expected + monkeypatch.setenv("MILVUS_URI", "http://remote") + retriever2 = MilvusProvider() + retriever2.client = 
SimpleNamespace() + retriever2.close() # should not raise + + +def test_get_embedding_invalid_output(monkeypatch): + _patch_init(monkeypatch) + retriever = MilvusProvider() + # patch embedding model to return invalid output (empty list) + retriever.embedding_model.embed_query = lambda text: [] # type: ignore + with pytest.raises(RuntimeError): + retriever._get_embedding("text") + + +def test_dashscope_embeddings_empty_inputs_short_circuit(monkeypatch): + # Use real class but swap _client to ensure create is never called + emb = milvus_mod.DashscopeEmbeddings(model="m") + + class FailingClient: + class _Emb: + def create(self, *a, **k): + raise AssertionError("Should not be called for empty input") + + embeddings = _Emb() + + emb._client = FailingClient() # type: ignore + assert emb.embed_documents([]) == [] + + +# Tests for _init_embedding_model provider selection logic +def test_init_embedding_model_openai(monkeypatch): + monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "openai") + monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "text-embedding-ada-002") + captured = {} + + class CapturingOpenAI: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(milvus_mod, "OpenAIEmbeddings", CapturingOpenAI) + prov = MilvusProvider() + assert isinstance(prov.embedding_model, CapturingOpenAI) + # kwargs forwarded + assert captured["model"] == "text-embedding-ada-002" + assert captured["encoding_format"] == "float" + assert captured["dimensions"] == prov.embedding_dim + + +def test_init_embedding_model_dashscope(monkeypatch): + monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "dashscope") + monkeypatch.setenv("MILVUS_EMBEDDING_MODEL", "text-embedding-ada-002") + captured = {} + + class CapturingDashscope: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr(milvus_mod, "DashscopeEmbeddings", CapturingDashscope) + prov = MilvusProvider() + assert isinstance(prov.embedding_model, CapturingDashscope) + assert captured["model"] 
== "text-embedding-ada-002" + assert captured["encoding_format"] == "float" + assert captured["dimensions"] == prov.embedding_dim + + +def test_init_embedding_model_invalid_provider(monkeypatch): + monkeypatch.setenv("MILVUS_EMBEDDING_PROVIDER", "not_a_provider") + with pytest.raises(ValueError): + MilvusProvider() + + +def test_load_example_files_directory_missing(monkeypatch): + _patch_init(monkeypatch) + missing_dir = "examples_dir_does_not_exist_xyz" + monkeypatch.setenv("MILVUS_EXAMPLES_DIR", missing_dir) + retriever = MilvusProvider() + retriever.examples_dir = missing_dir + called = {"insert": 0} + monkeypatch.setattr( + retriever, + "_insert_document_chunk", + lambda **kwargs: (_ for _ in ()).throw(AssertionError("should not insert")), + ) + retriever._load_example_files() + assert called["insert"] == 0 # sanity (no insertion attempted) + + +def test_load_example_files_loads_and_skips_existing(monkeypatch): + _patch_init(monkeypatch) + project_root = Path(milvus_mod.__file__).parent.parent.parent + examples_dir_name = "examples_test_load_skip" + examples_path = project_root / examples_dir_name + examples_path.mkdir(exist_ok=True) + + file1 = examples_path / "file1.md" + file2 = examples_path / "file2.md" + file1.write_text("# Title One\nContent A", encoding="utf-8") + file2.write_text("# Title Two\nContent B", encoding="utf-8") + + monkeypatch.setenv("MILVUS_EXAMPLES_DIR", examples_dir_name) + retriever = MilvusProvider() + retriever.examples_dir = examples_dir_name + + # Compute doc ids using real method + doc_id_file1 = retriever._generate_doc_id(file1) + doc_id_file2 = retriever._generate_doc_id(file2) + + # Existing docs contains file1 so it is skipped + monkeypatch.setattr(retriever, "_get_existing_document_ids", lambda: {doc_id_file1}) + # Force two chunks for any file to test suffix logic + monkeypatch.setattr(retriever, "_split_content", lambda content: ["part1", "part2"]) + + calls = [] + + def record_insert(doc_id, content, title, url, metadata): 
+ calls.append( + { + "doc_id": doc_id, + "content": content, + "title": title, + "url": url, + "metadata": metadata, + } + ) + + monkeypatch.setattr(retriever, "_insert_document_chunk", record_insert) + + retriever._load_example_files() + + # Only file2 processed -> two chunk inserts + assert len(calls) == 2 + expected_ids = {f"{doc_id_file2}_chunk_0", f"{doc_id_file2}_chunk_1"} + assert {c["doc_id"] for c in calls} == expected_ids + assert all(c["metadata"]["file"] == "file2.md" for c in calls) + assert all(c["metadata"]["source"] == "examples" for c in calls) + assert all(c["title"] == "Title Two" for c in calls) + + +def test_load_example_files_single_chunk_no_suffix(monkeypatch): + _patch_init(monkeypatch) + project_root = Path(milvus_mod.__file__).parent.parent.parent + examples_dir_name = "examples_test_single_chunk" + examples_path = project_root / examples_dir_name + examples_path.mkdir(exist_ok=True) + + file_single = examples_path / "single.md" + file_single.write_text( + "# Single Title\nOnly one small paragraph.", encoding="utf-8" + ) + + monkeypatch.setenv("MILVUS_EXAMPLES_DIR", examples_dir_name) + retriever = MilvusProvider() + retriever.examples_dir = examples_dir_name + + base_doc_id = retriever._generate_doc_id(file_single) + + monkeypatch.setattr(retriever, "_get_existing_document_ids", lambda: set()) + monkeypatch.setattr(retriever, "_split_content", lambda content: ["onlychunk"]) + + captured = {} + + def capture(doc_id, content, title, url, metadata): + captured["doc_id"] = doc_id + captured["title"] = title + captured["metadata"] = metadata + + monkeypatch.setattr(retriever, "_insert_document_chunk", capture) + + retriever._load_example_files() + + assert captured["doc_id"] == base_doc_id # no _chunk_ suffix + assert captured["title"] == "Single Title" + assert captured["metadata"]["file"] == "single.md" + assert captured["metadata"]["source"] == "examples" diff --git a/uv.lock b/uv.lock index 11e9675..497d4ea 100644 --- a/uv.lock +++ 
b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "python_full_version >= '3.13'", @@ -397,6 +397,7 @@ dependencies = [ { name = "langchain-deepseek" }, { name = "langchain-experimental" }, { name = "langchain-mcp-adapters" }, + { name = "langchain-milvus" }, { name = "langchain-openai" }, { name = "langchain-tavily" }, { name = "langgraph" }, @@ -408,6 +409,7 @@ dependencies = [ { name = "numpy" }, { name = "pandas" }, { name = "psycopg", extra = ["binary"] }, + { name = "pymilvus" }, { name = "python-dotenv" }, { name = "readabilipy" }, { name = "socksio" }, @@ -445,6 +447,7 @@ requires-dist = [ { name = "langchain-deepseek", specifier = ">=0.1.3" }, { name = "langchain-experimental", specifier = ">=0.3.4" }, { name = "langchain-mcp-adapters", specifier = ">=0.0.9" }, + { name = "langchain-milvus", specifier = ">=0.2.1" }, { name = "langchain-openai", specifier = ">=0.3.8" }, { name = "langchain-tavily", specifier = "<0.3" }, { name = "langgraph", specifier = ">=0.3.5" }, @@ -458,6 +461,7 @@ requires-dist = [ { name = "numpy", specifier = ">=2.2.3" }, { name = "pandas", specifier = ">=2.2.3" }, { name = "psycopg", extras = ["binary"], specifier = ">=3.2.9" }, + { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pytest", marker = "extra == 'test'", specifier = ">=7.4.0" }, { name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=1.0.0" }, { name = "pytest-cov", marker = "extra == 'test'", specifier = ">=4.1.0" }, @@ -639,6 +643,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112, upload-time = "2024-09-20T17:09:28.753Z" }, ] +[[package]] +name = "grpcio" +version = "1.74.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/38/b4/35feb8f7cab7239c5b94bd2db71abb3d6adb5f335ad8f131abb6060840b6/grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1", size = 12756048, upload-time = "2025-07-24T18:54:23.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/5d/e504d5d5c4469823504f65687d6c8fb97b7f7bf0b34873b7598f1df24630/grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8", size = 5445551, upload-time = "2025-07-24T18:53:23.641Z" }, + { url = "https://files.pythonhosted.org/packages/43/01/730e37056f96f2f6ce9f17999af1556df62ee8dab7fa48bceeaab5fd3008/grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6", size = 10979810, upload-time = "2025-07-24T18:53:25.349Z" }, + { url = "https://files.pythonhosted.org/packages/79/3d/09fd100473ea5c47083889ca47ffd356576173ec134312f6aa0e13111dee/grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5", size = 5941946, upload-time = "2025-07-24T18:53:27.387Z" }, + { url = "https://files.pythonhosted.org/packages/8a/99/12d2cca0a63c874c6d3d195629dcd85cdf5d6f98a30d8db44271f8a97b93/grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49", size = 6621763, upload-time = "2025-07-24T18:53:29.193Z" }, + { url = "https://files.pythonhosted.org/packages/9d/2c/930b0e7a2f1029bbc193443c7bc4dc2a46fedb0203c8793dcd97081f1520/grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7", size = 6180664, upload-time = "2025-07-24T18:53:30.823Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/d5/ff8a2442180ad0867717e670f5ec42bfd8d38b92158ad6bcd864e6d4b1ed/grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3", size = 6301083, upload-time = "2025-07-24T18:53:32.454Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ba/b361d390451a37ca118e4ec7dccec690422e05bc85fba2ec72b06cefec9f/grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707", size = 6994132, upload-time = "2025-07-24T18:53:34.506Z" }, + { url = "https://files.pythonhosted.org/packages/3b/0c/3a5fa47d2437a44ced74141795ac0251bbddeae74bf81df3447edd767d27/grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b", size = 6489616, upload-time = "2025-07-24T18:53:36.217Z" }, + { url = "https://files.pythonhosted.org/packages/ae/95/ab64703b436d99dc5217228babc76047d60e9ad14df129e307b5fec81fd0/grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c", size = 3807083, upload-time = "2025-07-24T18:53:37.911Z" }, + { url = "https://files.pythonhosted.org/packages/84/59/900aa2445891fc47a33f7d2f76e00ca5d6ae6584b20d19af9c06fa09bf9a/grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc", size = 4490123, upload-time = "2025-07-24T18:53:39.528Z" }, + { url = "https://files.pythonhosted.org/packages/d4/d8/1004a5f468715221450e66b051c839c2ce9a985aa3ee427422061fcbb6aa/grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89", size = 5449488, upload-time = "2025-07-24T18:53:41.174Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/0e/33731a03f63740d7743dced423846c831d8e6da808fcd02821a4416df7fa/grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01", size = 10974059, upload-time = "2025-07-24T18:53:43.066Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c6/3d2c14d87771a421205bdca991467cfe473ee4c6a1231c1ede5248c62ab8/grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e", size = 5945647, upload-time = "2025-07-24T18:53:45.269Z" }, + { url = "https://files.pythonhosted.org/packages/c5/83/5a354c8aaff58594eef7fffebae41a0f8995a6258bbc6809b800c33d4c13/grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91", size = 6626101, upload-time = "2025-07-24T18:53:47.015Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ca/4fdc7bf59bf6994aa45cbd4ef1055cd65e2884de6113dbd49f75498ddb08/grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249", size = 6182562, upload-time = "2025-07-24T18:53:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/fd/48/2869e5b2c1922583686f7ae674937986807c2f676d08be70d0a541316270/grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362", size = 6303425, upload-time = "2025-07-24T18:53:50.847Z" }, + { url = "https://files.pythonhosted.org/packages/a6/0e/bac93147b9a164f759497bc6913e74af1cb632c733c7af62c0336782bd38/grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f", size = 6996533, upload-time = "2025-07-24T18:53:52.747Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/35/9f6b2503c1fd86d068b46818bbd7329db26a87cdd8c01e0d1a9abea1104c/grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20", size = 6491489, upload-time = "2025-07-24T18:53:55.06Z" }, + { url = "https://files.pythonhosted.org/packages/75/33/a04e99be2a82c4cbc4039eb3a76f6c3632932b9d5d295221389d10ac9ca7/grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa", size = 3805811, upload-time = "2025-07-24T18:53:56.798Z" }, + { url = "https://files.pythonhosted.org/packages/34/80/de3eb55eb581815342d097214bed4c59e806b05f1b3110df03b2280d6dfd/grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24", size = 4489214, upload-time = "2025-07-24T18:53:59.771Z" }, +] + [[package]] name = "h11" version = "0.14.0" @@ -984,6 +1016,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/eb/9e98822d3db22beff44449a8f61fca208d4f59d592a7ce67ce4c400b8f8f/langchain_mcp_adapters-0.1.9-py3-none-any.whl", hash = "sha256:fd131009c60c9e5a864f96576bbe757fc1809abd604891cb2e5d6e8aebd6975c", size = 15300, upload-time = "2025-07-09T15:56:13.316Z" }, ] +[[package]] +name = "langchain-milvus" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain-core" }, + { name = "pymilvus" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3e/dd/5e8b7f6f17da0e54205956feab3f7856cb7dc821dbe817f2990aa028e4cc/langchain_milvus-0.2.1.tar.gz", hash = "sha256:6e60e43959464ae2be9dadceb4fab6b3ddcec5bb1f2d29e898924f1c2651baf1", size = 32639, upload-time = "2025-06-28T09:59:53.826Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/b1/54e176cc8ac80df9a2c4ee9f726d6383fcf9818317c68532cfc90fa91b6c/langchain_milvus-0.2.1-py3-none-any.whl", hash = 
"sha256:faabf4685c15ef9651605172427073d6ffc52c0f36f3b88842977db883062c99", size = 36110, upload-time = "2025-06-28T09:59:52.965Z" }, +] + [[package]] name = "langchain-mongodb" version = "0.6.2" @@ -1370,6 +1415,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2f/cf/3fd38cfe43962452e4bfadc6966b2ea0afaf8e0286cb3991c247c8c33ebd/mcp-1.12.2-py3-none-any.whl", hash = "sha256:b86d584bb60193a42bd78aef01882c5c42d614e416cbf0480149839377ab5a5f", size = 158473, upload-time = "2025-07-24T18:29:03.419Z" }, ] +[[package]] +name = "milvus-lite" +version = "2.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tqdm" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/b2/acc5024c8e8b6a0b034670b8e8af306ebd633ede777dcbf557eac4785937/milvus_lite-2.5.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6b014453200ba977be37ba660cb2d021030375fa6a35bc53c2e1d92980a0c512", size = 27934713, upload-time = "2025-06-30T04:23:37.028Z" }, + { url = "https://files.pythonhosted.org/packages/9b/2e/746f5bb1d6facd1e73eb4af6dd5efda11125b0f29d7908a097485ca6cad9/milvus_lite-2.5.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a2e031088bf308afe5f8567850412d618cfb05a65238ed1a6117f60decccc95a", size = 24421451, upload-time = "2025-06-30T04:23:51.747Z" }, + { url = "https://files.pythonhosted.org/packages/2e/cf/3d1fee5c16c7661cf53977067a34820f7269ed8ba99fe9cf35efc1700866/milvus_lite-2.5.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:a13277e9bacc6933dea172e42231f7e6135bd3bdb073dd2688ee180418abd8d9", size = 45337093, upload-time = "2025-06-30T04:24:06.706Z" }, + { url = "https://files.pythonhosted.org/packages/d3/82/41d9b80f09b82e066894d9b508af07b7b0fa325ce0322980674de49106a0/milvus_lite-2.5.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:25ce13f4b8d46876dd2b7ac8563d7d8306da7ff3999bb0d14b116b30f71d706c", size = 55263911, upload-time = "2025-06-30T04:24:19.434Z" }, +] + [[package]] name = "mirakuru" version = "2.6.1" @@ -1750,6 
+1809,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/35/6c4c6fc8774a9e3629cd750dc24a7a4fb090a25ccd5c3246d127b70f9e22/propcache-0.3.0-py3-none-any.whl", hash = "sha256:67dda3c7325691c2081510e92c561f465ba61b975f481735aefdfc845d2cd043", size = 12101, upload-time = "2025-02-20T19:03:27.202Z" }, ] +[[package]] +name = "protobuf" +version = "6.32.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/a4/cc17347aa2897568beece2e674674359f911d6fe21b0b8d6268cd42727ac/protobuf-6.32.1.tar.gz", hash = "sha256:ee2469e4a021474ab9baafea6cd070e5bf27c7d29433504ddea1a4ee5850f68d", size = 440635, upload-time = "2025-09-11T21:38:42.935Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/98/645183ea03ab3995d29086b8bf4f7562ebd3d10c9a4b14ee3f20d47cfe50/protobuf-6.32.1-cp310-abi3-win32.whl", hash = "sha256:a8a32a84bc9f2aad712041b8b366190f71dde248926da517bde9e832e4412085", size = 424411, upload-time = "2025-09-11T21:38:27.427Z" }, + { url = "https://files.pythonhosted.org/packages/8c/f3/6f58f841f6ebafe076cebeae33fc336e900619d34b1c93e4b5c97a81fdfa/protobuf-6.32.1-cp310-abi3-win_amd64.whl", hash = "sha256:b00a7d8c25fa471f16bc8153d0e53d6c9e827f0953f3c09aaa4331c718cae5e1", size = 435738, upload-time = "2025-09-11T21:38:30.959Z" }, + { url = "https://files.pythonhosted.org/packages/10/56/a8a3f4e7190837139e68c7002ec749190a163af3e330f65d90309145a210/protobuf-6.32.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8c7e6eb619ffdf105ee4ab76af5a68b60a9d0f66da3ea12d1640e6d8dab7281", size = 426454, upload-time = "2025-09-11T21:38:34.076Z" }, + { url = "https://files.pythonhosted.org/packages/3f/be/8dd0a927c559b37d7a6c8ab79034fd167dcc1f851595f2e641ad62be8643/protobuf-6.32.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:2f5b80a49e1eb7b86d85fcd23fe92df154b9730a725c3b38c4e43b9d77018bf4", size = 322874, upload-time = "2025-09-11T21:38:35.509Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/f6/88d77011b605ef979aace37b7703e4eefad066f7e84d935e5a696515c2dd/protobuf-6.32.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:b1864818300c297265c83a4982fd3169f97122c299f56a56e2445c3698d34710", size = 322013, upload-time = "2025-09-11T21:38:37.017Z" }, + { url = "https://files.pythonhosted.org/packages/97/b7/15cc7d93443d6c6a84626ae3258a91f4c6ac8c0edd5df35ea7658f71b79c/protobuf-6.32.1-py3-none-any.whl", hash = "sha256:2601b779fc7d32a866c6b4404f9d42a3f67c5b9f3f15b4db3cccabe06b95c346", size = 169289, upload-time = "2025-09-11T21:38:41.234Z" }, +] + [[package]] name = "psutil" version = "7.0.0" @@ -1908,6 +1981,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, ] +[[package]] +name = "pymilvus" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "milvus-lite", marker = "sys_platform != 'win32'" }, + { name = "pandas" }, + { name = "protobuf" }, + { name = "python-dotenv" }, + { name = "setuptools" }, + { name = "ujson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/a9/b25af985972082d1bb0b26739fece8cea3f56370733b4b1de690c42a77cc/pymilvus-2.6.1.tar.gz", hash = "sha256:ef1d7f5039719398d131ca80c19e55bc2bccc7ab6609f2cca9a04217dcb0a7fb", size = 1322169, upload-time = "2025-08-29T10:03:50.523Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/1a/8b677e0f4ef683bbfb00d495960573fff0844ed509b3cf0abede79a48e90/pymilvus-2.6.1-py3-none-any.whl", hash = "sha256:e3d76d45ce04d3555a6849645a18a1e2992706e248d5b6dc58a00504d0b60165", size = 254252, upload-time = "2025-08-29T10:03:48.539Z" }, +] + [[package]] name = "pymongo" version = "4.12.1" @@ -2258,6 +2349,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/49/65/dea992c6a97074f6d8ff9eab34741298cac2ce23e2b6c74fb7d08afdf85c/sentinels-1.1.1-py3-none-any.whl", hash = "sha256:835d3b28f3b47f5284afa4bf2db6e00f2dc5f80f9923d4b7e7aeeeccf6146a11", size = 3744, upload-time = "2025-08-12T07:57:48.858Z" }, ] +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958, upload-time = "2025-05-27T00:56:51.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" }, +] + [[package]] name = "sgmllib3k" version = "1.0.0" @@ -2474,6 +2574,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639", size = 346762, upload-time = "2025-01-21T19:49:37.187Z" }, ] +[[package]] +name = "ujson" +version = "5.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/d9/3f17e3c5773fb4941c68d9a37a47b1a79c9649d6c56aefbed87cc409d18a/ujson-5.11.0.tar.gz", hash = "sha256:e204ae6f909f099ba6b6b942131cee359ddda2b6e4ea39c12eb8b991fe2010e0", size = 7156583, upload-time = "2025-08-20T11:57:02.452Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/ef/a9cb1fce38f699123ff012161599fb9f2ff3f8d482b4b18c43a2dc35073f/ujson-5.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:7895f0d2d53bd6aea11743bd56e3cb82d729980636cd0ed9b89418bf66591702", size = 55434, upload-time = "2025-08-20T11:55:34.987Z" }, + { url = "https://files.pythonhosted.org/packages/b1/05/dba51a00eb30bd947791b173766cbed3492269c150a7771d2750000c965f/ujson-5.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12b5e7e22a1fe01058000d1b317d3b65cc3daf61bd2ea7a2b76721fe160fa74d", size = 53190, upload-time = "2025-08-20T11:55:36.384Z" }, + { url = "https://files.pythonhosted.org/packages/03/3c/fd11a224f73fbffa299fb9644e425f38b38b30231f7923a088dd513aabb4/ujson-5.11.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0180a480a7d099082501cad1fe85252e4d4bf926b40960fb3d9e87a3a6fbbc80", size = 57600, upload-time = "2025-08-20T11:55:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/55/b9/405103cae24899df688a3431c776e00528bd4799e7d68820e7ebcf824f92/ujson-5.11.0-cp312-cp312-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:fa79fdb47701942c2132a9dd2297a1a85941d966d8c87bfd9e29b0cf423f26cc", size = 59791, upload-time = "2025-08-20T11:55:38.877Z" }, + { url = "https://files.pythonhosted.org/packages/17/7b/2dcbc2bbfdbf68f2368fb21ab0f6735e872290bb604c75f6e06b81edcb3f/ujson-5.11.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8254e858437c00f17cb72e7a644fc42dad0ebb21ea981b71df6e84b1072aaa7c", size = 57356, upload-time = "2025-08-20T11:55:40.036Z" }, + { url = "https://files.pythonhosted.org/packages/d1/71/fea2ca18986a366c750767b694430d5ded6b20b6985fddca72f74af38a4c/ujson-5.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1aa8a2ab482f09f6c10fba37112af5f957689a79ea598399c85009f2f29898b5", size = 1036313, upload-time = "2025-08-20T11:55:41.408Z" }, + { url = "https://files.pythonhosted.org/packages/a3/bb/d4220bd7532eac6288d8115db51710fa2d7d271250797b0bfba9f1e755af/ujson-5.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a638425d3c6eed0318df663df44480f4a40dc87cc7c6da44d221418312f6413b", 
size = 1195782, upload-time = "2025-08-20T11:55:43.357Z" }, + { url = "https://files.pythonhosted.org/packages/80/47/226e540aa38878ce1194454385701d82df538ccb5ff8db2cf1641dde849a/ujson-5.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7e3cff632c1d78023b15f7e3a81c3745cd3f94c044d1e8fa8efbd6b161997bbc", size = 1088817, upload-time = "2025-08-20T11:55:45.262Z" }, + { url = "https://files.pythonhosted.org/packages/7e/81/546042f0b23c9040d61d46ea5ca76f0cc5e0d399180ddfb2ae976ebff5b5/ujson-5.11.0-cp312-cp312-win32.whl", hash = "sha256:be6b0eaf92cae8cdee4d4c9e074bde43ef1c590ed5ba037ea26c9632fb479c88", size = 39757, upload-time = "2025-08-20T11:55:46.522Z" }, + { url = "https://files.pythonhosted.org/packages/44/1b/27c05dc8c9728f44875d74b5bfa948ce91f6c33349232619279f35c6e817/ujson-5.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:b7b136cc6abc7619124fd897ef75f8e63105298b5ca9bdf43ebd0e1fa0ee105f", size = 43859, upload-time = "2025-08-20T11:55:47.987Z" }, + { url = "https://files.pythonhosted.org/packages/22/2d/37b6557c97c3409c202c838aa9c960ca3896843b4295c4b7bb2bbd260664/ujson-5.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:6cd2df62f24c506a0ba322d5e4fe4466d47a9467b57e881ee15a31f7ecf68ff6", size = 38361, upload-time = "2025-08-20T11:55:49.122Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ec/2de9dd371d52c377abc05d2b725645326c4562fc87296a8907c7bcdf2db7/ujson-5.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:109f59885041b14ee9569bf0bb3f98579c3fa0652317b355669939e5fc5ede53", size = 55435, upload-time = "2025-08-20T11:55:50.243Z" }, + { url = "https://files.pythonhosted.org/packages/5b/a4/f611f816eac3a581d8a4372f6967c3ed41eddbae4008d1d77f223f1a4e0a/ujson-5.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a31c6b8004438e8c20fc55ac1c0e07dad42941db24176fe9acf2815971f8e752", size = 53193, upload-time = "2025-08-20T11:55:51.373Z" }, + { url = 
"https://files.pythonhosted.org/packages/e9/c5/c161940967184de96f5cbbbcce45b562a4bf851d60f4c677704b1770136d/ujson-5.11.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78c684fb21255b9b90320ba7e199780f653e03f6c2528663768965f4126a5b50", size = 57603, upload-time = "2025-08-20T11:55:52.583Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d6/c7b2444238f5b2e2d0e3dab300b9ddc3606e4b1f0e4bed5a48157cebc792/ujson-5.11.0-cp313-cp313-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:4c9f5d6a27d035dd90a146f7761c2272cf7103de5127c9ab9c4cd39ea61e878a", size = 59794, upload-time = "2025-08-20T11:55:53.69Z" }, + { url = "https://files.pythonhosted.org/packages/fe/a3/292551f936d3d02d9af148f53e1bc04306b00a7cf1fcbb86fa0d1c887242/ujson-5.11.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:837da4d27fed5fdc1b630bd18f519744b23a0b5ada1bbde1a36ba463f2900c03", size = 57363, upload-time = "2025-08-20T11:55:54.843Z" }, + { url = "https://files.pythonhosted.org/packages/90/a6/82cfa70448831b1a9e73f882225980b5c689bf539ec6400b31656a60ea46/ujson-5.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:787aff4a84da301b7f3bac09bc696e2e5670df829c6f8ecf39916b4e7e24e701", size = 1036311, upload-time = "2025-08-20T11:55:56.197Z" }, + { url = "https://files.pythonhosted.org/packages/84/5c/96e2266be50f21e9b27acaee8ca8f23ea0b85cb998c33d4f53147687839b/ujson-5.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6dd703c3e86dc6f7044c5ac0b3ae079ed96bf297974598116aa5fb7f655c3a60", size = 1195783, upload-time = "2025-08-20T11:55:58.081Z" }, + { url = "https://files.pythonhosted.org/packages/8d/20/78abe3d808cf3bb3e76f71fca46cd208317bf461c905d79f0d26b9df20f1/ujson-5.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3772e4fe6b0c1e025ba3c50841a0ca4786825a4894c8411bf8d3afe3a8061328", size = 1088822, upload-time = "2025-08-20T11:55:59.469Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/50/8856e24bec5e2fc7f775d867aeb7a3f137359356200ac44658f1f2c834b2/ujson-5.11.0-cp313-cp313-win32.whl", hash = "sha256:8fa2af7c1459204b7a42e98263b069bd535ea0cd978b4d6982f35af5a04a4241", size = 39753, upload-time = "2025-08-20T11:56:01.345Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d8/1baee0f4179a4d0f5ce086832147b6cc9b7731c24ca08e14a3fdb8d39c32/ujson-5.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:34032aeca4510a7c7102bd5933f59a37f63891f30a0706fb46487ab6f0edf8f0", size = 43866, upload-time = "2025-08-20T11:56:02.552Z" }, + { url = "https://files.pythonhosted.org/packages/a9/8c/6d85ef5be82c6d66adced3ec5ef23353ed710a11f70b0b6a836878396334/ujson-5.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:ce076f2df2e1aa62b685086fbad67f2b1d3048369664b4cdccc50707325401f9", size = 38363, upload-time = "2025-08-20T11:56:03.688Z" }, + { url = "https://files.pythonhosted.org/packages/28/08/4518146f4984d112764b1dfa6fb7bad691c44a401adadaa5e23ccd930053/ujson-5.11.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:65724738c73645db88f70ba1f2e6fb678f913281804d5da2fd02c8c5839af302", size = 55462, upload-time = "2025-08-20T11:56:04.873Z" }, + { url = "https://files.pythonhosted.org/packages/29/37/2107b9a62168867a692654d8766b81bd2fd1e1ba13e2ec90555861e02b0c/ujson-5.11.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29113c003ca33ab71b1b480bde952fbab2a0b6b03a4ee4c3d71687cdcbd1a29d", size = 53246, upload-time = "2025-08-20T11:56:06.054Z" }, + { url = "https://files.pythonhosted.org/packages/9b/f8/25583c70f83788edbe3ca62ce6c1b79eff465d78dec5eb2b2b56b3e98b33/ujson-5.11.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c44c703842024d796b4c78542a6fcd5c3cb948b9fc2a73ee65b9c86a22ee3638", size = 57631, upload-time = "2025-08-20T11:56:07.374Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/ca/19b3a632933a09d696f10dc1b0dfa1d692e65ad507d12340116ce4f67967/ujson-5.11.0-cp314-cp314-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:e750c436fb90edf85585f5c62a35b35082502383840962c6983403d1bd96a02c", size = 59877, upload-time = "2025-08-20T11:56:08.534Z" }, + { url = "https://files.pythonhosted.org/packages/55/7a/4572af5324ad4b2bfdd2321e898a527050290147b4ea337a79a0e4e87ec7/ujson-5.11.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f278b31a7c52eb0947b2db55a5133fbc46b6f0ef49972cd1a80843b72e135aba", size = 57363, upload-time = "2025-08-20T11:56:09.758Z" }, + { url = "https://files.pythonhosted.org/packages/7b/71/a2b8c19cf4e1efe53cf439cdf7198ac60ae15471d2f1040b490c1f0f831f/ujson-5.11.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ab2cb8351d976e788669c8281465d44d4e94413718af497b4e7342d7b2f78018", size = 1036394, upload-time = "2025-08-20T11:56:11.168Z" }, + { url = "https://files.pythonhosted.org/packages/7a/3e/7b98668cba3bb3735929c31b999b374ebc02c19dfa98dfebaeeb5c8597ca/ujson-5.11.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:090b4d11b380ae25453100b722d0609d5051ffe98f80ec52853ccf8249dfd840", size = 1195837, upload-time = "2025-08-20T11:56:12.6Z" }, + { url = "https://files.pythonhosted.org/packages/a1/ea/8870f208c20b43571a5c409ebb2fe9b9dba5f494e9e60f9314ac01ea8f78/ujson-5.11.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:80017e870d882d5517d28995b62e4e518a894f932f1e242cbc802a2fd64d365c", size = 1088837, upload-time = "2025-08-20T11:56:14.15Z" }, + { url = "https://files.pythonhosted.org/packages/63/b6/c0e6607e37fa47929920a685a968c6b990a802dec65e9c5181e97845985d/ujson-5.11.0-cp314-cp314-win32.whl", hash = "sha256:1d663b96eb34c93392e9caae19c099ec4133ba21654b081956613327f0e973ac", size = 41022, upload-time = "2025-08-20T11:56:15.509Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/56/f4fe86b4c9000affd63e9219e59b222dc48b01c534533093e798bf617a7e/ujson-5.11.0-cp314-cp314-win_amd64.whl", hash = "sha256:849e65b696f0d242833f1df4182096cedc50d414215d1371fca85c541fbff629", size = 45111, upload-time = "2025-08-20T11:56:16.597Z" }, + { url = "https://files.pythonhosted.org/packages/0a/f3/669437f0280308db4783b12a6d88c00730b394327d8334cc7a32ef218e64/ujson-5.11.0-cp314-cp314-win_arm64.whl", hash = "sha256:e73df8648c9470af2b6a6bf5250d4744ad2cf3d774dcf8c6e31f018bdd04d764", size = 39682, upload-time = "2025-08-20T11:56:17.763Z" }, + { url = "https://files.pythonhosted.org/packages/6e/cd/e9809b064a89fe5c4184649adeb13c1b98652db3f8518980b04227358574/ujson-5.11.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:de6e88f62796372fba1de973c11138f197d3e0e1d80bcb2b8aae1e826096d433", size = 55759, upload-time = "2025-08-20T11:56:18.882Z" }, + { url = "https://files.pythonhosted.org/packages/1b/be/ae26a6321179ebbb3a2e2685b9007c71bcda41ad7a77bbbe164005e956fc/ujson-5.11.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:49e56ef8066f11b80d620985ae36869a3ff7e4b74c3b6129182ec5d1df0255f3", size = 53634, upload-time = "2025-08-20T11:56:20.012Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e9/fb4a220ee6939db099f4cfeeae796ecb91e7584ad4d445d4ca7f994a9135/ujson-5.11.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1a325fd2c3a056cf6c8e023f74a0c478dd282a93141356ae7f16d5309f5ff823", size = 58547, upload-time = "2025-08-20T11:56:21.175Z" }, + { url = "https://files.pythonhosted.org/packages/bd/f8/fc4b952b8f5fea09ea3397a0bd0ad019e474b204cabcb947cead5d4d1ffc/ujson-5.11.0-cp314-cp314t-manylinux_2_24_i686.manylinux_2_28_i686.whl", hash = "sha256:a0af6574fc1d9d53f4ff371f58c96673e6d988ed2b5bf666a6143c782fa007e9", size = 60489, upload-time = "2025-08-20T11:56:22.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/e5/af5491dfda4f8b77e24cf3da68ee0d1552f99a13e5c622f4cef1380925c3/ujson-5.11.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10f29e71ecf4ecd93a6610bd8efa8e7b6467454a363c3d6416db65de883eb076", size = 58035, upload-time = "2025-08-20T11:56:23.92Z" }, + { url = "https://files.pythonhosted.org/packages/c4/09/0945349dd41f25cc8c38d78ace49f14c5052c5bbb7257d2f466fa7bdb533/ujson-5.11.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1a0a9b76a89827a592656fe12e000cf4f12da9692f51a841a4a07aa4c7ecc41c", size = 1037212, upload-time = "2025-08-20T11:56:25.274Z" }, + { url = "https://files.pythonhosted.org/packages/49/44/8e04496acb3d5a1cbee3a54828d9652f67a37523efa3d3b18a347339680a/ujson-5.11.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:b16930f6a0753cdc7d637b33b4e8f10d5e351e1fb83872ba6375f1e87be39746", size = 1196500, upload-time = "2025-08-20T11:56:27.517Z" }, + { url = "https://files.pythonhosted.org/packages/64/ae/4bc825860d679a0f208a19af2f39206dfd804ace2403330fdc3170334a2f/ujson-5.11.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:04c41afc195fd477a59db3a84d5b83a871bd648ef371cf8c6f43072d89144eef", size = 1089487, upload-time = "2025-08-20T11:56:29.07Z" }, + { url = "https://files.pythonhosted.org/packages/30/ed/5a057199fb0a5deabe0957073a1c1c1c02a3e99476cd03daee98ea21fa57/ujson-5.11.0-cp314-cp314t-win32.whl", hash = "sha256:aa6d7a5e09217ff93234e050e3e380da62b084e26b9f2e277d2606406a2fc2e5", size = 41859, upload-time = "2025-08-20T11:56:30.495Z" }, + { url = "https://files.pythonhosted.org/packages/aa/03/b19c6176bdf1dc13ed84b886e99677a52764861b6cc023d5e7b6ebda249d/ujson-5.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:48055e1061c1bb1f79e75b4ac39e821f3f35a9b82de17fce92c3140149009bec", size = 46183, upload-time = "2025-08-20T11:56:31.574Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/ca/a0413a3874b2dc1708b8796ca895bf363292f9c70b2e8ca482b7dbc0259d/ujson-5.11.0-cp314-cp314t-win_arm64.whl", hash = "sha256:1194b943e951092db611011cb8dbdb6cf94a3b816ed07906e14d3bc6ce0e90ab", size = 40264, upload-time = "2025-08-20T11:56:32.773Z" }, +] + [[package]] name = "urllib3" version = "2.3.0" diff --git a/web/src/app/chat/components/input-box.tsx b/web/src/app/chat/components/input-box.tsx index 19673fb..ae18ec4 100644 --- a/web/src/app/chat/components/input-box.tsx +++ b/web/src/app/chat/components/input-box.tsx @@ -5,7 +5,7 @@ import { MagicWandIcon } from "@radix-ui/react-icons"; import { AnimatePresence, motion } from "framer-motion"; import { ArrowUp, Lightbulb, X } from "lucide-react"; import { useTranslations } from "next-intl"; -import { useCallback, useMemo, useRef, useState } from "react"; +import { useCallback, useRef, useState } from "react"; import { Detective } from "~/components/deer-flow/icons/detective"; import MessageInput, { diff --git a/web/src/app/chat/components/research-activities-block.tsx b/web/src/app/chat/components/research-activities-block.tsx index 6730734..3e78022 100644 --- a/web/src/app/chat/components/research-activities-block.tsx +++ b/web/src/app/chat/components/research-activities-block.tsx @@ -91,6 +91,9 @@ function ActivityListItem({ messageId }: { messageId: string }) { if (message) { if (!message.isStreaming && message.toolCalls?.length) { for (const toolCall of message.toolCalls) { + if (toolCall.result?.startsWith("Error")) { + return null; + } if (toolCall.name === "web_search") { return ; } else if (toolCall.name === "crawl_tool") { @@ -111,16 +114,16 @@ function ActivityListItem({ messageId }: { messageId: string }) { const __pageCache = new LRUCache({ max: 100 }); type SearchResult = | { - type: "page"; - title: string; - url: string; - content: string; - } + type: "page"; + title: string; + url: string; + content: string; + } | { - type: "image"; - image_url: string; - 
image_description: string; - }; + type: "image"; + image_url: string; + image_description: string; + }; function WebSearchToolCall({ toolCall }: { toolCall: ToolCallRuntime }) { const t = useTranslations("chat.research"); @@ -317,7 +320,7 @@ function RetrieverToolCall({ toolCall }: { toolCall: ToolCallRuntime }) { /> ))} - {documents.map((doc, i) => ( + {documents?.map((doc, i) => ( - {doc.title} + {doc.title} (chunk-{i},size-{doc.content.length}) ))} diff --git a/web/src/app/settings/tabs/about-tab.tsx b/web/src/app/settings/tabs/about-tab.tsx index 4cf7ba2..14c5088 100644 --- a/web/src/app/settings/tabs/about-tab.tsx +++ b/web/src/app/settings/tabs/about-tab.tsx @@ -12,7 +12,7 @@ import type { Tab } from "./types"; export const AboutTab: Tab = () => { const locale = useLocale(); - const t = useTranslations("settings.about"); + //const t = useTranslations("settings.about"); const aboutContent = locale === "zh" ? aboutZh : aboutEn;