From 462752b462dd280acccb31abb6fa254bf059549b Mon Sep 17 00:00:00 2001 From: JeffJiang Date: Wed, 28 May 2025 14:13:46 +0800 Subject: [PATCH] feat: RAG Integration (#238) * feat: add rag provider and retriever * feat: retriever tool * feat: add retriever tool to the researcher node * feat: add rag http apis * feat: new message input supports resource mentions * feat: new message input component support resource mentions * refactor: need_web_search to need_search * chore: RAG integration docs * chore: change example api host * fix: user message color in dark mode * fix: mentions style * feat: add local_search_tool to researcher prompt * chore: research prompt * fix: ragflow page size and reporter with * docs: ragflow integration and add acknowledgment projects * chore: format --- .env.example | 6 + README.md | 15 ++ src/config/configuration.py | 7 +- src/config/tools.py | 7 + src/graph/nodes.py | 31 ++- src/graph/types.py | 5 +- src/prompts/planner.md | 47 ++-- src/prompts/planner_model.py | 6 +- src/prompts/researcher.md | 5 +- src/rag/__init__.py | 5 + src/rag/builder.py | 11 + src/rag/ragflow.py | 130 +++++++++++ src/rag/retriever.py | 77 +++++++ src/server/app.py | 30 ++- src/server/chat_request.py | 5 + src/server/rag_request.py | 25 +++ src/tools/__init__.py | 2 + src/tools/retriever.py | 74 ++++++ src/tools/search.py | 8 +- tests/test_state.py | 6 +- web/package.json | 7 +- web/pnpm-lock.yaml | 210 +++++++++++++++++- .../replay/github-top-trending-repo.txt | 6 +- .../replay/nanjing-traditional-dishes.txt | 6 +- .../replay/rental-apartment-decoration.txt | 6 +- .../replay/review-of-the-professional.txt | 4 +- web/src/app/chat/components/input-box.tsx | 123 ++++------ .../app/chat/components/message-list-view.tsx | 14 +- .../app/chat/components/messages-block.tsx | 11 +- .../components/research-activities-block.tsx | 6 +- .../chat/components/research-report-block.tsx | 5 +- web/src/components/deer-flow/link.tsx | 2 +- .../components/deer-flow/message-input.tsx 
| 184 +++++++++++++++ .../deer-flow/resource-mentions.tsx | 84 +++++++ .../deer-flow/resource-suggestion.tsx | 83 +++++++ web/src/components/editor/index.tsx | 17 -- web/src/core/api/chat.ts | 2 + web/src/core/api/hooks.ts | 26 +++ web/src/core/api/rag.ts | 24 ++ web/src/core/messages/types.ts | 6 + web/src/core/store/store.ts | 6 +- web/src/styles/globals.css | 2 +- web/src/styles/prosemirror.css | 17 +- 43 files changed, 1172 insertions(+), 181 deletions(-) create mode 100644 src/rag/__init__.py create mode 100644 src/rag/builder.py create mode 100644 src/rag/ragflow.py create mode 100644 src/rag/retriever.py create mode 100644 src/server/rag_request.py create mode 100644 src/tools/retriever.py create mode 100644 web/src/components/deer-flow/message-input.tsx create mode 100644 web/src/components/deer-flow/resource-mentions.tsx create mode 100644 web/src/components/deer-flow/resource-suggestion.tsx create mode 100644 web/src/core/api/rag.ts diff --git a/.env.example b/.env.example index d8c7c37..a45a4f9 100644 --- a/.env.example +++ b/.env.example @@ -13,6 +13,12 @@ TAVILY_API_KEY=tvly-xxx # BRAVE_SEARCH_API_KEY=xxx # Required only if SEARCH_API is brave_search # JINA_API_KEY=jina_xxx # Optional, default is None +# Optional, RAG provider +# RAG_PROVIDER=ragflow +# RAGFLOW_API_URL="http://localhost:9388" +# RAGFLOW_API_KEY="ragflow-xxx" +# RAGFLOW_PAGE_SIZE=10 + # Optional, volcengine TTS for generating podcast VOLCENGINE_TTS_APPID=xxx VOLCENGINE_TTS_ACCESS_TOKEN=xxx diff --git a/README.md b/README.md index 39f03d2..aaf23d2 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,18 @@ SEARCH_API=tavily - Crawling with Jina - Advanced content extraction +- 📃 **RAG Integration** + + - Supports mentioning files from [RAGFlow](https://github.com/infiniflow/ragflow) within the input box. [Start up RAGFlow server](https://ragflow.io/docs/dev/). 
+ + ```bash + # .env + RAG_PROVIDER=ragflow + RAGFLOW_API_URL="http://localhost:9388" + RAGFLOW_API_KEY="ragflow-xxx" + RAGFLOW_PAGE_SIZE=10 + ``` + - 🔗 **MCP Seamless Integration** - Expand capabilities for private domain access, knowledge graph, web browsing and more - Facilitates integration of diverse research tools and methodologies @@ -352,6 +364,7 @@ When you submit a research topic in the Studio UI, you'll be able to see the ent DeerFlow supports LangSmith tracing to help you debug and monitor your workflows. To enable LangSmith tracing: 1. Make sure your `.env` file has the following configurations (see `.env.example`): + ```bash LANGSMITH_TRACING=true LANGSMITH_ENDPOINT="https://api.smith.langchain.com" @@ -538,6 +551,8 @@ We would like to extend our sincere appreciation to the following projects for t - **[LangChain](https://github.com/langchain-ai/langchain)**: Their exceptional framework powers our LLM interactions and chains, enabling seamless integration and functionality. - **[LangGraph](https://github.com/langchain-ai/langgraph)**: Their innovative approach to multi-agent orchestration has been instrumental in enabling DeerFlow's sophisticated workflows. +- **[Novel](https://github.com/steven-tey/novel)**: Their Notion-style WYSIWYG editor supports our report editing and AI-assisted rewriting. +- **[RAGFlow](https://github.com/infiniflow/ragflow)**: We have achieved support for research on users' private knowledge bases through integration with RAGFlow. These projects exemplify the transformative power of open-source collaboration, and we are proud to build upon their foundations. 
diff --git a/src/config/configuration.py b/src/config/configuration.py index 42e4af4..fa1057e 100644 --- a/src/config/configuration.py +++ b/src/config/configuration.py @@ -2,16 +2,21 @@ # SPDX-License-Identifier: MIT import os -from dataclasses import dataclass, fields +from dataclasses import dataclass, field, fields from typing import Any, Optional from langchain_core.runnables import RunnableConfig +from src.rag.retriever import Resource + @dataclass(kw_only=True) class Configuration: """The configurable fields.""" + resources: list[Resource] = field( + default_factory=list + ) # Resources to be used for the research max_plan_iterations: int = 1 # Maximum number of plan iterations max_step_num: int = 3 # Maximum number of steps in a plan max_search_results: int = 3 # Maximum number of search results diff --git a/src/config/tools.py b/src/config/tools.py index 941de2b..9462c3b 100644 --- a/src/config/tools.py +++ b/src/config/tools.py @@ -17,3 +17,10 @@ class SearchEngine(enum.Enum): # Tool configuration SELECTED_SEARCH_ENGINE = os.getenv("SEARCH_API", SearchEngine.TAVILY.value) + + +class RAGProvider(enum.Enum): + RAGFLOW = "ragflow" + + +SELECTED_RAG_PROVIDER = os.getenv("RAG_PROVIDER") diff --git a/src/graph/nodes.py b/src/graph/nodes.py index 9613ee6..c0eff53 100644 --- a/src/graph/nodes.py +++ b/src/graph/nodes.py @@ -17,6 +17,7 @@ from src.tools.search import LoggedTavilySearch from src.tools import ( crawl_tool, get_web_search_tool, + get_retriever_tool, python_repl_tool, ) @@ -206,10 +207,11 @@ def human_feedback_node( def coordinator_node( - state: State, + state: State, config: RunnableConfig ) -> Command[Literal["planner", "background_investigator", "__end__"]]: """Coordinator node that communicate with customers.""" logger.info("Coordinator talking.") + configurable = Configuration.from_runnable_config(config) messages = apply_prompt_template("coordinator", state) response = ( get_llm_by_type(AGENT_LLM_MAP["coordinator"]) @@ -242,7 +244,7 @@ def 
coordinator_node( logger.debug(f"Coordinator response: {response}") return Command( - update={"locale": locale}, + update={"locale": locale, "resources": configurable.resources}, goto=goto, ) @@ -326,14 +328,14 @@ async def _execute_agent_step( logger.warning("No unexecuted step found") return Command(goto="research_team") - logger.info(f"Executing step: {current_step.title}") + logger.info(f"Executing step: {current_step.title}, agent: {agent_name}") # Format completed steps information completed_steps_info = "" if completed_steps: completed_steps_info = "# Existing Research Findings\n\n" for i, step in enumerate(completed_steps): - completed_steps_info += f"## Existing Finding {i+1}: {step.title}\n\n" + completed_steps_info += f"## Existing Finding {i + 1}: {step.title}\n\n" completed_steps_info += f"\n{step.execution_res}\n\n\n" # Prepare the input for the agent with completed steps info @@ -347,6 +349,19 @@ async def _execute_agent_step( # Add citation reminder for researcher agent if agent_name == "researcher": + if state.get("resources"): + resources_info = "**The user mentioned the following resource files:**\n\n" + for resource in state.get("resources"): + resources_info += f"- {resource.title} ({resource.description})\n" + + agent_input["messages"].append( + HumanMessage( + content=resources_info + + "\n\n" + + "You MUST use the **local_search_tool** to retrieve the information from the resource files.", + ) + ) + agent_input["messages"].append( HumanMessage( content="IMPORTANT: DO NOT include inline citations in the text. Instead, track all sources and include a References section at the end using link reference format. Include an empty line between each citation for better readability. 
Use this format for each reference:\n- [Source Title](URL)\n\n- [Another Source](URL)", @@ -377,6 +392,7 @@ async def _execute_agent_step( ) recursion_limit = default_recursion_limit + logger.info(f"Agent input: {agent_input}") result = await agent.ainvoke( input=agent_input, config={"recursion_limit": recursion_limit} ) @@ -468,11 +484,16 @@ async def researcher_node( """Researcher node that do research""" logger.info("Researcher node is researching.") configurable = Configuration.from_runnable_config(config) + tools = [get_web_search_tool(configurable.max_search_results), crawl_tool] + retriever_tool = get_retriever_tool(state.get("resources", [])) + if retriever_tool: + tools.insert(0, retriever_tool) + logger.info(f"Researcher tools: {tools}") return await _setup_and_execute_agent_step( state, config, "researcher", - [get_web_search_tool(configurable.max_search_results), crawl_tool], + tools, ) diff --git a/src/graph/types.py b/src/graph/types.py index 5ba9cf7..fba8264 100644 --- a/src/graph/types.py +++ b/src/graph/types.py @@ -1,12 +1,10 @@ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # SPDX-License-Identifier: MIT -import operator -from typing import Annotated - from langgraph.graph import MessagesState from src.prompts.planner_model import Plan +from src.rag import Resource class State(MessagesState): @@ -15,6 +13,7 @@ class State(MessagesState): # Runtime Variables locale: str = "en-US" observations: list[str] = [] + resources: list[Resource] = [] plan_iterations: int = 0 current_plan: Plan | str = None final_report: str = "" diff --git a/src/prompts/planner.md b/src/prompts/planner.md index 59549a4..a0b032a 100644 --- a/src/prompts/planner.md +++ b/src/prompts/planner.md @@ -57,14 +57,15 @@ Before creating a detailed plan, assess if there is sufficient context to answer Different types of steps have different web search requirements: -1. **Research Steps** (`need_web_search: true`): +1. 
**Research Steps** (`need_search: true`): + - Retrieving information from user-specified files whose URL has a `rag://` or `http://` prefix - Gathering market data or industry trends - Finding historical information - Collecting competitor analysis - Researching current events or news - Finding statistical data or reports -2. **Data Processing Steps** (`need_web_search: false`): +2. **Data Processing Steps** (`need_search: false`): - API calls and data extraction - Database queries - Raw data collection from existing sources @@ -74,10 +75,10 @@ Different types of steps have different web search requirements: ## Exclusions - **No Direct Calculations in Research Steps**: - - Research steps should only gather data and information - - All mathematical calculations must be handled by processing steps - - Numerical analysis must be delegated to processing steps - - Research steps focus on information gathering only + - Research steps should only gather data and information + - All mathematical calculations must be handled by processing steps + - Numerical analysis must be delegated to processing steps + - Research steps focus on information gathering only ## Analysis Framework @@ -135,16 +136,16 @@ When planning information gathering, consider these key aspects and ensure COMPR - To begin with, repeat user's requirement in your own words as `thought`. - Rigorously assess if there is sufficient context to answer the question using the strict criteria above. 
- If context is sufficient: - - Set `has_enough_context` to true - - No need to create information gathering steps + - Set `has_enough_context` to true + - No need to create information gathering steps - If context is insufficient (default assumption): - - Break down the required information using the Analysis Framework - - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects - - Ensure each step is substantial and covers related information categories - - Prioritize breadth and depth within the {{ max_step_num }}-step constraint - - For each step, carefully assess if web search is needed: - - Research and external data gathering: Set `need_web_search: true` - - Internal data processing: Set `need_web_search: false` + - Break down the required information using the Analysis Framework + - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects + - Ensure each step is substantial and covers related information categories + - Prioritize breadth and depth within the {{ max_step_num }}-step constraint + - For each step, carefully assess if web search is needed: + - Research and external data gathering: Set `need_search: true` + - Internal data processing: Set `need_search: false` - Specify the exact data to be collected in step's `description`. Include a `note` if necessary. - Prioritize depth and volume of relevant information - limited information is not acceptable. - Use the same language as the user to generate the plan. @@ -156,10 +157,10 @@ Directly output the raw JSON format of `Plan` without "```json". 
The `Plan` inte ```ts interface Step { - need_web_search: boolean; // Must be explicitly set for each step + need_search: boolean; // Must be explicitly set for each step title: string; - description: string; // Specify exactly what data to collect - step_type: "research" | "processing"; // Indicates the nature of the step + description: string; // Specify exactly what data to collect. If the user input contains a link, please retain the full Markdown format when necessary. + step_type: "research" | "processing"; // Indicates the nature of the step } interface Plan { @@ -167,7 +168,7 @@ interface Plan { has_enough_context: boolean; thought: string; title: string; - steps: Step[]; // Research & Processing steps to get more context + steps: Step[]; // Research & Processing steps to get more context } ``` @@ -179,8 +180,8 @@ interface Plan { - Prioritize BOTH breadth (covering essential aspects) AND depth (detailed information on each aspect) - Never settle for minimal information - the goal is a comprehensive, detailed final report - Limited or insufficient information will lead to an inadequate final report -- Carefully assess each step's web search requirement based on its nature: - - Research steps (`need_web_search: true`) for gathering information - - Processing steps (`need_web_search: false`) for calculations and data processing +- Carefully assess whether each step requires a web search or retrieval from a URL, based on its nature: + - Research steps (`need_search: true`) for gathering information + - Processing steps (`need_search: false`) for calculations and data processing - Default to gathering more information unless the strictest sufficient context criteria are met -- Always use the language specified by the locale = **{{ locale }}**. \ No newline at end of file +- Always use the language specified by the locale = **{{ locale }}**. 
diff --git a/src/prompts/planner_model.py b/src/prompts/planner_model.py index 4e1f544..905b716 100644 --- a/src/prompts/planner_model.py +++ b/src/prompts/planner_model.py @@ -13,9 +13,7 @@ class StepType(str, Enum): class Step(BaseModel): - need_web_search: bool = Field( - ..., description="Must be explicitly set for each step" - ) + need_search: bool = Field(..., description="Must be explicitly set for each step") title: str description: str = Field(..., description="Specify exactly what data to collect") step_type: StepType = Field(..., description="Indicates the nature of the step") @@ -47,7 +45,7 @@ class Plan(BaseModel): "title": "AI Market Research Plan", "steps": [ { - "need_web_search": True, + "need_search": True, "title": "Current AI Market Analysis", "description": ( "Collect data on market size, growth rates, major players, and investment trends in AI sector." diff --git a/src/prompts/researcher.md b/src/prompts/researcher.md index a8327e4..5a73dd9 100644 --- a/src/prompts/researcher.md +++ b/src/prompts/researcher.md @@ -11,6 +11,9 @@ You are dedicated to conducting thorough investigations using search tools and p You have access to two types of tools: 1. **Built-in Tools**: These are always available: + {% if resources %} + - **local_search_tool**: For retrieving information from the local knowledge base when user mentioned in the messages. + {% endif %} - **web_search_tool**: For performing web searches - **crawl_tool**: For reading content from URLs @@ -34,7 +37,7 @@ You have access to two types of tools: 3. **Plan the Solution**: Determine the best approach to solve the problem using the available tools. 4. **Execute the Solution**: - Forget your previous knowledge, so you **should leverage the tools** to retrieve the information. - - Use the **web_search_tool** or other suitable search tool to perform a search with the provided keywords. 
+ - Use the {% if resources %}**local_search_tool** or{% endif %}**web_search_tool** or other suitable search tool to perform a search with the provided keywords. - When the task includes time range requirements: - Incorporate appropriate time-based search parameters in your queries (e.g., "after:2020", "before:2023", or specific date ranges) - Ensure search results respect the specified time constraints. diff --git a/src/rag/__init__.py b/src/rag/__init__.py new file mode 100644 index 0000000..17a73ac --- /dev/null +++ b/src/rag/__init__.py @@ -0,0 +1,5 @@ +from .retriever import Retriever, Document, Resource +from .ragflow import RAGFlowProvider +from .builder import build_retriever + +__all__ = [Retriever, Document, Resource, RAGFlowProvider, build_retriever] diff --git a/src/rag/builder.py b/src/rag/builder.py new file mode 100644 index 0000000..0b1da2a --- /dev/null +++ b/src/rag/builder.py @@ -0,0 +1,11 @@ +from src.config.tools import SELECTED_RAG_PROVIDER, RAGProvider +from src.rag.ragflow import RAGFlowProvider +from src.rag.retriever import Retriever + + +def build_retriever() -> Retriever | None: + if SELECTED_RAG_PROVIDER == RAGProvider.RAGFLOW.value: + return RAGFlowProvider() + elif SELECTED_RAG_PROVIDER: + raise ValueError(f"Unsupported RAG provider: {SELECTED_RAG_PROVIDER}") + return None diff --git a/src/rag/ragflow.py b/src/rag/ragflow.py new file mode 100644 index 0000000..056c3d7 --- /dev/null +++ b/src/rag/ragflow.py @@ -0,0 +1,130 @@ +import os +import requests +from src.rag.retriever import Chunk, Document, Resource, Retriever +from urllib.parse import urlparse + + +class RAGFlowProvider(Retriever): + """ + RAGFlowProvider is a provider that uses RAGFlow to retrieve documents. 
+ """ + + api_url: str + api_key: str + page_size: int = 10 + + def __init__(self): + api_url = os.getenv("RAGFLOW_API_URL") + if not api_url: + raise ValueError("RAGFLOW_API_URL is not set") + self.api_url = api_url + + api_key = os.getenv("RAGFLOW_API_KEY") + if not api_key: + raise ValueError("RAGFLOW_API_KEY is not set") + self.api_key = api_key + + page_size = os.getenv("RAGFLOW_PAGE_SIZE") + if page_size: + self.page_size = int(page_size) + + def query_relevant_documents( + self, query: str, resources: list[Resource] = [] + ) -> list[Document]: + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + + dataset_ids: list[str] = [] + document_ids: list[str] = [] + + for resource in resources: + dataset_id, document_id = parse_uri(resource.uri) + dataset_ids.append(dataset_id) + if document_id: + document_ids.append(document_id) + + payload = { + "question": query, + "dataset_ids": dataset_ids, + "document_ids": document_ids, + "page_size": self.page_size, + } + + response = requests.post( + f"{self.api_url}/api/v1/retrieval", headers=headers, json=payload + ) + + if response.status_code != 200: + raise Exception(f"Failed to query documents: {response.text}") + + result = response.json() + data = result.get("data", {}) + doc_aggs = data.get("doc_aggs", []) + docs: dict[str, Document] = { + doc.get("doc_id"): Document( + id=doc.get("doc_id"), + title=doc.get("doc_name"), + chunks=[], + ) + for doc in doc_aggs + } + + for chunk in data.get("chunks", []): + doc = docs.get(chunk.get("document_id")) + if doc: + doc.chunks.append( + Chunk( + content=chunk.get("content"), + similarity=chunk.get("similarity"), + ) + ) + + return list(docs.values()) + + def list_resources(self, query: str | None = None) -> list[Resource]: + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + + params = {} + if query: + params["name"] = query + + response = requests.get( + 
f"{self.api_url}/api/v1/datasets", headers=headers, params=params + ) + + if response.status_code != 200: + raise Exception(f"Failed to list resources: {response.text}") + + result = response.json() + resources = [] + + for item in result.get("data", []): + item = Resource( + uri=f"rag://dataset/{item.get('id')}", + title=item.get("name", ""), + description=item.get("description", ""), + ) + resources.append(item) + + return resources + + +def parse_uri(uri: str) -> tuple[str, str]: + parsed = urlparse(uri) + if parsed.scheme != "rag": + raise ValueError(f"Invalid URI: {uri}") + return parsed.path.split("/")[1], parsed.fragment + + +if __name__ == "__main__": + uri = "rag://dataset/123#abc" + parsed = urlparse(uri) + print(parsed.scheme) + print(parsed.netloc) + print(parsed.path) + print(parsed.fragment) diff --git a/src/rag/retriever.py b/src/rag/retriever.py new file mode 100644 index 0000000..221598a --- /dev/null +++ b/src/rag/retriever.py @@ -0,0 +1,77 @@ +import abc +from pydantic import BaseModel, Field + + +class Chunk: + content: str + similarity: float + + def __init__(self, content: str, similarity: float): + self.content = content + self.similarity = similarity + + +class Document: + """ + Document is a class that represents a document. + """ + + id: str + url: str | None = None + title: str | None = None + chunks: list[Chunk] = [] + + def __init__( + self, + id: str, + url: str | None = None, + title: str | None = None, + chunks: list[Chunk] = [], + ): + self.id = id + self.url = url + self.title = title + self.chunks = chunks + + def to_dict(self) -> dict: + d = { + "id": self.id, + "content": "\n\n".join([chunk.content for chunk in self.chunks]), + } + if self.url: + d["url"] = self.url + if self.title: + d["title"] = self.title + return d + + +class Resource(BaseModel): + """ + Resource is a class that represents a resource. 
+ """ + + uri: str = Field(..., description="The URI of the resource") + title: str = Field(..., description="The title of the resource") + description: str | None = Field("", description="The description of the resource") + + +class Retriever(abc.ABC): + """ + Define a RAG provider, which can be used to query documents and resources. + """ + + @abc.abstractmethod + def list_resources(self, query: str | None = None) -> list[Resource]: + """ + List resources from the rag provider. + """ + pass + + @abc.abstractmethod + def query_relevant_documents( + self, query: str, resources: list[Resource] = [] + ) -> list[Document]: + """ + Query relevant documents from the resources. + """ + pass diff --git a/src/server/app.py b/src/server/app.py index 0937a2f..a109b23 100644 --- a/src/server/app.py +++ b/src/server/app.py @@ -5,19 +5,22 @@ import base64 import json import logging import os -from typing import List, cast +from typing import Annotated, List, cast from uuid import uuid4 -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, Query from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import Response, StreamingResponse from langchain_core.messages import AIMessageChunk, ToolMessage, BaseMessage from langgraph.types import Command +from src.config.tools import SELECTED_RAG_PROVIDER from src.graph.builder import build_graph_with_memory from src.podcast.graph.builder import build_graph as build_podcast_graph from src.ppt.graph.builder import build_graph as build_ppt_graph from src.prose.graph.builder import build_graph as build_prose_graph +from src.rag.builder import build_retriever +from src.rag.retriever import Resource from src.server.chat_request import ( ChatMessage, ChatRequest, @@ -28,6 +31,11 @@ from src.server.chat_request import ( ) from src.server.mcp_request import MCPServerMetadataRequest, MCPServerMetadataResponse from src.server.mcp_utils import load_mcp_tools +from src.server.rag_request import ( 
+ RAGConfigResponse, + RAGResourceRequest, + RAGResourcesResponse, +) from src.tools import VolcengineTTS logger = logging.getLogger(__name__) @@ -59,6 +67,7 @@ async def chat_stream(request: ChatRequest): _astream_workflow_generator( request.model_dump()["messages"], thread_id, + request.resources, request.max_plan_iterations, request.max_step_num, request.max_search_results, @@ -74,6 +83,7 @@ async def chat_stream(request: ChatRequest): async def _astream_workflow_generator( messages: List[ChatMessage], thread_id: str, + resources: List[Resource], max_plan_iterations: int, max_step_num: int, max_search_results: int, @@ -101,6 +111,7 @@ async def _astream_workflow_generator( input_, config={ "thread_id": thread_id, + "resources": resources, "max_plan_iterations": max_plan_iterations, "max_step_num": max_step_num, "max_search_results": max_search_results, @@ -319,3 +330,18 @@ async def mcp_server_metadata(request: MCPServerMetadataRequest): logger.exception(f"Error in MCP server metadata endpoint: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) raise + + +@app.get("/api/rag/config", response_model=RAGConfigResponse) +async def rag_config(): + """Get the config of the RAG.""" + return RAGConfigResponse(provider=SELECTED_RAG_PROVIDER) + + +@app.get("/api/rag/resources", response_model=RAGResourcesResponse) +async def rag_resources(request: Annotated[RAGResourceRequest, Query()]): + """Get the resources of the RAG.""" + retriever = build_retriever() + if retriever: + return RAGResourcesResponse(resources=retriever.list_resources(request.query)) + return RAGResourcesResponse(resources=[]) diff --git a/src/server/chat_request.py b/src/server/chat_request.py index 8e6d786..95cb68f 100644 --- a/src/server/chat_request.py +++ b/src/server/chat_request.py @@ -5,6 +5,8 @@ from typing import List, Optional, Union from pydantic import BaseModel, Field +from src.rag.retriever import Resource + class ContentItem(BaseModel): type: str = Field(..., description="The 
type of content (text, image, etc.)") @@ -28,6 +30,9 @@ class ChatRequest(BaseModel): messages: Optional[List[ChatMessage]] = Field( [], description="History of messages between the user and the assistant" ) + resources: Optional[List[Resource]] = Field( + [], description="Resources to be used for the research" + ) debug: Optional[bool] = Field(False, description="Whether to enable debug logging") thread_id: Optional[str] = Field( "__default__", description="A specific conversation identifier" diff --git a/src/server/rag_request.py b/src/server/rag_request.py new file mode 100644 index 0000000..d89cff1 --- /dev/null +++ b/src/server/rag_request.py @@ -0,0 +1,25 @@ +from pydantic import BaseModel, Field + +from src.rag.retriever import Resource + + +class RAGConfigResponse(BaseModel): + """Response model for RAG config.""" + + provider: str | None = Field( + None, description="The provider of the RAG, default is ragflow" + ) + + +class RAGResourceRequest(BaseModel): + """Request model for RAG resource.""" + + query: str | None = Field( + None, description="The query of the resource need to be searched" + ) + + +class RAGResourcesResponse(BaseModel): + """Response model for RAG resources.""" + + resources: list[Resource] = Field(..., description="The resources of the RAG") diff --git a/src/tools/__init__.py b/src/tools/__init__.py index fb89121..5101998 100644 --- a/src/tools/__init__.py +++ b/src/tools/__init__.py @@ -5,6 +5,7 @@ import os from .crawl import crawl_tool from .python_repl import python_repl_tool +from .retriever import get_retriever_tool from .search import get_web_search_tool from .tts import VolcengineTTS @@ -12,5 +13,6 @@ __all__ = [ "crawl_tool", "python_repl_tool", "get_web_search_tool", + "get_retriever_tool", "VolcengineTTS", ] diff --git a/src/tools/retriever.py b/src/tools/retriever.py new file mode 100644 index 0000000..648b80b --- /dev/null +++ b/src/tools/retriever.py @@ -0,0 +1,74 @@ +import logging +from typing import List, Optional, 
Type +from langchain_core.tools import BaseTool +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from pydantic import BaseModel, Field + +from src.config.tools import SELECTED_RAG_PROVIDER +from src.rag import Document, Retriever, Resource, build_retriever + +logger = logging.getLogger(__name__) + + +class RetrieverInput(BaseModel): + keywords: str = Field(description="search keywords to look up") + + +class RetrieverTool(BaseTool): + name: str = "local_search_tool" + description: str = ( + "Useful for retrieving information from the file with `rag://` uri prefix, it should be higher priority than the web search or writing code. Input should be a search keywords." + ) + args_schema: Type[BaseModel] = RetrieverInput + + retriever: Retriever = Field(default_factory=Retriever) + resources: list[Resource] = Field(default_factory=list) + + def _run( + self, + keywords: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> list[Document]: + logger.info( + f"Retriever tool query: {keywords}", extra={"resources": self.resources} + ) + documents = self.retriever.query_relevant_documents(keywords, self.resources) + if not documents: + return "No results found from the local knowledge base." 
+ return [doc.to_dict() for doc in documents] + + async def _arun( + self, + keywords: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> list[Document]: + return self._run(keywords, run_manager.get_sync()) + + +def get_retriever_tool(resources: List[Resource]) -> RetrieverTool | None: + if not resources: + return None + logger.info(f"create retriever tool: {SELECTED_RAG_PROVIDER}") + retriever = build_retriever() + + if not retriever: + return None + return RetrieverTool(retriever=retriever, resources=resources) + + +if __name__ == "__main__": + resources = [ + Resource( + uri="rag://dataset/1c7e2ea4362911f09a41c290d4b6a7f0", + title="西游记", + description="西游记是中国古代四大名著之一,讲述了唐僧师徒四人西天取经的故事。", + ) + ] + retriever_tool = get_retriever_tool(resources) + print(retriever_tool.name) + print(retriever_tool.description) + print(retriever_tool.args) + print(retriever_tool.invoke("三打白骨精")) diff --git a/src/tools/search.py b/src/tools/search.py index 88dd5eb..fa8445d 100644 --- a/src/tools/search.py +++ b/src/tools/search.py @@ -61,5 +61,9 @@ def get_web_search_tool(max_search_results: int): if __name__ == "__main__": results = LoggedDuckDuckGoSearch( name="web_search", max_results=3, output_format="list" - ).invoke("cute panda") - print(json.dumps(results, indent=2, ensure_ascii=False)) + ) + print(results.name) + print(results.description) + print(results.args) + # .invoke("cute panda") + # print(json.dumps(results, indent=2, ensure_ascii=False)) diff --git a/tests/test_state.py b/tests/test_state.py index a24c91c..f65681b 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -14,8 +14,8 @@ class StepType: class Step: - def __init__(self, need_web_search, title, description, step_type): - self.need_web_search = need_web_search + def __init__(self, need_search, title, description, step_type): + self.need_search = need_search self.title = title self.description = description self.step_type = step_type @@ -90,7 +90,7 @@ def 
test_state_initialization(): def test_state_with_custom_values(): """Test that State can be initialized with custom values.""" test_step = Step( - need_web_search=True, + need_search=True, title="Test Step", description="Step description", step_type=StepType.RESEARCH, diff --git a/web/package.json b/web/package.json index b1e83d3..26ff75e 100644 --- a/web/package.json +++ b/web/package.json @@ -6,7 +6,7 @@ "scripts": { "build": "next build", "check": "next lint && tsc --noEmit", - "dev": "next dev --turbo", + "dev": "dotenv -f ../.env -e true run next dev --turbo", "scan": "next dev & npx react-scan@latest localhost:3000", "format:check": "prettier --check \"**/*.{ts,tsx,js,jsx,mdx}\" --cache", "format:write": "prettier --write \"**/*.{ts,tsx,js,jsx,mdx}\" --cache", @@ -35,12 +35,16 @@ "@radix-ui/react-switch": "^1.2.2", "@radix-ui/react-tabs": "^1.1.4", "@radix-ui/react-tooltip": "^1.2.0", + "@rc-component/mentions": "^1.2.0", "@t3-oss/env-nextjs": "^0.11.0", "@tailwindcss/typography": "^0.5.16", + "@tiptap/extension-document": "^2.12.0", + "@tiptap/extension-mention": "^2.12.0", "@tiptap/extension-table": "^2.11.7", "@tiptap/extension-table-cell": "^2.11.7", "@tiptap/extension-table-header": "^2.11.7", "@tiptap/extension-table-row": "^2.11.7", + "@tiptap/extension-text": "^2.12.0", "@tiptap/react": "^2.11.7", "@xyflow/react": "^12.6.0", "best-effort-json-parser": "^1.1.3", @@ -70,6 +74,7 @@ "remark-math": "^6.0.0", "sonner": "^2.0.3", "tailwind-merge": "^3.2.0", + "tippy.js": "^6.3.7", "tiptap-markdown": "^0.8.10", "tw-animate-css": "^1.2.5", "unist-util-visit": "^5.0.0", diff --git a/web/pnpm-lock.yaml b/web/pnpm-lock.yaml index e9878c4..45adb9f 100644 --- a/web/pnpm-lock.yaml +++ b/web/pnpm-lock.yaml @@ -62,12 +62,21 @@ importers: '@radix-ui/react-tooltip': specifier: ^1.2.0 version: 1.2.0(@types/react-dom@19.1.1(@types/react@19.1.2))(@types/react@19.1.2)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/mentions': + specifier: ^1.2.0 + version: 
1.2.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) '@t3-oss/env-nextjs': specifier: ^0.11.0 version: 0.11.1(typescript@5.8.3)(zod@3.24.3) '@tailwindcss/typography': specifier: ^0.5.16 version: 0.5.16(tailwindcss@4.1.4) + '@tiptap/extension-document': + specifier: ^2.12.0 + version: 2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) + '@tiptap/extension-mention': + specifier: ^2.12.0 + version: 2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7)(@tiptap/suggestion@2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7)) '@tiptap/extension-table': specifier: ^2.11.7 version: 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7) @@ -80,6 +89,9 @@ importers: '@tiptap/extension-table-row': specifier: ^2.11.7 version: 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) + '@tiptap/extension-text': + specifier: ^2.12.0 + version: 2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/react': specifier: ^2.11.7 version: 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7)(react-dom@19.1.0(react@19.1.0))(react@19.1.0) @@ -167,6 +179,9 @@ importers: tailwind-merge: specifier: ^3.2.0 version: 3.2.0 + tippy.js: + specifier: ^6.3.7 + version: 6.3.7 tiptap-markdown: specifier: ^0.8.10 version: 0.8.10(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) @@ -1193,6 +1208,56 @@ packages: '@radix-ui/rect@1.1.1': resolution: {integrity: sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==} + '@rc-component/input@1.0.1': + resolution: {integrity: sha512-omxsjWpB+RamzDDB0NzgV6qI7Ok/U6nrN2KLL/hLZJcI7sZZgLYAN+Xs1pN7OYBnUeyn25PizcntEE0nofHv8Q==} + peerDependencies: + react: '>=16.0.0' + react-dom: '>=16.0.0' + + '@rc-component/mentions@1.2.0': + resolution: {integrity: sha512-dSr9mX5bQWDegeVLr+NoffjZO5paG/nzM5f+RVslpznfVqR5d3c+xan+f6ZqZWHJqJOfROqNGAkUb8pqqAV7wQ==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + '@rc-component/menu@1.1.3': + resolution: {integrity: 
sha512-NN/J0nJFwwDfQBycl9mordDTBdSai5Ie4nxaGkH2eHVa37KjyhpU98EtcVb/ss393I7SZTDCvoylS3MQOjgYkw==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + '@rc-component/motion@1.1.4': + resolution: {integrity: sha512-rz3+kqQ05xEgIAB9/UKQZKCg5CO/ivGNU78QWYKVfptmbjJKynZO4KXJ7pJD3oMxE9aW94LD/N3eppXWeysTjw==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + '@rc-component/portal@2.0.0': + resolution: {integrity: sha512-337ADhBfgH02S8OujUl33OT+8zVJ67eyuUq11j/dE71rXKYNihMsggW8R2VfI2aL3SciDp8gAFsmPVoPkxLUGw==} + engines: {node: '>=12.x'} + peerDependencies: + react: '>=18.0.0' + react-dom: '>=18.0.0' + + '@rc-component/resize-observer@1.0.0': + resolution: {integrity: sha512-inR8Ka87OOwtrDJzdVp2VuEVlc5nK20lHolvkwFUnXwV50p+nLhKny1NvNTCKvBmS/pi/rTn/1Hvsw10sRRnXA==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + '@rc-component/textarea@1.0.0': + resolution: {integrity: sha512-GuXakeRWZuWUnF2sqfC8RjtzfAh5UI89dPk6r5SgosyQGfQIueuN8LkWmFq5OKTOJIlc82MOjHiPBigKB9+KGw==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + '@rc-component/trigger@3.4.0': + resolution: {integrity: sha512-Vu+RS7bGAHHNtzP6EzrMwH+xiZl+SHQgR98oAUXtoQIy4+4lsSppwQPcl6Q7ORZuZevil1BSw4GHXNWD8BJOXw==} + engines: {node: '>=8.x'} + peerDependencies: + react: '>=18.0.0' + react-dom: '>=18.0.0' + '@rc-component/util@1.2.1': resolution: {integrity: sha512-AUVu6jO+lWjQnUOOECwu8iR0EdElQgWW5NBv5vP/Uf9dWbAX3udhMutRlkVXjuac2E40ghkFy+ve00mc/3Fymg==} peerDependencies: @@ -1399,8 +1464,8 @@ packages: '@tiptap/core': ^2.7.0 '@tiptap/extension-text-style': ^2.7.0 - '@tiptap/extension-document@2.11.7': - resolution: {integrity: sha512-95ouJXPjdAm9+VBRgFo4lhDoMcHovyl/awORDI8gyEn0Rdglt+ZRZYoySFzbVzer9h0cre+QdIwr9AIzFFbfdA==} + '@tiptap/extension-document@2.12.0': + resolution: {integrity: sha512-sA1Q+mxDIv0Y3qQTBkYGwknNbDcGFiJ/fyAFholXpqbrcRx3GavwR/o0chBdsJZlFht0x7AWGwUYWvIo7wYilA==} peerDependencies: '@tiptap/core': ^2.7.0 @@ -1470,6 +1535,13 @@ packages: 
peerDependencies: '@tiptap/core': ^2.7.0 + '@tiptap/extension-mention@2.12.0': + resolution: {integrity: sha512-+b/fqOU+pRWWAo0ZfyInkhkvV0Ub5RpNrYZ45v2nn5PjbXbxyxNQ51zT6cGk2F6Jmc6UBmlR8iqqNTIQY9ieEg==} + peerDependencies: + '@tiptap/core': ^2.7.0 + '@tiptap/pm': ^2.7.0 + '@tiptap/suggestion': ^2.7.0 + '@tiptap/extension-ordered-list@2.11.7': resolution: {integrity: sha512-bLGCHDMB0vbJk7uu8bRg8vES3GsvxkX7Cgjgm/6xysHFbK98y0asDtNxkW1VvuRreNGz4tyB6vkcVCfrxl4jKw==} peerDependencies: @@ -1528,8 +1600,8 @@ packages: peerDependencies: '@tiptap/core': ^2.7.0 - '@tiptap/extension-text@2.11.7': - resolution: {integrity: sha512-wObCn8qZkIFnXTLvBP+X8KgaEvTap/FJ/i4hBMfHBCKPGDx99KiJU6VIbDXG8d5ZcFZE0tOetK1pP5oI7qgMlQ==} + '@tiptap/extension-text@2.12.0': + resolution: {integrity: sha512-0ytN9V1tZYTXdiYDQg4FB2SQ56JAJC9r/65snefb9ztl+gZzDrIvih7CflHs1ic9PgyjexfMLeH+VzuMccNyZw==} peerDependencies: '@tiptap/core': ^2.7.0 @@ -3520,6 +3592,24 @@ packages: peerDependencies: webpack: ^4.0.0 || ^5.0.0 + rc-overflow@1.4.1: + resolution: {integrity: sha512-3MoPQQPV1uKyOMVNd6SZfONi+f3st0r8PksexIdBTeIYbMX0Jr+k7pHEDvsXtR4BpCv90/Pv2MovVNhktKrwvw==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + rc-resize-observer@1.4.3: + resolution: {integrity: sha512-YZLjUbyIWox8E9i9C3Tm7ia+W7euPItNWSPX5sCcQTYbnwDb5uNpnLHQCG1f22oZWUhLw4Mv2tFmeWe68CDQRQ==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + + rc-util@5.44.4: + resolution: {integrity: sha512-resueRJzmHG9Q6rI/DfK6Kdv9/Lfls05vzMs1Sk3M2P+3cJa+MakaZyWY8IPfehVuhPJFKrIY1IK4GqbiaiY5w==} + peerDependencies: + react: '>=16.9.0' + react-dom: '>=16.9.0' + react-css-styled@1.1.9: resolution: {integrity: sha512-M7fJZ3IWFaIHcZEkoFOnkjdiUFmwd8d+gTh2bpqMOcnxy/0Gsykw4dsL4QBiKsxcGow6tETUa4NAUcmJF+/nfw==} @@ -3639,6 +3729,9 @@ packages: resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==} engines: {node: '>=0.10.0'} + resize-observer-polyfill@1.5.1: + resolution: 
{integrity: sha512-LwZrotdHOo12nQuZlHEmtuXdqGoOD0OhaxopaNFxWzInpEgaLWoVuAMbTzixuosCx2nEG58ngzW3vxdWoxIgdg==} + resolve-from@4.0.0: resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==} engines: {node: '>=4'} @@ -5047,6 +5140,74 @@ snapshots: '@radix-ui/rect@1.1.1': {} + '@rc-component/input@1.0.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/mentions@1.2.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/input': 1.0.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/menu': 1.1.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/textarea': 1.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/trigger': 3.4.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/menu@1.1.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/motion': 1.1.4(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/trigger': 3.4.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + rc-overflow: 1.4.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/motion@1.1.4(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/portal@2.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/util': 
1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/resize-observer@1.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/textarea@1.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/input': 1.0.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/resize-observer': 1.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + '@rc-component/trigger@3.4.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': + dependencies: + '@rc-component/motion': 1.1.4(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/portal': 2.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/resize-observer': 1.0.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + '@rc-component/util': 1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + classnames: 2.5.1 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + '@rc-component/util@1.2.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0)': dependencies: react: 19.1.0 @@ -5216,7 +5377,7 @@ snapshots: '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) '@tiptap/extension-text-style': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) - '@tiptap/extension-document@2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))': + '@tiptap/extension-document@2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))': dependencies: '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) @@ -5276,6 +5437,12 @@ snapshots: dependencies: '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) + 
'@tiptap/extension-mention@2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7)(@tiptap/suggestion@2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7))': + dependencies: + '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) + '@tiptap/pm': 2.11.7 + '@tiptap/suggestion': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7) + '@tiptap/extension-ordered-list@2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))': dependencies: '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) @@ -5323,7 +5490,7 @@ snapshots: dependencies: '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) - '@tiptap/extension-text@2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))': + '@tiptap/extension-text@2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))': dependencies: '@tiptap/core': 2.11.7(@tiptap/pm@2.11.7) @@ -5376,7 +5543,7 @@ snapshots: '@tiptap/extension-bullet-list': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/extension-code': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/extension-code-block': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7) - '@tiptap/extension-document': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) + '@tiptap/extension-document': 2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/extension-dropcursor': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7) '@tiptap/extension-gapcursor': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7))(@tiptap/pm@2.11.7) '@tiptap/extension-hard-break': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) @@ -5388,7 +5555,7 @@ snapshots: '@tiptap/extension-ordered-list': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/extension-paragraph': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/extension-strike': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) - '@tiptap/extension-text': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) + '@tiptap/extension-text': 2.12.0(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) '@tiptap/extension-text-style': 2.11.7(@tiptap/core@2.11.7(@tiptap/pm@2.11.7)) 
'@tiptap/pm': 2.11.7 @@ -7816,6 +7983,31 @@ snapshots: schema-utils: 3.3.0 webpack: 5.99.6 + rc-overflow@1.4.1(react-dom@19.1.0(react@19.1.0))(react@19.1.0): + dependencies: + '@babel/runtime': 7.27.0 + classnames: 2.5.1 + rc-resize-observer: 1.4.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + rc-util: 5.44.4(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + + rc-resize-observer@1.4.3(react-dom@19.1.0(react@19.1.0))(react@19.1.0): + dependencies: + '@babel/runtime': 7.27.0 + classnames: 2.5.1 + rc-util: 5.44.4(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + resize-observer-polyfill: 1.5.1 + + rc-util@5.44.4(react-dom@19.1.0(react@19.1.0))(react@19.1.0): + dependencies: + '@babel/runtime': 7.27.0 + react: 19.1.0 + react-dom: 19.1.0(react@19.1.0) + react-is: 18.3.1 + react-css-styled@1.1.9: dependencies: css-styled: 1.0.8 @@ -8020,6 +8212,8 @@ snapshots: require-from-string@2.0.2: {} + resize-observer-polyfill@1.5.1: {} + resolve-from@4.0.0: {} resolve-pkg-maps@1.0.0: {} diff --git a/web/public/replay/github-top-trending-repo.txt b/web/public/replay/github-top-trending-repo.txt index d4fef75..671acc8 100644 --- a/web/public/replay/github-top-trending-repo.txt +++ b/web/public/replay/github-top-trending-repo.txt @@ -23,19 +23,19 @@ event: message_chunk data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": " reason it's trending, and some key statistics (stars, forks, contributors, etc.).\",\n \"title\": \"Research Plan: Top Trending GitHub Repository Today"} event: message_chunk -data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": "\",\n \"steps\": [\n {\n \"need_web_search\": true,\n \"title\": \"Identify and Profile the Top Trending Repository\",\n \"description\": \"Identify the #1 
trending repository on"} +data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": "\",\n \"steps\": [\n {\n \"need_search\": true,\n \"title\": \"Identify and Profile the Top Trending Repository\",\n \"description\": \"Identify the #1 trending repository on"} event: message_chunk data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": " GitHub today. Collect the following information: repository name, repository owner/organization, a short description of the repository's purpose, the primary programming language used, and the reason GitHub marks it as trending (e.g., 'X new stars today"} event: message_chunk -data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": "'). Note: ensure to filter for 'today' to get the current trending repo.\",\n \"step_type\": \"research\"\n },\n {\n \"need_web_search\": true,\n \"title\": \"Gather Repository Statistics and Community Data\",\n \"description\": \"Collect"} +data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": "'). Note: ensure to filter for 'today' to get the current trending repo.\",\n \"step_type\": \"research\"\n },\n {\n \"need_search\": true,\n \"title\": \"Gather Repository Statistics and Community Data\",\n \"description\": \"Collect"} event: message_chunk data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": " detailed statistics for the top trending repository. This includes the total number of stars, forks, open issues, closed issues, contributors, and recent commit activity. 
Also, gather data about the community's involvement, such as the number of active contributors in the last month, and any available information on significant discussions or contributions happening"} event: message_chunk -data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": " within the project. Check for recent release notes or announcements.\",\n \"step_type\": \"research\"\n },\n {\n \"need_web_search\": true,\n \"title\": \"Determine Context and Significance\",\n \"description\": \"Research the broader context and significance of the trending"} +data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": " within the project. Check for recent release notes or announcements.\",\n \"step_type\": \"research\"\n },\n {\n \"need_search\": true,\n \"title\": \"Determine Context and Significance\",\n \"description\": \"Research the broader context and significance of the trending"} event: message_chunk data: {"thread_id": "LmC3xxJCFljoFXggnmvst", "agent": "planner", "id": "run-33af75e6-c1b5-4276-9749-7cfb7a967402", "role": "assistant", "content": " repository. Determine the repository's purpose or function. Investigate the project's background, the problem it solves, or the features it provides. Identify the industry, user base, or application area it serves. Search for recent news, articles, or blog posts mentioning the repository and its impact or potential. 
Identify its license"} diff --git a/web/public/replay/nanjing-traditional-dishes.txt b/web/public/replay/nanjing-traditional-dishes.txt index 46554c4..5de3baa 100644 --- a/web/public/replay/nanjing-traditional-dishes.txt +++ b/web/public/replay/nanjing-traditional-dishes.txt @@ -20,16 +20,16 @@ event: message_chunk data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": " culinary scene and document its traditional dishes. I will create comprehensive steps to gather the most important data and create a good final report.\",\n \"title\": \"Research"} event: message_chunk -data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": " Plan: Nanjing's Culinary Scene and Traditional Dishes\",\n \"steps\": [\n {\n \"need_web_search\": true,\n "} +data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": " Plan: Nanjing's Culinary Scene and Traditional Dishes\",\n \"steps\": [\n {\n \"need_search\": true,\n "} event: message_chunk data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": "\"title\": \"Identify and Document Key Traditional Nanjing Dishes\",\n \"description\": \"Research and compile a comprehensive list of traditional Nanjing dishes, including their names (in both English and Chinese), detailed descriptions of ingredients and preparation methods, and historical origins"} event: message_chunk -data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": ". Identify dishes that are representative of Nanjing's culinary heritage and those that are less well-known but still significant. 
Document the specific cooking techniques that characterize Nanjing cuisine.\",\n \"step_type\": \"research\"\n },\n {\n \"need_web_search\": true,\n \"title\": \"Investigate the History and Cultural Significance of Nanjing Cuisine\",\n \"description\": \"Explore the historical influences that have shaped Nanjing's culinary traditions, including its role as a former capital city. Document the cultural significance of specific dishes and"} +data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": ". Identify dishes that are representative of Nanjing's culinary heritage and those that are less well-known but still significant. Document the specific cooking techniques that characterize Nanjing cuisine.\",\n \"step_type\": \"research\"\n },\n {\n \"need_search\": true,\n \"title\": \"Investigate the History and Cultural Significance of Nanjing Cuisine\",\n \"description\": \"Explore the historical influences that have shaped Nanjing's culinary traditions, including its role as a former capital city. Document the cultural significance of specific dishes and"} event: message_chunk -data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": " their connection to local customs, festivals, and celebrations. Research the evolution of Nanjing cuisine over time, identifying key periods of change and the factors that contributed to them.\",\n \"step_type\": \"research\"\n },\n {\n \"need_web_search\": true,\n \"title\":"} +data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": " their connection to local customs, festivals, and celebrations. 
Research the evolution of Nanjing cuisine over time, identifying key periods of change and the factors that contributed to them.\",\n \"step_type\": \"research\"\n },\n {\n \"need_search\": true,\n \"title\":"} event: message_chunk data: {"thread_id": "PDgExJb-Qsq2fNtO4B_sZ", "agent": "planner", "id": "run-f9561a11-723f-4d5f-917c-95f96601f87f", "role": "assistant", "content": " \"Analyze the Current State of Nanjing's Culinary Scene and Identify Key Restaurants\",\n \"description\": \"Investigate the current state of Nanjing's culinary scene, identifying key restaurants that specialize in traditional Nanjing cuisine. Gather information on their menus, pricing, and customer reviews. Document any trends or changes in the local food"} diff --git a/web/public/replay/rental-apartment-decoration.txt b/web/public/replay/rental-apartment-decoration.txt index ba9168c..079963c 100644 --- a/web/public/replay/rental-apartment-decoration.txt +++ b/web/public/replay/rental-apartment-decoration.txt @@ -83,7 +83,7 @@ event: message_chunk data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "\": [\n {\n \""} event: message_chunk -data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "need_web_search\":"} +data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "need_search\":"} event: message_chunk data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": " true,\n \""} @@ -134,7 +134,7 @@ event: message_chunk data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": " {\n \""} event: message_chunk -data: {"thread_id": 
"5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "need_web_search\":"} +data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "need_search\":"} event: message_chunk data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": " true,\n \"title"} @@ -194,7 +194,7 @@ event: message_chunk data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "\"\n },\n {\n \""} event: message_chunk -data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "need_web_search\":"} +data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": "need_search\":"} event: message_chunk data: {"thread_id": "5CG_qm7snTVKbpVCrWTon", "agent": "planner", "id": "run-3006007c-5c06-4500-ba23-3fab94c70ae7", "role": "assistant", "content": " true,\n \"title"} diff --git a/web/public/replay/review-of-the-professional.txt b/web/public/replay/review-of-the-professional.txt index 0fab057..f77707e 100644 --- a/web/public/replay/review-of-the-professional.txt +++ b/web/public/replay/review-of-the-professional.txt @@ -140,7 +140,7 @@ event: message_chunk data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": "research\"\n },\n {\n"} event: message_chunk -data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": " \"need_web_search\":"} +data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": 
"run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": " \"need_search\":"} event: message_chunk data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": " true,\n \"title"} @@ -200,7 +200,7 @@ event: message_chunk data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": "research\"\n },\n {\n"} event: message_chunk -data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": " \"need_web_search\":"} +data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": " \"need_search\":"} event: message_chunk data: {"thread_id": "01uPkjxNhUsYZHQ1DrkhK", "agent": "planner", "id": "run-77b32288-ec82-4b8e-b815-d403687915bd", "role": "assistant", "content": " true,\n \"title"} diff --git a/web/src/app/chat/components/input-box.tsx b/web/src/app/chat/components/input-box.tsx index 6890d3d..0d05784 100644 --- a/web/src/app/chat/components/input-box.tsx +++ b/web/src/app/chat/components/input-box.tsx @@ -3,18 +3,15 @@ import { AnimatePresence, motion } from "framer-motion"; import { ArrowUp, X } from "lucide-react"; -import { - type KeyboardEvent, - useCallback, - useEffect, - useRef, - useState, -} from "react"; +import { useCallback, useRef } from "react"; import { Detective } from "~/components/deer-flow/icons/detective"; +import MessageInput, { + type MessageInputRef, +} from "~/components/deer-flow/message-input"; import { Tooltip } from "~/components/deer-flow/tooltip"; import { Button } from "~/components/ui/button"; -import type { Option } from "~/core/messages"; +import type { Option, Resource } from "~/core/messages"; import { setEnableBackgroundInvestigation, useSettingsStore, @@ -23,7 +20,6 @@ import 
{ cn } from "~/lib/utils"; export function InputBox({ className, - size, responding, feedback, onSend, @@ -34,72 +30,52 @@ export function InputBox({ size?: "large" | "normal"; responding?: boolean; feedback?: { option: Option } | null; - onSend?: (message: string, options?: { interruptFeedback?: string }) => void; + onSend?: ( + message: string, + options?: { + interruptFeedback?: string; + resources?: Array; + }, + ) => void; onCancel?: () => void; onRemoveFeedback?: () => void; }) { - const [message, setMessage] = useState(""); - const [imeStatus, setImeStatus] = useState<"active" | "inactive">("inactive"); - const [indent, setIndent] = useState(0); const backgroundInvestigation = useSettingsStore( (state) => state.general.enableBackgroundInvestigation, ); - const textareaRef = useRef(null); + const containerRef = useRef(null); + const inputRef = useRef(null); const feedbackRef = useRef(null); - useEffect(() => { - if (feedback) { - setMessage(""); - - setTimeout(() => { - if (feedbackRef.current) { - setIndent(feedbackRef.current.offsetWidth); - } - }, 200); - } - setTimeout(() => { - textareaRef.current?.focus(); - }, 0); - }, [feedback]); - - const handleSendMessage = useCallback(() => { - if (responding) { - onCancel?.(); - } else { - if (message.trim() === "") { - return; - } - if (onSend) { - onSend(message, { - interruptFeedback: feedback?.option.value, - }); - setMessage(""); - onRemoveFeedback?.(); - } - } - }, [responding, onCancel, message, onSend, feedback, onRemoveFeedback]); - - const handleKeyDown = useCallback( - (event: KeyboardEvent) => { + const handleSendMessage = useCallback( + (message: string, resources: Array) => { + console.log(message, resources); if (responding) { - return; - } - if ( - event.key === "Enter" && - !event.shiftKey && - !event.metaKey && - !event.ctrlKey && - imeStatus === "inactive" - ) { - event.preventDefault(); - handleSendMessage(); + onCancel?.(); + } else { + if (message.trim() === "") { + return; + } + if (onSend) 
{ + onSend(message, { + interruptFeedback: feedback?.option.value, + resources, + }); + onRemoveFeedback?.(); + } } }, - [responding, imeStatus, handleSendMessage], + [responding, onCancel, onSend, feedback, onRemoveFeedback], ); return ( -
+
{feedback && ( @@ -122,25 +98,10 @@ export function InputBox({ )} -