feat: RAG Integration (#238)

* feat: add rag provider and retriever
* feat: retriever tool
* feat: add retriever tool to the researcher node
* feat: add rag http apis
* feat: new message input supports resource mentions
* feat: new message input component support resource mentions
* refactor: need_web_search to need_search
* chore: RAG integration docs
* chore: change example api host
* fix: user message color in dark mode
* fix: mentions style
* feat: add local_search_tool to researcher prompt
* chore: research prompt
* fix: ragflow page size and reporter with
* docs: ragflow integration and add acknowledgment projects
* chore: format
2026-04-15 19:04:45 +08:00 · 2025-05-28 14:13:46 +08:00
parent 0565ab6d27
commit 462752b462
43 changed files with 1172 additions and 181 deletions
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -17,6 +17,7 @@ from src.tools.search import LoggedTavilySearch
 from src.tools import (
    crawl_tool,
    get_web_search_tool,
+    get_retriever_tool,
    python_repl_tool,
 )

@@ -206,10 +207,11 @@ def human_feedback_node(


 def coordinator_node(
-    state: State,
+    state: State, config: RunnableConfig
 ) -> Command[Literal["planner", "background_investigator", "__end__"]]:
    """Coordinator node that communicate with customers."""
    logger.info("Coordinator talking.")
+    configurable = Configuration.from_runnable_config(config)
    messages = apply_prompt_template("coordinator", state)
    response = (
        get_llm_by_type(AGENT_LLM_MAP["coordinator"])
@@ -242,7 +244,7 @@ def coordinator_node(
        logger.debug(f"Coordinator response: {response}")

    return Command(
-        update={"locale": locale},
+        update={"locale": locale, "resources": configurable.resources},
        goto=goto,
    )

@@ -326,14 +328,14 @@ async def _execute_agent_step(
        logger.warning("No unexecuted step found")
        return Command(goto="research_team")

-    logger.info(f"Executing step: {current_step.title}")
+    logger.info(f"Executing step: {current_step.title}, agent: {agent_name}")

    # Format completed steps information
    completed_steps_info = ""
    if completed_steps:
        completed_steps_info = "# Existing Research Findings\n\n"
        for i, step in enumerate(completed_steps):
-            completed_steps_info += f"## Existing Finding {i+1}: {step.title}\n\n"
+            completed_steps_info += f"## Existing Finding {i + 1}: {step.title}\n\n"
            completed_steps_info += f"<finding>\n{step.execution_res}\n</finding>\n\n"

    # Prepare the input for the agent with completed steps info
@@ -347,6 +349,19 @@ async def _execute_agent_step(

    # Add citation reminder for researcher agent
    if agent_name == "researcher":
+        if state.get("resources"):
+            resources_info = "**The user mentioned the following resource files:**\n\n"
+            for resource in state.get("resources"):
+                resources_info += f"- {resource.title} ({resource.description})\n"
+
+            agent_input["messages"].append(
+                HumanMessage(
+                    content=resources_info
+                    + "\n\n"
+                    + "You MUST use the **local_search_tool** to retrieve the information from the resource files.",
+                )
+            )
+
        agent_input["messages"].append(
            HumanMessage(
                content="IMPORTANT: DO NOT include inline citations in the text. Instead, track all sources and include a References section at the end using link reference format. Include an empty line between each citation for better readability. Use this format for each reference:\n- [Source Title](URL)\n\n- [Another Source](URL)",
@@ -377,6 +392,7 @@ async def _execute_agent_step(
        )
        recursion_limit = default_recursion_limit

+    logger.info(f"Agent input: {agent_input}")
    result = await agent.ainvoke(
        input=agent_input, config={"recursion_limit": recursion_limit}
    )
@@ -468,11 +484,16 @@ async def researcher_node(
    """Researcher node that do research"""
    logger.info("Researcher node is researching.")
    configurable = Configuration.from_runnable_config(config)
+    tools = [get_web_search_tool(configurable.max_search_results), crawl_tool]
+    retriever_tool = get_retriever_tool(state.get("resources", []))
+    if retriever_tool:
+        tools.insert(0, retriever_tool)
+    logger.info(f"Researcher tools: {tools}")
    return await _setup_and_execute_agent_step(
        state,
        config,
        "researcher",
-        [get_web_search_tool(configurable.max_search_results), crawl_tool],
+        tools,
    )


--- a/src/graph/types.py
+++ b/src/graph/types.py
@@ -1,12 +1,10 @@
 # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
 # SPDX-License-Identifier: MIT

-import operator
-from typing import Annotated
-
 from langgraph.graph import MessagesState

 from src.prompts.planner_model import Plan
+from src.rag import Resource


 class State(MessagesState):
@@ -15,6 +13,7 @@ class State(MessagesState):
    # Runtime Variables
    locale: str = "en-US"
    observations: list[str] = []
+    resources: list[Resource] = []
    plan_iterations: int = 0
    current_plan: Plan | str = None
    final_report: str = ""