mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-19 04:14:46 +08:00
Feat: Cross-Language Search for RAGFlow (#469)
* cross-language search * test passed
This commit is contained in:
@@ -35,6 +35,7 @@ TAVILY_API_KEY=tvly-xxx
|
|||||||
# RAGFLOW_API_URL="http://localhost:9388"
|
# RAGFLOW_API_URL="http://localhost:9388"
|
||||||
# RAGFLOW_API_KEY="ragflow-xxx"
|
# RAGFLOW_API_KEY="ragflow-xxx"
|
||||||
# RAGFLOW_RETRIEVAL_SIZE=10
|
# RAGFLOW_RETRIEVAL_SIZE=10
|
||||||
|
# RAGFLOW_CROSS_LANGUAGES=English,Chinese,Spanish,French,German,Japanese,Korean # Optional. To use RAGFlow's cross-language search, please separate each language with a single comma
|
||||||
|
|
||||||
# Optional, volcengine TTS for generating podcast
|
# Optional, volcengine TTS for generating podcast
|
||||||
VOLCENGINE_TTS_APPID=xxx
|
VOLCENGINE_TTS_APPID=xxx
|
||||||
|
|||||||
@@ -184,6 +184,7 @@ DeerFlow support private knowledgebase such as ragflow and vikingdb, so that you
|
|||||||
RAGFLOW_API_URL="http://localhost:9388"
|
RAGFLOW_API_URL="http://localhost:9388"
|
||||||
RAGFLOW_API_KEY="ragflow-xxx"
|
RAGFLOW_API_KEY="ragflow-xxx"
|
||||||
RAGFLOW_RETRIEVAL_SIZE=10
|
RAGFLOW_RETRIEVAL_SIZE=10
|
||||||
|
RAGFLOW_CROSS_LANGUAGES=English,Chinese,Spanish,French,German,Japanese,Korean
|
||||||
```
|
```
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
from src.rag.retriever import Chunk, Document, Resource, Retriever
|
from src.rag.retriever import Chunk, Document, Resource, Retriever
|
||||||
|
from typing import List, Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
|
||||||
@@ -15,6 +16,7 @@ class RAGFlowProvider(Retriever):
|
|||||||
api_url: str
|
api_url: str
|
||||||
api_key: str
|
api_key: str
|
||||||
page_size: int = 10
|
page_size: int = 10
|
||||||
|
cross_languages: Optional[List[str]] = None
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
api_url = os.getenv("RAGFLOW_API_URL")
|
api_url = os.getenv("RAGFLOW_API_URL")
|
||||||
@@ -31,6 +33,11 @@ class RAGFlowProvider(Retriever):
|
|||||||
if page_size:
|
if page_size:
|
||||||
self.page_size = int(page_size)
|
self.page_size = int(page_size)
|
||||||
|
|
||||||
|
self.cross_languages = None
|
||||||
|
cross_languages = os.getenv("RAGFLOW_CROSS_LANGUAGES")
|
||||||
|
if cross_languages:
|
||||||
|
self.cross_languages = cross_languages.split(",")
|
||||||
|
|
||||||
def query_relevant_documents(
|
def query_relevant_documents(
|
||||||
self, query: str, resources: list[Resource] = []
|
self, query: str, resources: list[Resource] = []
|
||||||
) -> list[Document]:
|
) -> list[Document]:
|
||||||
@@ -55,6 +62,9 @@ class RAGFlowProvider(Retriever):
|
|||||||
"page_size": self.page_size,
|
"page_size": self.page_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self.cross_languages:
|
||||||
|
payload["cross_languages"] = self.cross_languages
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{self.api_url}/api/v1/retrieval", headers=headers, json=payload
|
f"{self.api_url}/api/v1/retrieval", headers=headers, json=payload
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -68,6 +68,14 @@ def test_init_page_size(monkeypatch):
|
|||||||
assert provider.page_size == 5
|
assert provider.page_size == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_init_cross_language(monkeypatch):
|
||||||
|
monkeypatch.setenv("RAGFLOW_API_URL", "http://api")
|
||||||
|
monkeypatch.setenv("RAGFLOW_API_KEY", "key")
|
||||||
|
monkeypatch.setenv("RAGFLOW_CROSS_LANGUAGES", "lang1,lang2")
|
||||||
|
provider = RAGFlowProvider()
|
||||||
|
assert provider.cross_languages == ["lang1", "lang2"]
|
||||||
|
|
||||||
|
|
||||||
def test_init_missing_env(monkeypatch):
|
def test_init_missing_env(monkeypatch):
|
||||||
monkeypatch.delenv("RAGFLOW_API_URL", raising=False)
|
monkeypatch.delenv("RAGFLOW_API_URL", raising=False)
|
||||||
monkeypatch.setenv("RAGFLOW_API_KEY", "key")
|
monkeypatch.setenv("RAGFLOW_API_KEY", "key")
|
||||||
|
|||||||
Reference in New Issue
Block a user