mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-12 01:54:45 +08:00
fix(client): Harden upload validation and conversion flow (#989)
* fix(client): Harden upload validation and conversion flow * test(client): cover event-loop upload conversion reuse * test(client): remove unused import in upload regression coverage * fix(client): load optional shared checkpointer fallback Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * docs(backend): document config preflight and IM channels Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -19,6 +19,7 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
@@ -723,52 +724,79 @@ class DeerFlowClient:
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: If any file does not exist.
|
||||
ValueError: If any supplied path exists but is not a regular file.
|
||||
"""
|
||||
from src.gateway.routers.uploads import CONVERTIBLE_EXTENSIONS, convert_file_to_markdown
|
||||
|
||||
# Validate all files upfront to avoid partial uploads.
|
||||
resolved_files = []
|
||||
convertible_extensions = {ext.lower() for ext in CONVERTIBLE_EXTENSIONS}
|
||||
has_convertible_file = False
|
||||
for f in files:
|
||||
p = Path(f)
|
||||
if not p.exists():
|
||||
raise FileNotFoundError(f"File not found: {f}")
|
||||
if not p.is_file():
|
||||
raise ValueError(f"Path is not a file: {f}")
|
||||
resolved_files.append(p)
|
||||
if not has_convertible_file and p.suffix.lower() in convertible_extensions:
|
||||
has_convertible_file = True
|
||||
|
||||
uploads_dir = self._get_uploads_dir(thread_id)
|
||||
uploaded_files: list[dict] = []
|
||||
|
||||
for src_path in resolved_files:
|
||||
dest = uploads_dir / src_path.name
|
||||
shutil.copy2(src_path, dest)
|
||||
conversion_pool = None
|
||||
if has_convertible_file:
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
conversion_pool = None
|
||||
else:
|
||||
import concurrent.futures
|
||||
|
||||
info: dict[str, Any] = {
|
||||
"filename": src_path.name,
|
||||
"size": str(dest.stat().st_size),
|
||||
"path": str(dest),
|
||||
"virtual_path": f"/mnt/user-data/uploads/{src_path.name}",
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{src_path.name}",
|
||||
}
|
||||
# Reuse one worker when already inside an event loop to avoid
|
||||
# creating a new ThreadPoolExecutor per converted file.
|
||||
conversion_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
|
||||
|
||||
if src_path.suffix.lower() in CONVERTIBLE_EXTENSIONS:
|
||||
try:
|
||||
def _convert_in_thread(path: Path):
|
||||
return asyncio.run(convert_file_to_markdown(path))
|
||||
|
||||
try:
|
||||
for src_path in resolved_files:
|
||||
dest = uploads_dir / src_path.name
|
||||
shutil.copy2(src_path, dest)
|
||||
|
||||
info: dict[str, Any] = {
|
||||
"filename": src_path.name,
|
||||
"size": str(dest.stat().st_size),
|
||||
"path": str(dest),
|
||||
"virtual_path": f"/mnt/user-data/uploads/{src_path.name}",
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{src_path.name}",
|
||||
}
|
||||
|
||||
if src_path.suffix.lower() in convertible_extensions:
|
||||
try:
|
||||
asyncio.get_running_loop()
|
||||
import concurrent.futures
|
||||
if conversion_pool is not None:
|
||||
md_path = conversion_pool.submit(_convert_in_thread, dest).result()
|
||||
else:
|
||||
md_path = asyncio.run(convert_file_to_markdown(dest))
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to convert %s to markdown",
|
||||
src_path.name,
|
||||
exc_info=True,
|
||||
)
|
||||
md_path = None
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as pool:
|
||||
md_path = pool.submit(lambda: asyncio.run(convert_file_to_markdown(dest))).result()
|
||||
except RuntimeError:
|
||||
md_path = asyncio.run(convert_file_to_markdown(dest))
|
||||
except Exception:
|
||||
logger.warning("Failed to convert %s to markdown", src_path.name, exc_info=True)
|
||||
md_path = None
|
||||
if md_path is not None:
|
||||
info["markdown_file"] = md_path.name
|
||||
info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
|
||||
info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"
|
||||
|
||||
if md_path is not None:
|
||||
info["markdown_file"] = md_path.name
|
||||
info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
|
||||
info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"
|
||||
|
||||
uploaded_files.append(info)
|
||||
uploaded_files.append(info)
|
||||
finally:
|
||||
if conversion_pool is not None:
|
||||
conversion_pool.shutdown(wait=True)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
@@ -791,20 +819,23 @@ class DeerFlowClient:
|
||||
return {"files": [], "count": 0}
|
||||
|
||||
files = []
|
||||
for fp in sorted(uploads_dir.iterdir()):
|
||||
if fp.is_file():
|
||||
stat = fp.stat()
|
||||
files.append(
|
||||
{
|
||||
"filename": fp.name,
|
||||
"size": str(stat.st_size),
|
||||
"path": str(fp),
|
||||
"virtual_path": f"/mnt/user-data/uploads/{fp.name}",
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{fp.name}",
|
||||
"extension": fp.suffix,
|
||||
"modified": stat.st_mtime,
|
||||
}
|
||||
)
|
||||
with os.scandir(uploads_dir) as entries:
|
||||
file_entries = [entry for entry in entries if entry.is_file()]
|
||||
|
||||
for entry in sorted(file_entries, key=lambda item: item.name):
|
||||
stat = entry.stat()
|
||||
filename = entry.name
|
||||
files.append(
|
||||
{
|
||||
"filename": filename,
|
||||
"size": str(stat.st_size),
|
||||
"path": str(Path(entry.path)),
|
||||
"virtual_path": f"/mnt/user-data/uploads/{filename}",
|
||||
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{filename}",
|
||||
"extension": Path(filename).suffix,
|
||||
"modified": stat.st_mtime,
|
||||
}
|
||||
)
|
||||
return {"files": files, "count": len(files)}
|
||||
|
||||
def delete_upload(self, thread_id: str, filename: str) -> dict:
|
||||
|
||||
Reference in New Issue
Block a user