fix(client): Harden upload validation and conversion flow (#989)

* fix(client): Harden upload validation and conversion flow

* test(client): cover event-loop upload conversion reuse

* test(client): remove unused import in upload regression coverage

* fix(client): load optional shared checkpointer fallback

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

* docs(backend): document config preflight and IM channels

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Ryanba
2026-03-11 15:17:31 +08:00
committed by GitHub
parent a0c38a5cf3
commit 4bae3c724c
4 changed files with 174 additions and 49 deletions

View File

@@ -19,6 +19,7 @@ import asyncio
import json
import logging
import mimetypes
import os
import re
import shutil
import tempfile
@@ -723,52 +724,79 @@ class DeerFlowClient:
Raises:
FileNotFoundError: If any file does not exist.
ValueError: If any supplied path exists but is not a regular file.
"""
from src.gateway.routers.uploads import CONVERTIBLE_EXTENSIONS, convert_file_to_markdown
# Validate all files upfront to avoid partial uploads.
resolved_files = []
convertible_extensions = {ext.lower() for ext in CONVERTIBLE_EXTENSIONS}
has_convertible_file = False
for f in files:
p = Path(f)
if not p.exists():
raise FileNotFoundError(f"File not found: {f}")
if not p.is_file():
raise ValueError(f"Path is not a file: {f}")
resolved_files.append(p)
if not has_convertible_file and p.suffix.lower() in convertible_extensions:
has_convertible_file = True
uploads_dir = self._get_uploads_dir(thread_id)
uploaded_files: list[dict] = []
for src_path in resolved_files:
dest = uploads_dir / src_path.name
shutil.copy2(src_path, dest)
conversion_pool = None
if has_convertible_file:
try:
asyncio.get_running_loop()
except RuntimeError:
conversion_pool = None
else:
import concurrent.futures
info: dict[str, Any] = {
"filename": src_path.name,
"size": str(dest.stat().st_size),
"path": str(dest),
"virtual_path": f"/mnt/user-data/uploads/{src_path.name}",
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{src_path.name}",
}
# Reuse one worker when already inside an event loop to avoid
# creating a new ThreadPoolExecutor per converted file.
conversion_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
if src_path.suffix.lower() in CONVERTIBLE_EXTENSIONS:
try:
def _convert_in_thread(path: Path):
return asyncio.run(convert_file_to_markdown(path))
try:
for src_path in resolved_files:
dest = uploads_dir / src_path.name
shutil.copy2(src_path, dest)
info: dict[str, Any] = {
"filename": src_path.name,
"size": str(dest.stat().st_size),
"path": str(dest),
"virtual_path": f"/mnt/user-data/uploads/{src_path.name}",
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{src_path.name}",
}
if src_path.suffix.lower() in convertible_extensions:
try:
asyncio.get_running_loop()
import concurrent.futures
if conversion_pool is not None:
md_path = conversion_pool.submit(_convert_in_thread, dest).result()
else:
md_path = asyncio.run(convert_file_to_markdown(dest))
except Exception:
logger.warning(
"Failed to convert %s to markdown",
src_path.name,
exc_info=True,
)
md_path = None
with concurrent.futures.ThreadPoolExecutor() as pool:
md_path = pool.submit(lambda: asyncio.run(convert_file_to_markdown(dest))).result()
except RuntimeError:
md_path = asyncio.run(convert_file_to_markdown(dest))
except Exception:
logger.warning("Failed to convert %s to markdown", src_path.name, exc_info=True)
md_path = None
if md_path is not None:
info["markdown_file"] = md_path.name
info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"
if md_path is not None:
info["markdown_file"] = md_path.name
info["markdown_virtual_path"] = f"/mnt/user-data/uploads/{md_path.name}"
info["markdown_artifact_url"] = f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{md_path.name}"
uploaded_files.append(info)
uploaded_files.append(info)
finally:
if conversion_pool is not None:
conversion_pool.shutdown(wait=True)
return {
"success": True,
@@ -791,20 +819,23 @@ class DeerFlowClient:
return {"files": [], "count": 0}
files = []
for fp in sorted(uploads_dir.iterdir()):
if fp.is_file():
stat = fp.stat()
files.append(
{
"filename": fp.name,
"size": str(stat.st_size),
"path": str(fp),
"virtual_path": f"/mnt/user-data/uploads/{fp.name}",
"artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{fp.name}",
"extension": fp.suffix,
"modified": stat.st_mtime,
}
)
with os.scandir(uploads_dir) as entries:
file_entries = [entry for entry in entries if entry.is_file()]
for entry in sorted(file_entries, key=lambda item: item.name):
stat = entry.stat()
filename = entry.name
files.append(
{
"filename": filename,
"size": str(stat.st_size),
"path": str(Path(entry.path)),
                    "virtual_path": f"/mnt/user-data/uploads/{filename}",
                    "artifact_url": f"/api/threads/{thread_id}/artifacts/mnt/user-data/uploads/{filename}",
"extension": Path(filename).suffix,
"modified": stat.st_mtime,
}
)
return {"files": files, "count": len(files)}
def delete_upload(self, thread_id: str, filename: str) -> dict: