From 0d3cefaa5a57f9c0bbaeabd2c391d474b33a6757 Mon Sep 17 00:00:00 2001 From: 13ernkastel Date: Thu, 26 Mar 2026 17:44:25 +0800 Subject: [PATCH] fix(gateway): enforce safe download for active artifact MIME types to mitigate stored XSS (#1389) * docs: refocus security review on high-confidence artifact XSS * fix(gateway): block inline active-content artifacts to mitigate XSS * chore: remove security review markdown from PR * Delete SECURITY_REVIEW.md * fix(gateway): harden artifact attachment handling --- README.md | 1 + backend/CLAUDE.md | 2 +- backend/app/gateway/routers/artifacts.py | 55 ++++++++++++----- backend/tests/test_artifacts_router.py | 79 +++++++++++++++++++++++- 4 files changed, 119 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index f5a0451..073099c 100644 --- a/README.md +++ b/README.md @@ -534,6 +534,7 @@ All dict-returning methods are validated against Gateway Pydantic response model We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, workflow, and guidelines. Regression coverage includes Docker sandbox mode detection and provisioner kubeconfig-path handling tests in `backend/tests/`. +Gateway artifact serving now forces active web content types (`text/html`, `application/xhtml+xml`, `image/svg+xml`) to download as attachments instead of inline rendering, reducing XSS risk for generated artifacts. ## License diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md index 98d32d4..c10a693 100644 --- a/backend/CLAUDE.md +++ b/backend/CLAUDE.md @@ -208,7 +208,7 @@ FastAPI application on port 8001 with health check at `GET /health`. | **Memory** (`/api/memory`) | `GET /` - memory data; `POST /reload` - force reload; `GET /config` - config; `GET /status` - config + data | | **Uploads** (`/api/threads/{id}/uploads`) | `POST /` - upload files (auto-converts PDF/PPT/Excel/Word); `GET /list` - list; `DELETE /{filename}` - delete | | **Threads** (`/api/threads/{id}`) | `DELETE /` - remove DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail | -| **Artifacts** (`/api/threads/{id}/artifacts`) | `GET /{path}` - serve artifacts; `?download=true` for file download | +| **Artifacts** (`/api/threads/{id}/artifacts`) | `GET /{path}` - serve artifacts; active content types (`text/html`, `application/xhtml+xml`, `image/svg+xml`) are always forced as download attachments to reduce XSS risk; `?download=true` still forces download for other file types | | **Suggestions** (`/api/threads/{id}/suggestions`) | `POST /` - generate follow-up questions; rich list/block model content is normalized before JSON parsing | Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway. diff --git a/backend/app/gateway/routers/artifacts.py b/backend/app/gateway/routers/artifacts.py index b9e8afb..a58fd5c 100644 --- a/backend/app/gateway/routers/artifacts.py +++ b/backend/app/gateway/routers/artifacts.py @@ -5,7 +5,7 @@ from pathlib import Path from urllib.parse import quote from fastapi import APIRouter, HTTPException, Request -from fastapi.responses import FileResponse, HTMLResponse, PlainTextResponse, Response +from fastapi.responses import FileResponse, PlainTextResponse, Response from app.gateway.path_utils import resolve_thread_virtual_path @@ -13,6 +13,24 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/api", tags=["artifacts"]) +ACTIVE_CONTENT_MIME_TYPES = { + "text/html", + "application/xhtml+xml", + "image/svg+xml", +} + + +def _build_content_disposition(disposition_type: str, filename: str) -> str: + """Build an RFC 5987 encoded Content-Disposition header value.""" + return f"{disposition_type}; filename*=UTF-8''{quote(filename)}" + + +def _build_attachment_headers(filename: str, extra_headers: dict[str, str] | None = None) -> dict[str, str]: + headers = {"Content-Disposition": _build_content_disposition("attachment", filename)} + if extra_headers: + headers.update(extra_headers) + return headers + def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool: """Check if file is text by examining content for null bytes.""" @@ -61,13 +79,13 @@ def _extract_file_from_skill_archive(zip_path: Path, internal_path: str) -> byte @router.get( "/threads/{thread_id}/artifacts/{path:path}", summary="Get Artifact File", - description="Retrieve an artifact file generated by the AI agent. Supports text, HTML, and binary files.", + description="Retrieve an artifact file generated by the AI agent. Text and binary files can be viewed inline, while active web content is always downloaded.", ) -async def get_artifact(thread_id: str, path: str, request: Request) -> Response: +async def get_artifact(thread_id: str, path: str, request: Request, download: bool = False) -> Response: """Get an artifact file by its path. The endpoint automatically detects file types and returns appropriate content types. - Use the `?download=true` query parameter to force file download. + Use the `download` query parameter to force file download for non-active content. Args: thread_id: The thread ID. @@ -76,7 +94,7 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response: Returns: The file content as a FileResponse with appropriate content type: - - HTML files: Rendered as HTML + - Active content (HTML/XHTML/SVG): Served as download attachment - Text files: Plain text with proper MIME type - Binary files: Inline display with download option @@ -87,11 +105,14 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response: - 404 if file not found Query Parameters: - download (bool): If true, returns file as attachment for download + download (bool): If true, forces attachment download for file types that are + otherwise returned inline or as plain text. Active HTML/XHTML/SVG content + is always downloaded regardless of this flag. Example: - - Get HTML file: `/api/threads/abc123/artifacts/mnt/user-data/outputs/index.html` + - Get text file inline: `/api/threads/abc123/artifacts/mnt/user-data/outputs/notes.txt` - Download file: `/api/threads/abc123/artifacts/mnt/user-data/outputs/data.csv?download=true` + - Active web content such as `.html`, `.xhtml`, and `.svg` artifacts is always downloaded """ # Check if this is a request for a file inside a .skill archive (e.g., xxx.skill/SKILL.md) if ".skill/" in path: @@ -118,6 +139,10 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response: mime_type, _ = mimetypes.guess_type(internal_path) # Add cache headers to avoid repeated ZIP extraction (cache for 5 minutes) cache_headers = {"Cache-Control": "private, max-age=300"} + download_name = Path(internal_path).name or actual_skill_path.stem + if download or mime_type in ACTIVE_CONTENT_MIME_TYPES: + return Response(content=content, media_type=mime_type or "application/octet-stream", headers=_build_attachment_headers(download_name, cache_headers)) + if mime_type and mime_type.startswith("text/"): return PlainTextResponse(content=content.decode("utf-8"), media_type=mime_type, headers=cache_headers) @@ -139,15 +164,13 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response: mime_type, _ = mimetypes.guess_type(actual_path) - # Encode filename for Content-Disposition header (RFC 5987) - encoded_filename = quote(actual_path.name) + if download: + return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers=_build_attachment_headers(actual_path.name)) - # if `download` query parameter is true, return the file as a download - if request.query_params.get("download"): - return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"}) - - if mime_type and mime_type == "text/html": - return HTMLResponse(content=actual_path.read_text(encoding="utf-8")) + # Always force download for active content types to prevent script execution + # in the application origin when users open generated artifacts. + if mime_type in ACTIVE_CONTENT_MIME_TYPES: + return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers=_build_attachment_headers(actual_path.name)) if mime_type and mime_type.startswith("text/"): return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) @@ -155,4 +178,4 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response: if is_text_file_by_content(actual_path): return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) - return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": f"inline; filename*=UTF-8''{encoded_filename}"}) + return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": _build_content_disposition("inline", actual_path.name)}) diff --git a/backend/tests/test_artifacts_router.py b/backend/tests/test_artifacts_router.py index 9a4e5dd..9a30ff4 100644 --- a/backend/tests/test_artifacts_router.py +++ b/backend/tests/test_artifacts_router.py @@ -1,10 +1,25 @@ import asyncio +import zipfile from pathlib import Path +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient from starlette.requests import Request +from starlette.responses import FileResponse import app.gateway.routers.artifacts as artifacts_router +ACTIVE_ARTIFACT_CASES = [ + ("poc.html", ""), + ("page.xhtml", 'hello'), + ("image.svg", ''), +] + + +def _make_request(query_string: bytes = b"") -> Request: + return Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": query_string}) + def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None: artifact_path = tmp_path / "note.txt" @@ -20,8 +35,70 @@ def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypat monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default) monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path) - request = Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": b""}) + request = _make_request() response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request)) assert bytes(response.body).decode("utf-8") == text assert response.media_type == "text/plain" + + +@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES) +def test_get_artifact_forces_download_for_active_content(tmp_path, monkeypatch, filename: str, content: str) -> None: + artifact_path = tmp_path / filename + artifact_path.write_text(content, encoding="utf-8") + + monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path) + + response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/{filename}", _make_request())) + + assert isinstance(response, FileResponse) + assert response.headers.get("content-disposition", "").startswith("attachment;") + + +@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES) +def test_get_artifact_forces_download_for_active_content_in_skill_archive(tmp_path, monkeypatch, filename: str, content: str) -> None: + skill_path = tmp_path / "sample.skill" + with zipfile.ZipFile(skill_path, "w") as zip_ref: + zip_ref.writestr(filename, content) + + monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path) + + response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/sample.skill/{filename}", _make_request())) + + assert response.headers.get("content-disposition", "").startswith("attachment;") + assert bytes(response.body) == content.encode("utf-8") + + +def test_get_artifact_download_false_does_not_force_attachment(tmp_path, monkeypatch) -> None: + artifact_path = tmp_path / "note.txt" + artifact_path.write_text("hello", encoding="utf-8") + + monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path) + + app = FastAPI() + app.include_router(artifacts_router.router) + + with TestClient(app) as client: + response = client.get("/api/threads/thread-1/artifacts/mnt/user-data/outputs/note.txt?download=false") + + assert response.status_code == 200 + assert response.text == "hello" + assert "content-disposition" not in response.headers + + +def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path, monkeypatch) -> None: + skill_path = tmp_path / "sample.skill" + with zipfile.ZipFile(skill_path, "w") as zip_ref: + zip_ref.writestr("notes.txt", "hello") + + monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path) + + app = FastAPI() + app.include_router(artifacts_router.router) + + with TestClient(app) as client: + response = client.get("/api/threads/thread-1/artifacts/mnt/user-data/outputs/sample.skill/notes.txt?download=true") + + assert response.status_code == 200 + assert response.text == "hello" + assert response.headers.get("content-disposition", "").startswith("attachment;")