fix(gateway): enforce safe download for active artifact MIME types to mitigate stored XSS (#1389)

* docs: refocus security review on high-confidence artifact XSS

* fix(gateway): block inline active-content artifacts to mitigate XSS

* chore: remove security review markdown from PR

* Delete SECURITY_REVIEW.md

* fix(gateway): harden artifact attachment handling
This commit is contained in:
13ernkastel
2026-03-26 17:44:25 +08:00
committed by GitHub
parent b9583f7204
commit 0d3cefaa5a
4 changed files with 119 additions and 18 deletions

View File

@@ -534,6 +534,7 @@ All dict-returning methods are validated against Gateway Pydantic response model
We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, workflow, and guidelines. We welcome contributions! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, workflow, and guidelines.
Regression coverage includes Docker sandbox mode detection and provisioner kubeconfig-path handling tests in `backend/tests/`. Regression coverage includes Docker sandbox mode detection and provisioner kubeconfig-path handling tests in `backend/tests/`.
Gateway artifact serving now forces active web content types (`text/html`, `application/xhtml+xml`, `image/svg+xml`) to download as attachments instead of inline rendering, reducing XSS risk for generated artifacts.
## License ## License

View File

@@ -208,7 +208,7 @@ FastAPI application on port 8001 with health check at `GET /health`.
| **Memory** (`/api/memory`) | `GET /` - memory data; `POST /reload` - force reload; `GET /config` - config; `GET /status` - config + data | | **Memory** (`/api/memory`) | `GET /` - memory data; `POST /reload` - force reload; `GET /config` - config; `GET /status` - config + data |
| **Uploads** (`/api/threads/{id}/uploads`) | `POST /` - upload files (auto-converts PDF/PPT/Excel/Word); `GET /list` - list; `DELETE /{filename}` - delete | | **Uploads** (`/api/threads/{id}/uploads`) | `POST /` - upload files (auto-converts PDF/PPT/Excel/Word); `GET /list` - list; `DELETE /{filename}` - delete |
| **Threads** (`/api/threads/{id}`) | `DELETE /` - remove DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail | | **Threads** (`/api/threads/{id}`) | `DELETE /` - remove DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
| **Artifacts** (`/api/threads/{id}/artifacts`) | `GET /{path}` - serve artifacts; `?download=true` for file download | | **Artifacts** (`/api/threads/{id}/artifacts`) | `GET /{path}` - serve artifacts; active content types (`text/html`, `application/xhtml+xml`, `image/svg+xml`) are always forced as download attachments to reduce XSS risk; `?download=true` still forces download for other file types |
| **Suggestions** (`/api/threads/{id}/suggestions`) | `POST /` - generate follow-up questions; rich list/block model content is normalized before JSON parsing | | **Suggestions** (`/api/threads/{id}/suggestions`) | `POST /` - generate follow-up questions; rich list/block model content is normalized before JSON parsing |
Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway. Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway.

View File

@@ -5,7 +5,7 @@ from pathlib import Path
from urllib.parse import quote from urllib.parse import quote
from fastapi import APIRouter, HTTPException, Request from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import FileResponse, HTMLResponse, PlainTextResponse, Response from fastapi.responses import FileResponse, PlainTextResponse, Response
from app.gateway.path_utils import resolve_thread_virtual_path from app.gateway.path_utils import resolve_thread_virtual_path
@@ -13,6 +13,24 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api", tags=["artifacts"]) router = APIRouter(prefix="/api", tags=["artifacts"])
ACTIVE_CONTENT_MIME_TYPES = {
"text/html",
"application/xhtml+xml",
"image/svg+xml",
}
def _build_content_disposition(disposition_type: str, filename: str) -> str:
"""Build an RFC 5987 encoded Content-Disposition header value."""
return f"{disposition_type}; filename*=UTF-8''{quote(filename)}"
def _build_attachment_headers(filename: str, extra_headers: dict[str, str] | None = None) -> dict[str, str]:
headers = {"Content-Disposition": _build_content_disposition("attachment", filename)}
if extra_headers:
headers.update(extra_headers)
return headers
def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool: def is_text_file_by_content(path: Path, sample_size: int = 8192) -> bool:
"""Check if file is text by examining content for null bytes.""" """Check if file is text by examining content for null bytes."""
@@ -61,13 +79,13 @@ def _extract_file_from_skill_archive(zip_path: Path, internal_path: str) -> byte
@router.get( @router.get(
"/threads/{thread_id}/artifacts/{path:path}", "/threads/{thread_id}/artifacts/{path:path}",
summary="Get Artifact File", summary="Get Artifact File",
description="Retrieve an artifact file generated by the AI agent. Supports text, HTML, and binary files.", description="Retrieve an artifact file generated by the AI agent. Text and binary files can be viewed inline, while active web content is always downloaded.",
) )
async def get_artifact(thread_id: str, path: str, request: Request) -> Response: async def get_artifact(thread_id: str, path: str, request: Request, download: bool = False) -> Response:
"""Get an artifact file by its path. """Get an artifact file by its path.
The endpoint automatically detects file types and returns appropriate content types. The endpoint automatically detects file types and returns appropriate content types.
Use the `?download=true` query parameter to force file download. Use the `download` query parameter to force file download for non-active content.
Args: Args:
thread_id: The thread ID. thread_id: The thread ID.
@@ -76,7 +94,7 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
Returns: Returns:
The file content as a FileResponse with appropriate content type: The file content as a FileResponse with appropriate content type:
- HTML files: Rendered as HTML - Active content (HTML/XHTML/SVG): Served as download attachment
- Text files: Plain text with proper MIME type - Text files: Plain text with proper MIME type
- Binary files: Inline display with download option - Binary files: Inline display with download option
@@ -87,11 +105,14 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
- 404 if file not found - 404 if file not found
Query Parameters: Query Parameters:
download (bool): If true, returns file as attachment for download download (bool): If true, forces attachment download for file types that are
otherwise returned inline or as plain text. Active HTML/XHTML/SVG content
is always downloaded regardless of this flag.
Example: Example:
- Get HTML file: `/api/threads/abc123/artifacts/mnt/user-data/outputs/index.html` - Get text file inline: `/api/threads/abc123/artifacts/mnt/user-data/outputs/notes.txt`
- Download file: `/api/threads/abc123/artifacts/mnt/user-data/outputs/data.csv?download=true` - Download file: `/api/threads/abc123/artifacts/mnt/user-data/outputs/data.csv?download=true`
- Active web content such as `.html`, `.xhtml`, and `.svg` artifacts is always downloaded
""" """
# Check if this is a request for a file inside a .skill archive (e.g., xxx.skill/SKILL.md) # Check if this is a request for a file inside a .skill archive (e.g., xxx.skill/SKILL.md)
if ".skill/" in path: if ".skill/" in path:
@@ -118,6 +139,10 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
mime_type, _ = mimetypes.guess_type(internal_path) mime_type, _ = mimetypes.guess_type(internal_path)
# Add cache headers to avoid repeated ZIP extraction (cache for 5 minutes) # Add cache headers to avoid repeated ZIP extraction (cache for 5 minutes)
cache_headers = {"Cache-Control": "private, max-age=300"} cache_headers = {"Cache-Control": "private, max-age=300"}
download_name = Path(internal_path).name or actual_skill_path.stem
if download or mime_type in ACTIVE_CONTENT_MIME_TYPES:
return Response(content=content, media_type=mime_type or "application/octet-stream", headers=_build_attachment_headers(download_name, cache_headers))
if mime_type and mime_type.startswith("text/"): if mime_type and mime_type.startswith("text/"):
return PlainTextResponse(content=content.decode("utf-8"), media_type=mime_type, headers=cache_headers) return PlainTextResponse(content=content.decode("utf-8"), media_type=mime_type, headers=cache_headers)
@@ -139,15 +164,13 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
mime_type, _ = mimetypes.guess_type(actual_path) mime_type, _ = mimetypes.guess_type(actual_path)
# Encode filename for Content-Disposition header (RFC 5987) if download:
encoded_filename = quote(actual_path.name) return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers=_build_attachment_headers(actual_path.name))
# if `download` query parameter is true, return the file as a download # Always force download for active content types to prevent script execution
if request.query_params.get("download"): # in the application origin when users open generated artifacts.
return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"}) if mime_type in ACTIVE_CONTENT_MIME_TYPES:
return FileResponse(path=actual_path, filename=actual_path.name, media_type=mime_type, headers=_build_attachment_headers(actual_path.name))
if mime_type and mime_type == "text/html":
return HTMLResponse(content=actual_path.read_text(encoding="utf-8"))
if mime_type and mime_type.startswith("text/"): if mime_type and mime_type.startswith("text/"):
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
@@ -155,4 +178,4 @@ async def get_artifact(thread_id: str, path: str, request: Request) -> Response:
if is_text_file_by_content(actual_path): if is_text_file_by_content(actual_path):
return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type) return PlainTextResponse(content=actual_path.read_text(encoding="utf-8"), media_type=mime_type)
return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": f"inline; filename*=UTF-8''{encoded_filename}"}) return Response(content=actual_path.read_bytes(), media_type=mime_type, headers={"Content-Disposition": _build_content_disposition("inline", actual_path.name)})

View File

@@ -1,10 +1,25 @@
import asyncio import asyncio
import zipfile
from pathlib import Path from pathlib import Path
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from starlette.requests import Request from starlette.requests import Request
from starlette.responses import FileResponse
import app.gateway.routers.artifacts as artifacts_router import app.gateway.routers.artifacts as artifacts_router
ACTIVE_ARTIFACT_CASES = [
("poc.html", "<html><body><script>alert('xss')</script></body></html>"),
("page.xhtml", '<?xml version="1.0"?><html xmlns="http://www.w3.org/1999/xhtml"><body>hello</body></html>'),
("image.svg", '<svg xmlns="http://www.w3.org/2000/svg"><script>alert("xss")</script></svg>'),
]
def _make_request(query_string: bytes = b"") -> Request:
return Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": query_string})
def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None: def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "note.txt" artifact_path = tmp_path / "note.txt"
@@ -20,8 +35,70 @@ def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypat
monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default) monkeypatch.setattr(Path, "read_text", read_text_with_gbk_default)
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path) monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
request = Request({"type": "http", "method": "GET", "path": "/", "headers": [], "query_string": b""}) request = _make_request()
response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request)) response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request))
assert bytes(response.body).decode("utf-8") == text assert bytes(response.body).decode("utf-8") == text
assert response.media_type == "text/plain" assert response.media_type == "text/plain"
@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES)
def test_get_artifact_forces_download_for_active_content(tmp_path, monkeypatch, filename: str, content: str) -> None:
artifact_path = tmp_path / filename
artifact_path.write_text(content, encoding="utf-8")
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/{filename}", _make_request()))
assert isinstance(response, FileResponse)
assert response.headers.get("content-disposition", "").startswith("attachment;")
@pytest.mark.parametrize(("filename", "content"), ACTIVE_ARTIFACT_CASES)
def test_get_artifact_forces_download_for_active_content_in_skill_archive(tmp_path, monkeypatch, filename: str, content: str) -> None:
skill_path = tmp_path / "sample.skill"
with zipfile.ZipFile(skill_path, "w") as zip_ref:
zip_ref.writestr(filename, content)
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path)
response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/sample.skill/{filename}", _make_request()))
assert response.headers.get("content-disposition", "").startswith("attachment;")
assert bytes(response.body) == content.encode("utf-8")
def test_get_artifact_download_false_does_not_force_attachment(tmp_path, monkeypatch) -> None:
artifact_path = tmp_path / "note.txt"
artifact_path.write_text("hello", encoding="utf-8")
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
app = FastAPI()
app.include_router(artifacts_router.router)
with TestClient(app) as client:
response = client.get("/api/threads/thread-1/artifacts/mnt/user-data/outputs/note.txt?download=false")
assert response.status_code == 200
assert response.text == "hello"
assert "content-disposition" not in response.headers
def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path, monkeypatch) -> None:
skill_path = tmp_path / "sample.skill"
with zipfile.ZipFile(skill_path, "w") as zip_ref:
zip_ref.writestr("notes.txt", "hello")
monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path)
app = FastAPI()
app.include_router(artifacts_router.router)
with TestClient(app) as client:
response = client.get("/api/threads/thread-1/artifacts/mnt/user-data/outputs/sample.skill/notes.txt?download=true")
assert response.status_code == 200
assert response.text == "hello"
assert response.headers.get("content-disposition", "").startswith("attachment;")