Implement optimistic UI for file uploads and enhance message handling (#967)

* feat(upload): implement optimistic UI for file uploads and enhance message handling
* feat(middleware): enhance file handling by collecting historical uploads from directory
* feat(thread-title): update page title handling for new threads and improve loading state
* feat(uploads-middleware): enhance file extraction by verifying file existence in uploads directory
* feat(thread-stream): update file path reference to use virtual_path for uploads
* feat(tests): add core behaviour tests for UploadsMiddleware
* feat(tests): remove unused pytest import from test_uploads_middleware_core_logic.py
* feat: enhance file upload handling and localization support
  - Update UploadsMiddleware to validate filenames more robustly.
  - Modify MessageListItem to parse uploaded files from raw content for backward compatibility.
  - Add localization for uploading messages in English and Chinese.
  - Introduce parseUploadedFiles utility to extract uploaded files from message content.
2026-04-26 07:14:47 +08:00 · 2026-03-05 11:16:34 +08:00
parent 3ada4f98b1
commit b17c087174
9 changed files with 790 additions and 258 deletions
--- a/backend/src/agents/middlewares/uploads_middleware.py
+++ b/backend/src/agents/middlewares/uploads_middleware.py
@@ -1,6 +1,6 @@
 """Middleware to inject uploaded files information into agent context."""

-import re
+import logging
 from pathlib import Path
 from typing import NotRequired, override

@@ -11,6 +11,8 @@ from langgraph.runtime import Runtime

 from src.config.paths import Paths, get_paths

+logger = logging.getLogger(__name__)
+

 class UploadsMiddlewareState(AgentState):
    """State schema for uploads middleware."""
@@ -21,8 +23,9 @@ class UploadsMiddlewareState(AgentState):
 class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
    """Middleware to inject uploaded files information into the agent context.

-    This middleware lists all files in the thread's uploads directory and
-    adds a system message with the file list before the agent processes the request.
+    Reads new-file metadata from the last human message's additional_kwargs.files
+    (set by the frontend after upload), lists earlier uploads from the thread's uploads
+    directory, and prepends an <uploaded_files> block so the model knows which files are available.
    """

    state_schema = UploadsMiddlewareState
@@ -36,111 +39,91 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
        super().__init__()
        self._paths = Paths(base_dir) if base_dir else get_paths()

-    def _get_uploads_dir(self, thread_id: str) -> Path:
-        """Get the uploads directory for a thread.
-
-        Args:
-            thread_id: The thread ID.
-
-        Returns:
-            Path to the uploads directory.
-        """
-        return self._paths.sandbox_uploads_dir(thread_id)
-
-    def _list_newly_uploaded_files(self, thread_id: str, last_message_files: set[str]) -> list[dict]:
-        """List only newly uploaded files that weren't in the last message.
-
-        Args:
-            thread_id: The thread ID.
-            last_message_files: Set of filenames that were already shown in previous messages.
-
-        Returns:
-            List of new file information dictionaries.
-        """
-        uploads_dir = self._get_uploads_dir(thread_id)
-
-        if not uploads_dir.exists():
-            return []
-
-        files = []
-        for file_path in sorted(uploads_dir.iterdir()):
-            if file_path.is_file() and file_path.name not in last_message_files:
-                stat = file_path.stat()
-                files.append(
-                    {
-                        "filename": file_path.name,
-                        "size": stat.st_size,
-                        "path": f"/mnt/user-data/uploads/{file_path.name}",
-                        "extension": file_path.suffix,
-                    }
-                )
-
-        return files
-
-    def _create_files_message(self, files: list[dict]) -> str:
+    def _create_files_message(self, new_files: list[dict], historical_files: list[dict]) -> str:
        """Create a formatted message listing uploaded files.

        Args:
-            files: List of file information dictionaries.
+            new_files: Files uploaded in the current message.
+            historical_files: Files uploaded in previous messages.

        Returns:
-            Formatted string listing the files.
+            Formatted string inside <uploaded_files> tags.
        """
-        if not files:
-            return "<uploaded_files>\nNo files have been uploaded yet.\n</uploaded_files>"
+        lines = ["<uploaded_files>"]

-        lines = ["<uploaded_files>", "The following files have been uploaded and are available for use:", ""]
-
-        for file in files:
+        lines.append("The following files were uploaded in this message:")
+        lines.append("")
+        for file in new_files:
            size_kb = file["size"] / 1024
-            if size_kb < 1024:
-                size_str = f"{size_kb:.1f} KB"
-            else:
-                size_str = f"{size_kb / 1024:.1f} MB"
-
+            size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
            lines.append(f"- {file['filename']} ({size_str})")
            lines.append(f"  Path: {file['path']}")
            lines.append("")

+        if historical_files:
+            lines.append("The following files were uploaded in previous messages and are still available:")
+            lines.append("")
+            for file in historical_files:
+                size_kb = file["size"] / 1024
+                size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
+                lines.append(f"- {file['filename']} ({size_str})")
+                lines.append(f"  Path: {file['path']}")
+                lines.append("")
+
        lines.append("You can read these files using the `read_file` tool with the paths shown above.")
        lines.append("</uploaded_files>")

        return "\n".join(lines)

-    def _extract_files_from_message(self, content: str) -> set[str]:
-        """Extract filenames from uploaded_files tag in message content.
+    def _files_from_kwargs(self, message: HumanMessage, uploads_dir: Path | None = None) -> list[dict] | None:
+        """Extract file info from message additional_kwargs.files.
+
+        The frontend sends uploaded file metadata in additional_kwargs.files
+        after a successful upload. Each entry has: filename, size (bytes),
+        path (virtual path), status.

        Args:
-            content: Message content that may contain <uploaded_files> tag.
+            message: The human message to inspect.
+            uploads_dir: Physical uploads directory used to verify file existence.
+                         When provided, entries whose files no longer exist are skipped.

        Returns:
-            Set of filenames mentioned in the tag.
+            List of file dicts with virtual paths, or None if the field is absent, empty, or yields no valid entries.
        """
-        # Match <uploaded_files>...</uploaded_files> tag
-        match = re.search(r"<uploaded_files>([\s\S]*?)</uploaded_files>", content)
-        if not match:
-            return set()
+        kwargs_files = (message.additional_kwargs or {}).get("files")
+        if not isinstance(kwargs_files, list) or not kwargs_files:
+            return None

-        files_content = match.group(1)
-
-        # Extract filenames from lines like "- filename.ext (size)"
-        # Need to capture everything before the opening parenthesis, including spaces
-        filenames = set()
-        for line in files_content.split("\n"):
-            # Match pattern: - filename with spaces.ext (size)
-            # Changed from [^\s(]+ to [^(]+ to allow spaces in filename
-            file_match = re.match(r"^-\s+(.+?)\s*\(", line.strip())
-            if file_match:
-                filenames.add(file_match.group(1).strip())
-
-        return filenames
+        files = []
+        for f in kwargs_files:
+            if not isinstance(f, dict):
+                continue
+            filename = f.get("filename") or ""
+            if not filename or Path(filename).name != filename:
+                continue
+            if uploads_dir is not None and not (uploads_dir / filename).is_file():
+                continue
+            files.append(
+                {
+                    "filename": filename,
+                    "size": int(f.get("size") or 0),
+                    "path": f"/mnt/user-data/uploads/{filename}",
+                    "extension": Path(filename).suffix,
+                }
+            )
+        return files if files else None

    @override
    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime) -> dict | None:
        """Inject uploaded files information before agent execution.

-        Only injects files that weren't already shown in previous messages.
-        Prepends file info to the last human message content.
+        New files come from the current message's additional_kwargs.files.
+        Historical files are scanned from the thread's uploads directory,
+        excluding the new ones.
+
+        Prepends <uploaded_files> context to the last human message content.
+        The original additional_kwargs (including files metadata) is preserved
+        on the updated message so the frontend can read it from the stream.

        Args:
            state: Current agent state.
@@ -149,72 +132,70 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
        Returns:
            State updates including uploaded files list.
        """
-        import logging
-
-        logger = logging.getLogger(__name__)
-
-        thread_id = runtime.context.get("thread_id")
-        if thread_id is None:
-            return None
-
        messages = list(state.get("messages", []))
        if not messages:
            return None

-        # Track all filenames that have been shown in previous messages (EXCEPT the last one)
-        shown_files: set[str] = set()
-        for msg in messages[:-1]:  # Scan all messages except the last one
-            if isinstance(msg, HumanMessage):
-                content = msg.content if isinstance(msg.content, str) else ""
-                extracted = self._extract_files_from_message(content)
-                shown_files.update(extracted)
-                if extracted:
-                    logger.info(f"Found previously shown files: {extracted}")
-
-        logger.info(f"Total shown files from history: {shown_files}")
-
-        # List only newly uploaded files
-        files = self._list_newly_uploaded_files(thread_id, shown_files)
-        logger.info(f"Newly uploaded files to inject: {[f['filename'] for f in files]}")
-
-        if not files:
-            return None
-
-        # Find the last human message and prepend file info to it
        last_message_index = len(messages) - 1
        last_message = messages[last_message_index]

        if not isinstance(last_message, HumanMessage):
            return None

+        # Resolve uploads directory for existence checks
+        thread_id = runtime.context.get("thread_id")
+        uploads_dir = self._paths.sandbox_uploads_dir(thread_id) if thread_id else None
+
+        # Get newly uploaded files from the current message's additional_kwargs.files
+        new_files = self._files_from_kwargs(last_message, uploads_dir) or []
+
+        # Collect historical files from the uploads directory (all except the new ones)
+        new_filenames = {f["filename"] for f in new_files}
+        historical_files: list[dict] = []
+        if uploads_dir and uploads_dir.exists():
+            for file_path in sorted(uploads_dir.iterdir()):
+                if file_path.is_file() and file_path.name not in new_filenames:
+                    stat = file_path.stat()
+                    historical_files.append(
+                        {
+                            "filename": file_path.name,
+                            "size": stat.st_size,
+                            "path": f"/mnt/user-data/uploads/{file_path.name}",
+                            "extension": file_path.suffix,
+                        }
+                    )
+
+        if not new_files and not historical_files:
+            return None
+
+        logger.debug(f"New files: {[f['filename'] for f in new_files]}, historical: {[f['filename'] for f in historical_files]}")
+
        # Create files message and prepend to the last human message content
-        files_message = self._create_files_message(files)
+        files_message = self._create_files_message(new_files, historical_files)

        # Extract original content - handle both string and list formats
        original_content = ""
        if isinstance(last_message.content, str):
            original_content = last_message.content
        elif isinstance(last_message.content, list):
-            # Content is a list of content blocks (e.g., [{"type": "text", "text": "..."}])
            text_parts = []
            for block in last_message.content:
                if isinstance(block, dict) and block.get("type") == "text":
                    text_parts.append(block.get("text", ""))
            original_content = "\n".join(text_parts)

-        logger.info(f"Original message content: {original_content[:100] if original_content else '(empty)'}")
-
-        # Create new message with combined content
+        # Create new message with combined content.
+        # Preserve additional_kwargs (including files metadata) so the frontend
+        # can read structured file info from the streamed message.
        updated_message = HumanMessage(
            content=f"{files_message}\n\n{original_content}",
            id=last_message.id,
            additional_kwargs=last_message.additional_kwargs,
        )

-        # Replace the last message
        messages[last_message_index] = updated_message

        return {
-            "uploaded_files": files,
+            "uploaded_files": new_files,
            "messages": messages,
        }