mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-25 07:04:44 +08:00
fix(memory): prevent file upload events from persisting in long-term memory (#971)
* fix(memory): prevent file upload events from persisting in long-term memory Uploaded files are session-scoped and unavailable in future sessions. Previously, upload interactions were recorded in memory, causing the agent to search for non-existent files in subsequent conversations. Changes: - memory_middleware: skip human messages containing <uploaded_files> and their paired AI responses from the memory queue - updater: post-process generated memory to strip upload mentions before saving to file - prompt: instruct the memory LLM to ignore file upload events Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(memory): address Copilot review feedback on upload filtering - memory_middleware: strip <uploaded_files> block from human messages instead of dropping the entire turn; only skip the turn (and paired AI response) when nothing remains after stripping - updater: narrow the upload-scrubbing regex to explicit upload events (avoids false-positive removal of "User works with CSV files" etc.); also filter upload-event facts from the facts array - prompt: move `import re` to module scope; skip upload-only human messages (empty after stripping) rather than appending "User: " Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(memory): allow optional words between 'upload' and 'file' in scrub regex The previous pattern required 'uploading file' with no intervening words, so 'uploading a test file' was not matched and leaked into long-term memory. Allow up to 3 modifier words between the verb and noun (e.g. 'uploading a test file', 'uploaded the attachment'). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(memory): add unit tests for upload filtering in memory pipeline Covers _filter_messages_for_memory and _strip_upload_mentions_from_memory per Copilot review suggestion. 15 test cases verify: - Upload-only turns (and paired AI responses) are excluded from memory queue - User's real question is preserved when combined with an upload block - Upload file paths are never present in filtered message content - Intermediate tool messages are always excluded - Multi-turn conversations: only the upload turn is dropped - Multimodal (list-content) human messages are handled - Upload-event sentences are removed from summaries and facts - Legitimate file-related facts (CSV preferences, PDF exports) are preserved - "uploading a test file" (words between verb and noun) is caught by regex Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Memory updater for reading, writing, and updating memory data."""
|
||||
|
||||
import json
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -135,6 +136,47 @@ def _load_memory_from_file(agent_name: str | None = None) -> dict[str, Any]:
|
||||
return _create_empty_memory()
|
||||
|
||||
|
||||
# Matches sentences that describe a file-upload *event* rather than general
|
||||
# file-related work. Deliberately narrow to avoid removing legitimate facts
|
||||
# such as "User works with CSV files" or "prefers PDF export".
|
||||
_UPLOAD_SENTENCE_RE = re.compile(
|
||||
r"[^.!?]*\b(?:"
|
||||
r"upload(?:ed|ing)?(?:\s+\w+){0,3}\s+(?:file|files?|document|documents?|attachment|attachments?)"
|
||||
r"|file\s+upload"
|
||||
r"|/mnt/user-data/uploads/"
|
||||
r"|<uploaded_files>"
|
||||
r")[^.!?]*[.!?]?\s*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def _strip_upload_mentions_from_memory(memory_data: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Remove sentences about file uploads from all memory summaries and facts.
|
||||
|
||||
Uploaded files are session-scoped; persisting upload events in long-term
|
||||
memory causes the agent to search for non-existent files in future sessions.
|
||||
"""
|
||||
# Scrub summaries in user/history sections
|
||||
for section in ("user", "history"):
|
||||
section_data = memory_data.get(section, {})
|
||||
for _key, val in section_data.items():
|
||||
if isinstance(val, dict) and "summary" in val:
|
||||
cleaned = _UPLOAD_SENTENCE_RE.sub("", val["summary"]).strip()
|
||||
cleaned = re.sub(r" +", " ", cleaned)
|
||||
val["summary"] = cleaned
|
||||
|
||||
# Also remove any facts that describe upload events
|
||||
facts = memory_data.get("facts", [])
|
||||
if facts:
|
||||
memory_data["facts"] = [
|
||||
f
|
||||
for f in facts
|
||||
if not _UPLOAD_SENTENCE_RE.search(f.get("content", ""))
|
||||
]
|
||||
|
||||
return memory_data
|
||||
|
||||
|
||||
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
||||
"""Save memory data to file and update cache.
|
||||
|
||||
@@ -244,6 +286,12 @@ class MemoryUpdater:
|
||||
# Apply updates
|
||||
updated_memory = self._apply_updates(current_memory, update_data, thread_id)
|
||||
|
||||
# Strip file-upload mentions from all summaries before saving.
|
||||
# Uploaded files are session-scoped and won't exist in future sessions,
|
||||
# so recording upload events in long-term memory causes the agent to
|
||||
# try (and fail) to locate those files in subsequent conversations.
|
||||
updated_memory = _strip_upload_mentions_from_memory(updated_memory)
|
||||
|
||||
# Save
|
||||
return _save_memory_to_file(updated_memory, agent_name)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user