mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-19 04:14:46 +08:00
fix(memory): prevent file upload events from persisting in long-term memory (#971)
* fix(memory): prevent file upload events from persisting in long-term memory Uploaded files are session-scoped and unavailable in future sessions. Previously, upload interactions were recorded in memory, causing the agent to search for non-existent files in subsequent conversations. Changes: - memory_middleware: skip human messages containing <uploaded_files> and their paired AI responses from the memory queue - updater: post-process generated memory to strip upload mentions before saving to file - prompt: instruct the memory LLM to ignore file upload events Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(memory): address Copilot review feedback on upload filtering - memory_middleware: strip <uploaded_files> block from human messages instead of dropping the entire turn; only skip the turn (and paired AI response) when nothing remains after stripping - updater: narrow the upload-scrubbing regex to explicit upload events (avoids false-positive removal of "User works with CSV files" etc.); also filter upload-event facts from the facts array - prompt: move `import re` to module scope; skip upload-only human messages (empty after stripping) rather than appending "User: " Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix(memory): allow optional words between 'upload' and 'file' in scrub regex The previous pattern required 'uploading file' with no intervening words, so 'uploading a test file' was not matched and leaked into long-term memory. Allow up to 3 modifier words between the verb and noun (e.g. 'uploading a test file', 'uploaded the attachment'). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * test(memory): add unit tests for upload filtering in memory pipeline Covers _filter_messages_for_memory and _strip_upload_mentions_from_memory per Copilot review suggestion. 
15 test cases verify: - Upload-only turns (and paired AI responses) are excluded from memory queue - User's real question is preserved when combined with an upload block - Upload file paths are never present in filtered message content - Intermediate tool messages are always excluded - Multi-turn conversations: only the upload turn is dropped - Multimodal (list-content) human messages are handled - Upload-event sentences are removed from summaries and facts - Legitimate file-related facts (CSV preferences, PDF exports) are preserved - "uploading a test file" (words between verb and noun) is caught by regex Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
"""Prompt templates for memory update and injection."""
|
"""Prompt templates for memory update and injection."""
|
||||||
|
|
||||||
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -108,6 +109,9 @@ Important Rules:
|
|||||||
- For history sections, integrate new information chronologically into appropriate time period
|
- For history sections, integrate new information chronologically into appropriate time period
|
||||||
- Preserve technical accuracy - keep exact names of technologies, companies, projects
|
- Preserve technical accuracy - keep exact names of technologies, companies, projects
|
||||||
- Focus on information useful for future interactions and personalization
|
- Focus on information useful for future interactions and personalization
|
||||||
|
- IMPORTANT: Do NOT record file upload events in memory. Uploaded files are
|
||||||
|
session-specific and ephemeral — they will not be accessible in future sessions.
|
||||||
|
Recording upload events causes confusion in subsequent conversations.
|
||||||
|
|
||||||
Return ONLY valid JSON, no explanation or markdown."""
|
Return ONLY valid JSON, no explanation or markdown."""
|
||||||
|
|
||||||
@@ -249,6 +253,16 @@ def format_conversation_for_update(messages: list[Any]) -> str:
|
|||||||
text_parts = [p.get("text", "") for p in content if isinstance(p, dict) and "text" in p]
|
text_parts = [p.get("text", "") for p in content if isinstance(p, dict) and "text" in p]
|
||||||
content = " ".join(text_parts) if text_parts else str(content)
|
content = " ".join(text_parts) if text_parts else str(content)
|
||||||
|
|
||||||
|
# Strip uploaded_files tags from human messages to avoid persisting
|
||||||
|
# ephemeral file path info into long-term memory. Skip the turn entirely
|
||||||
|
# when nothing remains after stripping (upload-only message).
|
||||||
|
if role == "human":
|
||||||
|
content = re.sub(
|
||||||
|
r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", "", str(content)
|
||||||
|
).strip()
|
||||||
|
if not content:
|
||||||
|
continue
|
||||||
|
|
||||||
# Truncate very long messages
|
# Truncate very long messages
|
||||||
if len(str(content)) > 1000:
|
if len(str(content)) > 1000:
|
||||||
content = str(content)[:1000] + "..."
|
content = str(content)[:1000] + "..."
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""Memory updater for reading, writing, and updating memory data."""
|
"""Memory updater for reading, writing, and updating memory data."""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -135,6 +136,47 @@ def _load_memory_from_file(agent_name: str | None = None) -> dict[str, Any]:
|
|||||||
return _create_empty_memory()
|
return _create_empty_memory()
|
||||||
|
|
||||||
|
|
||||||
|
# One character inside a sentence.  A '.', '!' or '?' only terminates the
# sentence when it is followed by whitespace or the end of the text, so the
# dots in "data.csv" or inside a file path stay within the sentence instead of
# cutting it short and leaving a fragment such as "csv." behind.
_SENTENCE_CHAR = r"(?:[^.!?]|[.!?](?=\S))"

# Phrases that identify a file-upload *event* rather than general file-related
# work.  Deliberately narrow to avoid removing legitimate facts such as
# "User works with CSV files" or "prefers PDF export".
# The word boundary is attached only to the alternatives that start with a
# word character: a leading "\b" can never match between a space and "/" or
# "<", which would make the path and tag alternatives unreachable.
_UPLOAD_EVENT = (
    r"(?:"
    # "upload / uploads / uploaded / uploading", up to three modifier words,
    # then the noun ("uploading a test file", "uploaded the attachment").
    r"\bupload(?:s|ed|ing)?(?:\s+\w+){0,3}\s+(?:files?|documents?|attachments?)\b"
    r"|\bfile\s+upload"
    r"|/mnt/user-data/uploads/"
    r"|<uploaded_files>"
    r")"
)

# Matches one whole sentence (including its terminator and trailing
# whitespace) that contains an upload-event phrase.
_UPLOAD_SENTENCE_RE = re.compile(
    _SENTENCE_CHAR + r"*?" + _UPLOAD_EVENT + _SENTENCE_CHAR + r"*[.!?]?\s*",
    re.IGNORECASE,
)


def _is_upload_fact(fact: Any) -> bool:
    """Return True when a fact entry's text describes a file-upload event.

    Entries whose content is missing or not a string cannot be inspected and
    are therefore never classified as upload facts.
    """
    content = fact.get("content") if isinstance(fact, dict) else fact
    return isinstance(content, str) and _UPLOAD_SENTENCE_RE.search(content) is not None


def _strip_upload_mentions_from_memory(memory_data: dict[str, Any]) -> dict[str, Any]:
    """Remove sentences about file uploads from all memory summaries and facts.

    Uploaded files are session-scoped; persisting upload events in long-term
    memory causes the agent to search for non-existent files in future sessions.

    Args:
        memory_data: Memory dictionary. The "user" and "history" sections are
            scanned for entries that carry a "summary" string, and the "facts"
            list is scanned for entries whose "content" describes an upload.

    Returns:
        The same ``memory_data`` object, modified in place.
    """
    # Scrub summaries in user/history sections
    for section in ("user", "history"):
        section_data = memory_data.get(section) or {}
        if not isinstance(section_data, dict):
            continue
        for entry in section_data.values():
            if not isinstance(entry, dict):
                continue
            summary = entry.get("summary")
            if not isinstance(summary, str):
                # Missing or malformed summary: nothing to scrub.
                continue
            # Substitute a single space (not the empty string) so the
            # sentences on either side of a removed one stay separated; the
            # match usually swallows the whitespace on both of its edges.
            cleaned = _UPLOAD_SENTENCE_RE.sub(" ", summary)
            entry["summary"] = re.sub(r" +", " ", cleaned).strip()

    # Also remove any facts that describe upload events
    facts = memory_data.get("facts", [])
    if isinstance(facts, list) and facts:
        memory_data["facts"] = [fact for fact in facts if not _is_upload_fact(fact)]

    return memory_data
|
||||||
|
|
||||||
|
|
||||||
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
||||||
"""Save memory data to file and update cache.
|
"""Save memory data to file and update cache.
|
||||||
|
|
||||||
@@ -244,6 +286,12 @@ class MemoryUpdater:
|
|||||||
# Apply updates
|
# Apply updates
|
||||||
updated_memory = self._apply_updates(current_memory, update_data, thread_id)
|
updated_memory = self._apply_updates(current_memory, update_data, thread_id)
|
||||||
|
|
||||||
|
# Strip file-upload mentions from all summaries and facts before saving.
|
||||||
|
# Uploaded files are session-scoped and won't exist in future sessions,
|
||||||
|
# so recording upload events in long-term memory causes the agent to
|
||||||
|
# try (and fail) to locate those files in subsequent conversations.
|
||||||
|
updated_memory = _strip_upload_mentions_from_memory(updated_memory)
|
||||||
|
|
||||||
# Save
|
# Save
|
||||||
return _save_memory_to_file(updated_memory, agent_name)
|
return _save_memory_to_file(updated_memory, agent_name)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
"""Middleware for memory mechanism."""
|
"""Middleware for memory mechanism."""
|
||||||
|
|
||||||
|
import re
|
||||||
from typing import Any, override
|
from typing import Any, override
|
||||||
|
|
||||||
from langchain.agents import AgentState
|
from langchain.agents import AgentState
|
||||||
@@ -22,10 +23,16 @@ def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
|
|||||||
This filters out:
|
This filters out:
|
||||||
- Tool messages (intermediate tool call results)
|
- Tool messages (intermediate tool call results)
|
||||||
- AI messages with tool_calls (intermediate steps, not final responses)
|
- AI messages with tool_calls (intermediate steps, not final responses)
|
||||||
|
- The <uploaded_files> block injected by UploadsMiddleware into human messages
|
||||||
|
(file paths are session-scoped and must not persist in long-term memory).
|
||||||
|
The user's actual question is preserved; only turns whose content is entirely
|
||||||
|
the upload block (nothing remains after stripping) are dropped along with
|
||||||
|
their paired assistant response.
|
||||||
|
|
||||||
Only keeps:
|
Only keeps:
|
||||||
- Human messages (user input)
|
- Human messages (with the ephemeral upload block removed)
|
||||||
- AI messages without tool_calls (final assistant responses)
|
- AI messages without tool_calls (final assistant responses), unless the
|
||||||
|
paired human turn was upload-only and had no real user text.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: List of all conversation messages.
|
messages: List of all conversation messages.
|
||||||
@@ -33,17 +40,47 @@ def _filter_messages_for_memory(messages: list[Any]) -> list[Any]:
|
|||||||
Returns:
|
Returns:
|
||||||
Filtered list containing only user inputs and final assistant responses.
|
Filtered list containing only user inputs and final assistant responses.
|
||||||
"""
|
"""
|
||||||
|
_UPLOAD_BLOCK_RE = re.compile(
|
||||||
|
r"<uploaded_files>[\s\S]*?</uploaded_files>\n*", re.IGNORECASE
|
||||||
|
)
|
||||||
|
|
||||||
filtered = []
|
filtered = []
|
||||||
|
skip_next_ai = False
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
msg_type = getattr(msg, "type", None)
|
msg_type = getattr(msg, "type", None)
|
||||||
|
|
||||||
if msg_type == "human":
|
if msg_type == "human":
|
||||||
# Always keep user messages
|
content = getattr(msg, "content", "")
|
||||||
filtered.append(msg)
|
if isinstance(content, list):
|
||||||
|
content = " ".join(
|
||||||
|
p.get("text", "") for p in content if isinstance(p, dict)
|
||||||
|
)
|
||||||
|
content_str = str(content)
|
||||||
|
if "<uploaded_files>" in content_str:
|
||||||
|
# Strip the ephemeral upload block; keep the user's real question.
|
||||||
|
stripped = _UPLOAD_BLOCK_RE.sub("", content_str).strip()
|
||||||
|
if not stripped:
|
||||||
|
# Nothing left — the entire turn was upload bookkeeping;
|
||||||
|
# skip it and the paired assistant response.
|
||||||
|
skip_next_ai = True
|
||||||
|
continue
|
||||||
|
# Rebuild the message with cleaned content so the user's question
|
||||||
|
# is still available for memory summarisation.
|
||||||
|
from copy import copy
|
||||||
|
|
||||||
|
clean_msg = copy(msg)
|
||||||
|
clean_msg.content = stripped
|
||||||
|
filtered.append(clean_msg)
|
||||||
|
skip_next_ai = False
|
||||||
|
else:
|
||||||
|
filtered.append(msg)
|
||||||
|
skip_next_ai = False
|
||||||
elif msg_type == "ai":
|
elif msg_type == "ai":
|
||||||
# Only keep AI messages that are final responses (no tool_calls)
|
|
||||||
tool_calls = getattr(msg, "tool_calls", None)
|
tool_calls = getattr(msg, "tool_calls", None)
|
||||||
if not tool_calls:
|
if not tool_calls:
|
||||||
|
if skip_next_ai:
|
||||||
|
skip_next_ai = False
|
||||||
|
continue
|
||||||
filtered.append(msg)
|
filtered.append(msg)
|
||||||
# Skip tool messages and AI messages with tool_calls
|
# Skip tool messages and AI messages with tool_calls
|
||||||
|
|
||||||
|
|||||||
232
backend/tests/test_memory_upload_filtering.py
Normal file
232
backend/tests/test_memory_upload_filtering.py
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
"""Tests for upload-event filtering in the memory pipeline.
|
||||||
|
|
||||||
|
Covers two functions introduced to prevent ephemeral file-upload context from
|
||||||
|
persisting in long-term memory:
|
||||||
|
|
||||||
|
- _filter_messages_for_memory (memory_middleware)
|
||||||
|
- _strip_upload_mentions_from_memory (updater)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
|
||||||
|
|
||||||
|
from src.agents.memory.updater import _strip_upload_mentions_from_memory
|
||||||
|
from src.agents.middlewares.memory_middleware import _filter_messages_for_memory
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Sample <uploaded_files> block used as the ephemeral upload context in the
# tests below; it contains a path under /mnt/user-data/uploads/.
_UPLOAD_BLOCK = "\n".join(
    [
        "<uploaded_files>",
        "The following files have been uploaded and are available for use:",
        "",
        "- filename: secret.txt",
        " path: /mnt/user-data/uploads/abc123/secret.txt",
        " size: 42 bytes",
        "</uploaded_files>",
    ]
)
|
||||||
|
|
||||||
|
|
||||||
|
def _human(text: str) -> HumanMessage:
    """Wrap plain text in a HumanMessage."""
    message = HumanMessage(content=text)
    return message
|
||||||
|
|
||||||
|
|
||||||
|
def _ai(text: str, tool_calls=None) -> AIMessage:
    """Build an AIMessage, attaching ``tool_calls`` only when some are given."""
    message = AIMessage(content=text)
    if not tool_calls:
        # No tool calls supplied: return the message as constructed.
        return message
    message.tool_calls = tool_calls
    return message
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# _filter_messages_for_memory
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestFilterMessagesForMemory:
    """Checks on what _filter_messages_for_memory keeps and what it drops."""

    # --- upload-only turns are excluded ---

    def test_upload_only_turn_is_excluded(self):
        """A human turn containing only <uploaded_files> (no real question)
        and its paired AI response must both be dropped."""
        conversation = [_human(_UPLOAD_BLOCK), _ai("I have read the file. It says: Hello.")]
        assert _filter_messages_for_memory(conversation) == []

    def test_upload_with_real_question_preserves_question(self):
        """When the user asks a question alongside an upload, the question text
        must reach the memory queue (upload block stripped, AI response kept)."""
        question = f"{_UPLOAD_BLOCK}\n\nWhat does this file contain?"
        kept = _filter_messages_for_memory(
            [
                _human(question),
                _ai("The file contains: Hello DeerFlow."),
            ]
        )

        assert len(kept) == 2
        human_kept, ai_kept = kept
        assert "<uploaded_files>" not in human_kept.content
        assert "What does this file contain?" in human_kept.content
        assert ai_kept.content == "The file contains: Hello DeerFlow."

    # --- non-upload turns pass through unchanged ---

    def test_plain_conversation_passes_through(self):
        """A conversation without any upload block is kept as-is."""
        kept = _filter_messages_for_memory(
            [
                _human("What is the capital of France?"),
                _ai("The capital of France is Paris."),
            ]
        )
        assert len(kept) == 2
        assert kept[0].content == "What is the capital of France?"
        assert kept[1].content == "The capital of France is Paris."

    def test_tool_messages_are_excluded(self):
        """Intermediate tool messages must never reach memory."""
        search_call = {"name": "search", "id": "1", "args": {}}
        conversation = [
            _human("Search for something"),
            _ai("Calling search tool", tool_calls=[search_call]),
            ToolMessage(content="Search results", tool_call_id="1"),
            _ai("Here are the results."),
        ]
        kept = _filter_messages_for_memory(conversation)

        humans = [m for m in kept if m.type == "human"]
        assistants = [m for m in kept if m.type == "ai"]
        assert len(humans) == 1
        assert len(assistants) == 1
        assert assistants[0].content == "Here are the results."

    def test_multi_turn_with_upload_in_middle(self):
        """Only the upload turn is dropped; surrounding non-upload turns survive."""
        conversation = [
            _human("Hello, how are you?"),
            _ai("I'm doing well, thank you!"),
            _human(_UPLOAD_BLOCK),  # upload-only → dropped
            _ai("I read the uploaded file."),  # paired AI → dropped
            _human("What is 2 + 2?"),
            _ai("4"),
        ]
        kept = _filter_messages_for_memory(conversation)
        human_texts = [m.content for m in kept if m.type == "human"]
        ai_texts = [m.content for m in kept if m.type == "ai"]

        assert "Hello, how are you?" in human_texts
        assert "What is 2 + 2?" in human_texts
        assert _UPLOAD_BLOCK not in human_texts
        assert "I'm doing well, thank you!" in ai_texts
        assert "4" in ai_texts
        # The upload-paired AI response must NOT appear
        assert "I read the uploaded file." not in ai_texts

    def test_multimodal_content_list_handled(self):
        """Human messages with list-style content (multimodal) are handled."""
        multimodal = HumanMessage(content=[{"type": "text", "text": _UPLOAD_BLOCK}])
        assert _filter_messages_for_memory([multimodal, _ai("Done.")]) == []

    def test_file_path_not_in_filtered_content(self):
        """After filtering, no upload file path should appear in any message."""
        request = f"{_UPLOAD_BLOCK}\n\nSummarise the file please."
        kept = _filter_messages_for_memory([_human(request), _ai("It says hello.")])
        # Only string contents are inspected, as in the rest of this suite.
        texts = [m.content for m in kept if isinstance(m.content, str)]
        joined = " ".join(texts)
        assert "/mnt/user-data/uploads/" not in joined
        assert "<uploaded_files>" not in joined
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# _strip_upload_mentions_from_memory
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestStripUploadMentionsFromMemory:
    """Checks on _strip_upload_mentions_from_memory for summaries and facts."""

    def _make_memory(self, summary: str, facts: list[dict] | None = None) -> dict:
        """Build a minimal memory dict with one user summary and optional facts."""
        user_section = {"topOfMind": {"summary": summary}}
        history_section = {"recentMonths": {"summary": ""}}
        return {
            "user": user_section,
            "history": history_section,
            "facts": facts or [],
        }

    # --- summaries ---

    def test_upload_event_sentence_removed_from_summary(self):
        """An upload-event sentence is removed; its neighbours are kept."""
        memory = self._make_memory(
            "User is interested in AI. "
            "User uploaded a test file for verification purposes. "
            "User prefers concise answers."
        )
        cleaned = _strip_upload_mentions_from_memory(memory)["user"]["topOfMind"]["summary"]
        assert "uploaded a test file" not in cleaned
        assert "User is interested in AI" in cleaned
        assert "User prefers concise answers" in cleaned

    def test_upload_path_sentence_removed_from_summary(self):
        """A sentence carrying an uploads path must not leave the path behind."""
        memory = self._make_memory(
            "User uses Python. "
            "User uploaded file to /mnt/user-data/uploads/tid/data.csv. "
            "User likes clean code."
        )
        cleaned = _strip_upload_mentions_from_memory(memory)["user"]["topOfMind"]["summary"]
        assert "/mnt/user-data/uploads/" not in cleaned
        assert "User uses Python" in cleaned

    def test_legitimate_csv_mention_is_preserved(self):
        """'User works with CSV files' must NOT be deleted — it's not an upload event."""
        memory = self._make_memory("User regularly works with CSV files for data analysis.")
        scrubbed = _strip_upload_mentions_from_memory(memory)
        assert "CSV files" in scrubbed["user"]["topOfMind"]["summary"]

    def test_pdf_export_preference_preserved(self):
        """'Prefers PDF export' is a legitimate preference, not an upload event."""
        memory = self._make_memory("User prefers PDF export for reports.")
        scrubbed = _strip_upload_mentions_from_memory(memory)
        assert "PDF export" in scrubbed["user"]["topOfMind"]["summary"]

    def test_uploading_a_test_file_removed(self):
        """'uploading a test file' (with intervening words) must be caught."""
        memory = self._make_memory(
            "User conducted a hands-on test by uploading a test file titled "
            "'test_deerflow_memory_bug.txt'. User is also learning Python."
        )
        cleaned = _strip_upload_mentions_from_memory(memory)["user"]["topOfMind"]["summary"]
        assert "test_deerflow_memory_bug.txt" not in cleaned
        assert "uploading a test file" not in cleaned

    # --- facts ---

    def test_upload_fact_removed_from_facts(self):
        """Facts describing upload events are dropped; other facts stay."""
        memory = self._make_memory(
            "summary",
            facts=[
                {"content": "User uploaded a file titled secret.txt", "category": "behavior"},
                {"content": "User prefers dark mode", "category": "preference"},
                {"content": "User is uploading document attachments regularly", "category": "behavior"},
            ],
        )
        scrubbed = _strip_upload_mentions_from_memory(memory)
        surviving = [fact["content"] for fact in scrubbed["facts"]]
        assert "User prefers dark mode" in surviving
        assert not any("uploaded a file" in text for text in surviving)
        assert not any("uploading document" in text for text in surviving)

    def test_non_upload_facts_preserved(self):
        """Facts that have nothing to do with uploads are all retained."""
        memory = self._make_memory(
            "",
            facts=[
                {"content": "User graduated from Peking University", "category": "context"},
                {"content": "User prefers Python over JavaScript", "category": "preference"},
            ],
        )
        scrubbed = _strip_upload_mentions_from_memory(memory)
        assert len(scrubbed["facts"]) == 2

    def test_empty_memory_handled_gracefully(self):
        """A memory dict with empty sections comes back unchanged."""
        empty = {"user": {}, "history": {}, "facts": []}
        assert _strip_upload_mentions_from_memory(empty) == {"user": {}, "history": {}, "facts": []}
|
||||||
Reference in New Issue
Block a user