mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-02 22:02:13 +08:00
fix(harness): skip duplicate memory facts (#1193)
* fix(harness): skip duplicate memory facts Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * docs: note memory fact deduplication Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -437,6 +437,8 @@ Most agents forget everything the moment a conversation ends. DeerFlow remembers
|
||||
|
||||
Across sessions, DeerFlow builds a persistent memory of your profile, preferences, and accumulated knowledge. The more you use it, the better it knows you — your writing style, your technical stack, your recurring workflows. Memory is stored locally and stays under your control.
|
||||
|
||||
Memory updates now skip duplicate fact entries at apply time, so repeated preferences and context do not accumulate endlessly across sessions.
|
||||
|
||||
## Recommended Models
|
||||
|
||||
DeerFlow is model-agnostic — it works with any LLM that implements the OpenAI-compatible API. That said, it performs best with models that support:
|
||||
|
||||
@@ -312,7 +312,7 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
|
||||
### Memory System (`packages/harness/deerflow/agents/memory/`)
|
||||
|
||||
**Components**:
|
||||
- `updater.py` - LLM-based memory updates with fact extraction and atomic file I/O
|
||||
- `updater.py` - LLM-based memory updates with fact extraction, whitespace-normalized fact deduplication (trims leading/trailing whitespace before comparing), and atomic file I/O
|
||||
- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time)
|
||||
- `prompt.py` - Prompt templates for memory updates
|
||||
|
||||
@@ -325,9 +325,11 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
|
||||
1. `MemoryMiddleware` filters messages (user inputs + final AI responses) and queues conversation
|
||||
2. Queue debounces (30s default), batches updates, deduplicates per-thread
|
||||
3. Background thread invokes LLM to extract context updates and facts
|
||||
4. Applies updates atomically (temp file + rename) with cache invalidation
|
||||
4. Applies updates atomically (temp file + rename) with cache invalidation, skipping duplicate fact content before append
|
||||
5. Next interaction injects top 15 facts + context into `<memory>` tags in system prompt
|
||||
|
||||
Focused regression coverage for the updater lives in `backend/tests/test_memory_updater.py`.
|
||||
|
||||
**Configuration** (`config.yaml` → `memory`):
|
||||
- `enabled` / `injection_enabled` - Master switches
|
||||
- `storage_path` - Path to memory.json
|
||||
|
||||
@@ -173,6 +173,15 @@ def _strip_upload_mentions_from_memory(memory_data: dict[str, Any]) -> dict[str,
|
||||
return memory_data
|
||||
|
||||
|
||||
def _fact_content_key(content: Any) -> str | None:
|
||||
if not isinstance(content, str):
|
||||
return None
|
||||
stripped = content.strip()
|
||||
if not stripped:
|
||||
return None
|
||||
return stripped
|
||||
|
||||
|
||||
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
||||
"""Save memory data to file and update cache.
|
||||
|
||||
@@ -343,19 +352,35 @@ class MemoryUpdater:
|
||||
current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]
|
||||
|
||||
# Add new facts
|
||||
existing_fact_keys = {
|
||||
fact_key
|
||||
for fact_key in (
|
||||
_fact_content_key(fact.get("content"))
|
||||
for fact in current_memory.get("facts", [])
|
||||
)
|
||||
if fact_key is not None
|
||||
}
|
||||
new_facts = update_data.get("newFacts", [])
|
||||
for fact in new_facts:
|
||||
confidence = fact.get("confidence", 0.5)
|
||||
if confidence >= config.fact_confidence_threshold:
|
||||
raw_content = fact.get("content", "")
|
||||
normalized_content = raw_content.strip()
|
||||
fact_key = _fact_content_key(normalized_content)
|
||||
if fact_key is not None and fact_key in existing_fact_keys:
|
||||
continue
|
||||
|
||||
fact_entry = {
|
||||
"id": f"fact_{uuid.uuid4().hex[:8]}",
|
||||
"content": fact.get("content", ""),
|
||||
"content": normalized_content,
|
||||
"category": fact.get("category", "context"),
|
||||
"confidence": confidence,
|
||||
"createdAt": now,
|
||||
"source": thread_id or "unknown",
|
||||
}
|
||||
current_memory["facts"].append(fact_entry)
|
||||
if fact_key is not None:
|
||||
existing_fact_keys.add(fact_key)
|
||||
|
||||
# Enforce max facts limit
|
||||
if len(current_memory["facts"]) > config.max_facts:
|
||||
|
||||
137
backend/tests/test_memory_updater.py
Normal file
137
backend/tests/test_memory_updater.py
Normal file
@@ -0,0 +1,137 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
from deerflow.agents.memory.updater import MemoryUpdater
|
||||
from deerflow.config.memory_config import MemoryConfig
|
||||
|
||||
|
||||
def _make_memory(facts: list[dict[str, object]] | None = None) -> dict[str, object]:
|
||||
return {
|
||||
"version": "1.0",
|
||||
"lastUpdated": "",
|
||||
"user": {
|
||||
"workContext": {"summary": "", "updatedAt": ""},
|
||||
"personalContext": {"summary": "", "updatedAt": ""},
|
||||
"topOfMind": {"summary": "", "updatedAt": ""},
|
||||
},
|
||||
"history": {
|
||||
"recentMonths": {"summary": "", "updatedAt": ""},
|
||||
"earlierContext": {"summary": "", "updatedAt": ""},
|
||||
"longTermBackground": {"summary": "", "updatedAt": ""},
|
||||
},
|
||||
"facts": facts or [],
|
||||
}
|
||||
|
||||
|
||||
def _memory_config(**overrides: object) -> MemoryConfig:
    """Return a default MemoryConfig with the given attributes overridden."""
    cfg = MemoryConfig()
    for attr_name, attr_value in overrides.items():
        setattr(cfg, attr_name, attr_value)
    return cfg
|
||||
|
||||
|
||||
def test_apply_updates_skips_existing_duplicate_and_preserves_removals() -> None:
    """A new fact whose content matches an existing one is dropped, while
    explicitly requested removals still take effect."""
    updater = MemoryUpdater()
    existing_fact = dict(
        id="fact_existing",
        content="User likes Python",
        category="preference",
        confidence=0.9,
        createdAt="2026-03-18T00:00:00Z",
        source="thread-a",
    )
    doomed_fact = dict(
        id="fact_remove",
        content="Old context to remove",
        category="context",
        confidence=0.8,
        createdAt="2026-03-18T00:00:00Z",
        source="thread-a",
    )
    current_memory = _make_memory(facts=[existing_fact, doomed_fact])
    update_data = {
        "factsToRemove": ["fact_remove"],
        "newFacts": [
            {"content": "User likes Python", "category": "preference", "confidence": 0.95},
        ],
    }

    config = _memory_config(max_facts=100, fact_confidence_threshold=0.7)
    with patch(
        "deerflow.agents.memory.updater.get_memory_config",
        return_value=config,
    ):
        result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")

    # Only the original fact survives: the duplicate was skipped and the
    # removal was honored.
    assert [fact["content"] for fact in result["facts"]] == ["User likes Python"]
    assert not any(fact["id"] == "fact_remove" for fact in result["facts"])
|
||||
|
||||
|
||||
def test_apply_updates_skips_same_batch_duplicates_and_keeps_source_metadata() -> None:
    """Identical contents within a single update batch collapse to one stored
    fact, and every stored entry carries a generated id plus the thread id."""
    updater = MemoryUpdater()
    update_data = {
        "newFacts": [
            {"content": "User prefers dark mode", "category": "preference", "confidence": 0.91},
            {"content": "User prefers dark mode", "category": "preference", "confidence": 0.92},
            {"content": "User works on DeerFlow", "category": "context", "confidence": 0.87},
        ],
    }

    config = _memory_config(max_facts=100, fact_confidence_threshold=0.7)
    with patch(
        "deerflow.agents.memory.updater.get_memory_config",
        return_value=config,
    ):
        result = updater._apply_updates(_make_memory(), update_data, thread_id="thread-42")

    stored = result["facts"]
    assert [fact["content"] for fact in stored] == [
        "User prefers dark mode",
        "User works on DeerFlow",
    ]
    assert all(fact["id"].startswith("fact_") for fact in stored)
    assert all(fact["source"] == "thread-42" for fact in stored)
|
||||
|
||||
|
||||
def test_apply_updates_preserves_threshold_and_max_facts_trimming() -> None:
    """Facts below the confidence threshold are rejected, duplicates of
    existing facts are skipped, and the list is trimmed back to max_facts."""
    updater = MemoryUpdater()
    seeded_facts = [
        dict(
            id="fact_python",
            content="User likes Python",
            category="preference",
            confidence=0.95,
            createdAt="2026-03-18T00:00:00Z",
            source="thread-a",
        ),
        dict(
            id="fact_dark_mode",
            content="User prefers dark mode",
            category="preference",
            confidence=0.8,
            createdAt="2026-03-18T00:00:00Z",
            source="thread-a",
        ),
    ]
    update_data = {
        "newFacts": [
            {"content": "User prefers dark mode", "category": "preference", "confidence": 0.9},
            {"content": "User uses uv", "category": "context", "confidence": 0.85},
            {"content": "User likes noisy logs", "category": "behavior", "confidence": 0.6},
        ],
    }

    config = _memory_config(max_facts=2, fact_confidence_threshold=0.7)
    with patch(
        "deerflow.agents.memory.updater.get_memory_config",
        return_value=config,
    ):
        result = updater._apply_updates(
            _make_memory(facts=seeded_facts), update_data, thread_id="thread-9"
        )

    kept_contents = [fact["content"] for fact in result["facts"]]
    assert kept_contents == [
        "User likes Python",
        "User uses uv",
    ]
    # The 0.6-confidence fact never made it past the 0.7 threshold.
    assert "User likes noisy logs" not in kept_contents
    assert result["facts"][1]["source"] == "thread-9"
|
||||
Reference in New Issue
Block a user