mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-24 22:54:46 +08:00
fix(harness): skip duplicate memory facts (#1193)
* fix(harness): skip duplicate memory facts Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * docs: note memory fact deduplication Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> * Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -437,6 +437,8 @@ Most agents forget everything the moment a conversation ends. DeerFlow remembers
|
|||||||
|
|
||||||
Across sessions, DeerFlow builds a persistent memory of your profile, preferences, and accumulated knowledge. The more you use it, the better it knows you — your writing style, your technical stack, your recurring workflows. Memory is stored locally and stays under your control.
|
Across sessions, DeerFlow builds a persistent memory of your profile, preferences, and accumulated knowledge. The more you use it, the better it knows you — your writing style, your technical stack, your recurring workflows. Memory is stored locally and stays under your control.
|
||||||
|
|
||||||
|
Memory updates now skip duplicate fact entries at apply time, so repeated preferences and context do not accumulate endlessly across sessions.
|
||||||
|
|
||||||
## Recommended Models
|
## Recommended Models
|
||||||
|
|
||||||
DeerFlow is model-agnostic — it works with any LLM that implements the OpenAI-compatible API. That said, it performs best with models that support:
|
DeerFlow is model-agnostic — it works with any LLM that implements the OpenAI-compatible API. That said, it performs best with models that support:
|
||||||
|
|||||||
@@ -312,7 +312,7 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
|
|||||||
### Memory System (`packages/harness/deerflow/agents/memory/`)
|
### Memory System (`packages/harness/deerflow/agents/memory/`)
|
||||||
|
|
||||||
**Components**:
|
**Components**:
|
||||||
- `updater.py` - LLM-based memory updates with fact extraction and atomic file I/O
|
- `updater.py` - LLM-based memory updates with fact extraction, whitespace-normalized fact deduplication (trims leading/trailing whitespace before comparing), and atomic file I/O
|
||||||
- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time)
|
- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time)
|
||||||
- `prompt.py` - Prompt templates for memory updates
|
- `prompt.py` - Prompt templates for memory updates
|
||||||
|
|
||||||
@@ -325,9 +325,11 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
|
|||||||
1. `MemoryMiddleware` filters messages (user inputs + final AI responses) and queues conversation
|
1. `MemoryMiddleware` filters messages (user inputs + final AI responses) and queues conversation
|
||||||
2. Queue debounces (30s default), batches updates, deduplicates per-thread
|
2. Queue debounces (30s default), batches updates, deduplicates per-thread
|
||||||
3. Background thread invokes LLM to extract context updates and facts
|
3. Background thread invokes LLM to extract context updates and facts
|
||||||
4. Applies updates atomically (temp file + rename) with cache invalidation
|
4. Applies updates atomically (temp file + rename) with cache invalidation, skipping duplicate fact content before append
|
||||||
5. Next interaction injects top 15 facts + context into `<memory>` tags in system prompt
|
5. Next interaction injects top 15 facts + context into `<memory>` tags in system prompt
|
||||||
|
|
||||||
|
Focused regression coverage for the updater lives in `backend/tests/test_memory_updater.py`.
|
||||||
|
|
||||||
**Configuration** (`config.yaml` → `memory`):
|
**Configuration** (`config.yaml` → `memory`):
|
||||||
- `enabled` / `injection_enabled` - Master switches
|
- `enabled` / `injection_enabled` - Master switches
|
||||||
- `storage_path` - Path to memory.json
|
- `storage_path` - Path to memory.json
|
||||||
|
|||||||
@@ -173,6 +173,15 @@ def _strip_upload_mentions_from_memory(memory_data: dict[str, Any]) -> dict[str,
|
|||||||
return memory_data
|
return memory_data
|
||||||
|
|
||||||
|
|
||||||
|
def _fact_content_key(content: Any) -> str | None:
|
||||||
|
if not isinstance(content, str):
|
||||||
|
return None
|
||||||
|
stripped = content.strip()
|
||||||
|
if not stripped:
|
||||||
|
return None
|
||||||
|
return stripped
|
||||||
|
|
||||||
|
|
||||||
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
|
||||||
"""Save memory data to file and update cache.
|
"""Save memory data to file and update cache.
|
||||||
|
|
||||||
@@ -343,19 +352,35 @@ class MemoryUpdater:
|
|||||||
current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]
|
current_memory["facts"] = [f for f in current_memory.get("facts", []) if f.get("id") not in facts_to_remove]
|
||||||
|
|
||||||
# Add new facts
|
# Add new facts
|
||||||
|
existing_fact_keys = {
|
||||||
|
fact_key
|
||||||
|
for fact_key in (
|
||||||
|
_fact_content_key(fact.get("content"))
|
||||||
|
for fact in current_memory.get("facts", [])
|
||||||
|
)
|
||||||
|
if fact_key is not None
|
||||||
|
}
|
||||||
new_facts = update_data.get("newFacts", [])
|
new_facts = update_data.get("newFacts", [])
|
||||||
for fact in new_facts:
|
for fact in new_facts:
|
||||||
confidence = fact.get("confidence", 0.5)
|
confidence = fact.get("confidence", 0.5)
|
||||||
if confidence >= config.fact_confidence_threshold:
|
if confidence >= config.fact_confidence_threshold:
|
||||||
|
raw_content = fact.get("content", "")
|
||||||
|
normalized_content = raw_content.strip()
|
||||||
|
fact_key = _fact_content_key(normalized_content)
|
||||||
|
if fact_key is not None and fact_key in existing_fact_keys:
|
||||||
|
continue
|
||||||
|
|
||||||
fact_entry = {
|
fact_entry = {
|
||||||
"id": f"fact_{uuid.uuid4().hex[:8]}",
|
"id": f"fact_{uuid.uuid4().hex[:8]}",
|
||||||
"content": fact.get("content", ""),
|
"content": normalized_content,
|
||||||
"category": fact.get("category", "context"),
|
"category": fact.get("category", "context"),
|
||||||
"confidence": confidence,
|
"confidence": confidence,
|
||||||
"createdAt": now,
|
"createdAt": now,
|
||||||
"source": thread_id or "unknown",
|
"source": thread_id or "unknown",
|
||||||
}
|
}
|
||||||
current_memory["facts"].append(fact_entry)
|
current_memory["facts"].append(fact_entry)
|
||||||
|
if fact_key is not None:
|
||||||
|
existing_fact_keys.add(fact_key)
|
||||||
|
|
||||||
# Enforce max facts limit
|
# Enforce max facts limit
|
||||||
if len(current_memory["facts"]) > config.max_facts:
|
if len(current_memory["facts"]) > config.max_facts:
|
||||||
|
|||||||
137
backend/tests/test_memory_updater.py
Normal file
137
backend/tests/test_memory_updater.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from deerflow.agents.memory.updater import MemoryUpdater
|
||||||
|
from deerflow.config.memory_config import MemoryConfig
|
||||||
|
|
||||||
|
|
||||||
|
def _make_memory(facts: list[dict[str, object]] | None = None) -> dict[str, object]:
|
||||||
|
return {
|
||||||
|
"version": "1.0",
|
||||||
|
"lastUpdated": "",
|
||||||
|
"user": {
|
||||||
|
"workContext": {"summary": "", "updatedAt": ""},
|
||||||
|
"personalContext": {"summary": "", "updatedAt": ""},
|
||||||
|
"topOfMind": {"summary": "", "updatedAt": ""},
|
||||||
|
},
|
||||||
|
"history": {
|
||||||
|
"recentMonths": {"summary": "", "updatedAt": ""},
|
||||||
|
"earlierContext": {"summary": "", "updatedAt": ""},
|
||||||
|
"longTermBackground": {"summary": "", "updatedAt": ""},
|
||||||
|
},
|
||||||
|
"facts": facts or [],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _memory_config(**overrides: object) -> MemoryConfig:
    """Return a default MemoryConfig with the given attribute overrides applied."""
    cfg = MemoryConfig()
    for attr in overrides:
        setattr(cfg, attr, overrides[attr])
    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_updates_skips_existing_duplicate_and_preserves_removals() -> None:
    """A new fact whose content matches an existing fact is dropped, while
    explicit entries in ``factsToRemove`` are still removed."""
    existing = {
        "id": "fact_existing",
        "content": "User likes Python",
        "category": "preference",
        "confidence": 0.9,
        "createdAt": "2026-03-18T00:00:00Z",
        "source": "thread-a",
    }
    doomed = {
        "id": "fact_remove",
        "content": "Old context to remove",
        "category": "context",
        "confidence": 0.8,
        "createdAt": "2026-03-18T00:00:00Z",
        "source": "thread-a",
    }
    memory = _make_memory(facts=[existing, doomed])
    updates = {
        "factsToRemove": ["fact_remove"],
        "newFacts": [
            {"content": "User likes Python", "category": "preference", "confidence": 0.95},
        ],
    }

    config = _memory_config(max_facts=100, fact_confidence_threshold=0.7)
    with patch(
        "deerflow.agents.memory.updater.get_memory_config",
        return_value=config,
    ):
        result = MemoryUpdater()._apply_updates(memory, updates, thread_id="thread-b")

    # Only the original copy of the duplicated content survives.
    contents = [fact["content"] for fact in result["facts"]]
    assert contents == ["User likes Python"]
    assert all(fact["id"] != "fact_remove" for fact in result["facts"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_updates_skips_same_batch_duplicates_and_keeps_source_metadata() -> None:
    """Within a single update batch, repeated fact content is stored once,
    and stored facts carry generated ids plus the requesting thread as source."""
    memory = _make_memory()
    updates = {
        "newFacts": [
            {"content": "User prefers dark mode", "category": "preference", "confidence": 0.91},
            {"content": "User prefers dark mode", "category": "preference", "confidence": 0.92},
            {"content": "User works on DeerFlow", "category": "context", "confidence": 0.87},
        ],
    }

    config = _memory_config(max_facts=100, fact_confidence_threshold=0.7)
    with patch(
        "deerflow.agents.memory.updater.get_memory_config",
        return_value=config,
    ):
        result = MemoryUpdater()._apply_updates(memory, updates, thread_id="thread-42")

    stored = result["facts"]
    assert [fact["content"] for fact in stored] == [
        "User prefers dark mode",
        "User works on DeerFlow",
    ]
    assert all(fact["id"].startswith("fact_") for fact in stored)
    assert all(fact["source"] == "thread-42" for fact in stored)
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_updates_preserves_threshold_and_max_facts_trimming() -> None:
    """Deduplication does not weaken the existing rules: facts below the
    confidence threshold are rejected, duplicates are skipped, and the list
    is trimmed to ``max_facts`` after new entries are appended."""
    python_fact = {
        "id": "fact_python",
        "content": "User likes Python",
        "category": "preference",
        "confidence": 0.95,
        "createdAt": "2026-03-18T00:00:00Z",
        "source": "thread-a",
    }
    dark_mode_fact = {
        "id": "fact_dark_mode",
        "content": "User prefers dark mode",
        "category": "preference",
        "confidence": 0.8,
        "createdAt": "2026-03-18T00:00:00Z",
        "source": "thread-a",
    }
    memory = _make_memory(facts=[python_fact, dark_mode_fact])
    updates = {
        "newFacts": [
            {"content": "User prefers dark mode", "category": "preference", "confidence": 0.9},
            {"content": "User uses uv", "category": "context", "confidence": 0.85},
            {"content": "User likes noisy logs", "category": "behavior", "confidence": 0.6},
        ],
    }

    config = _memory_config(max_facts=2, fact_confidence_threshold=0.7)
    with patch(
        "deerflow.agents.memory.updater.get_memory_config",
        return_value=config,
    ):
        result = MemoryUpdater()._apply_updates(memory, updates, thread_id="thread-9")

    surviving = result["facts"]
    assert [fact["content"] for fact in surviving] == [
        "User likes Python",
        "User uses uv",
    ]
    # The 0.6-confidence fact falls below the 0.7 threshold and is never stored.
    assert all(fact["content"] != "User likes noisy logs" for fact in surviving)
    assert surviving[1]["source"] == "thread-9"
|
||||||
Reference in New Issue
Block a user