mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-03 06:12:14 +08:00
* refactor: extract shared utils to break harness→app cross-layer imports Move _validate_skill_frontmatter to src/skills/validation.py and CONVERTIBLE_EXTENSIONS + convert_file_to_markdown to src/utils/file_conversion.py. This eliminates the two reverse dependencies from client.py (harness layer) into gateway/routers/ (app layer), preparing for the harness/app package split. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor: split backend/src into harness (deerflow.*) and app (app.*) Physically split the monolithic backend/src/ package into two layers: - **Harness** (`packages/harness/deerflow/`): publishable agent framework package with import prefix `deerflow.*`. Contains agents, sandbox, tools, models, MCP, skills, config, and all core infrastructure. - **App** (`app/`): unpublished application code with import prefix `app.*`. Contains gateway (FastAPI REST API) and channels (IM integrations). Key changes: - Move 13 harness modules to packages/harness/deerflow/ via git mv - Move gateway + channels to app/ via git mv - Rename all imports: src.* → deerflow.* (harness) / app.* (app layer) - Set up uv workspace with deerflow-harness as workspace member - Update langgraph.json, config.example.yaml, all scripts, Docker files - Add build-system (hatchling) to harness pyproject.toml - Add PYTHONPATH=. to gateway startup commands for app.* resolution - Update ruff.toml with known-first-party for import sorting - Update all documentation to reflect new directory structure Boundary rule enforced: harness code never imports from app. All 429 tests pass. Lint clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: add harness→app boundary check test and update docs Add test_harness_boundary.py that scans all Python files in packages/harness/deerflow/ and fails if any `from app.*` or `import app.*` statement is found. This enforces the architectural rule that the harness layer never depends on the app layer. 
Update CLAUDE.md to document the harness/app split architecture, import conventions, and the boundary enforcement test. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add config versioning with auto-upgrade on startup When config.example.yaml schema changes, developers' local config.yaml files can silently become outdated. This adds a config_version field and auto-upgrade mechanism so breaking changes (like src.* → deerflow.* renames) are applied automatically before services start. - Add config_version: 1 to config.example.yaml - Add startup version check warning in AppConfig.from_file() - Add scripts/config-upgrade.sh with migration registry for value replacements - Add `make config-upgrade` target - Auto-run config-upgrade in serve.sh and start-daemon.sh before starting services - Add config error hints in service failure messages Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix comments * fix: update src.* import in test_sandbox_tools_security to deerflow.* Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: handle empty config and search parent dirs for config.example.yaml Address Copilot review comments on PR #1131: - Guard against yaml.safe_load() returning None for empty config files - Search parent directories for config.example.yaml instead of only looking next to config.yaml, fixing detection in common setups Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: correct skills root path depth and config_version type coercion - loader.py: fix get_skills_root_path() to use 5 parent levels (was 3) after harness split, file lives at packages/harness/deerflow/skills/ so parent×3 resolved to backend/packages/harness/ instead of backend/ - app_config.py: coerce config_version to int() before comparison in _check_config_version() to prevent TypeError when YAML stores value as string (e.g. 
config_version: "1") - tests: add regression tests for both fixes Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: update test imports from src.* to deerflow.*/app.* after harness refactor Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
174 lines
6.1 KiB
Python
174 lines
6.1 KiB
Python
"""MessageBus — async pub/sub hub that decouples channels from the agent dispatcher."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
from collections.abc import Callable, Coroutine
|
|
from dataclasses import dataclass, field
|
|
from enum import StrEnum
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
# Module-level logger; MessageBus uses it to report enqueue/dispatch events
# and exceptions raised by outbound callbacks.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Message types
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class InboundMessageType(StrEnum):
|
|
"""Types of messages arriving from IM channels."""
|
|
|
|
CHAT = "chat"
|
|
COMMAND = "command"
|
|
|
|
|
|
@dataclass
class InboundMessage:
    """A message arriving from an IM channel toward the agent dispatcher.

    Attributes:
        channel_name: Name of the source channel (e.g. "feishu", "slack").
        chat_id: Platform-specific chat/conversation identifier.
        user_id: Platform-specific user identifier.
        text: The message text.
        msg_type: Whether this is a regular chat message or a command.
        thread_ts: Optional platform thread identifier (for threaded replies).
        topic_id: Conversation topic identifier used to map to a DeerFlow thread.
            Messages sharing the same ``topic_id`` within a ``chat_id`` will
            reuse the same DeerFlow thread. When ``None``, each message
            creates a new thread (one-shot Q&A).
        files: Optional list of file attachments (platform-specific dicts).
        metadata: Arbitrary extra data from the channel.
        created_at: Unix timestamp when the message was created.
    """

    channel_name: str
    chat_id: str
    user_id: str
    text: str
    msg_type: InboundMessageType = InboundMessageType.CHAT
    thread_ts: str | None = None
    topic_id: str | None = None
    # Mutable defaults go through default_factory so that every message gets
    # its own list/dict instead of sharing one across instances.
    files: list[dict[str, Any]] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    # Evaluated at construction time (time.time is called per instance).
    created_at: float = field(default_factory=time.time)
|
|
|
|
|
|
@dataclass
class ResolvedAttachment:
    """A file attachment resolved to a host filesystem path, ready for upload.

    All fields are required; the dataclass declares no defaults.

    Attributes:
        virtual_path: Original virtual path (e.g. /mnt/user-data/outputs/report.pdf).
        actual_path: Resolved host filesystem path.
        filename: Basename of the file.
        mime_type: MIME type (e.g. "application/pdf").
        size: File size in bytes.
        is_image: True for image/* MIME types (platforms may handle images differently).
    """

    virtual_path: str
    actual_path: Path
    filename: str
    mime_type: str
    size: int
    is_image: bool
|
|
|
|
|
|
@dataclass
class OutboundMessage:
    """A message from the agent dispatcher back to a channel.

    Attributes:
        channel_name: Target channel name (used for routing).
        chat_id: Target chat/conversation identifier.
        thread_id: DeerFlow thread ID that produced this response.
        text: The response text.
        artifacts: List of artifact paths produced by the agent.
        attachments: File attachments already resolved to host filesystem
            paths (``ResolvedAttachment`` instances).
        is_final: Whether this is the final message in the response stream.
        thread_ts: Optional platform thread identifier for threaded replies.
        metadata: Arbitrary extra data.
        created_at: Unix timestamp.
    """

    channel_name: str
    chat_id: str
    thread_id: str
    text: str
    # Mutable defaults go through default_factory so that every message gets
    # its own containers instead of sharing them across instances.
    artifacts: list[str] = field(default_factory=list)
    attachments: list[ResolvedAttachment] = field(default_factory=list)
    is_final: bool = True
    thread_ts: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    # Evaluated at construction time (time.time is called per instance).
    created_at: float = field(default_factory=time.time)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# MessageBus
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Type of an outbound listener: a callable that takes one OutboundMessage and
# returns a coroutine resolving to None (i.e. an ``async def`` function/method).
OutboundCallback = Callable[[OutboundMessage], Coroutine[Any, Any, None]]
|
|
|
|
|
|
class MessageBus:
    """Async pub/sub hub connecting channels and the agent dispatcher.

    Channels publish inbound messages; the dispatcher consumes them.
    The dispatcher publishes outbound messages; channels receive them
    via registered callbacks.
    """

    def __init__(self) -> None:
        """Create a bus with an empty inbound queue and no outbound listeners."""
        self._inbound_queue: asyncio.Queue[InboundMessage] = asyncio.Queue()
        self._outbound_listeners: list[OutboundCallback] = []

    # -- inbound -----------------------------------------------------------

    async def publish_inbound(self, msg: InboundMessage) -> None:
        """Enqueue an inbound message from a channel.

        Args:
            msg: The message to hand over to whoever consumes the queue.
        """
        await self._inbound_queue.put(msg)
        logger.info(
            "[Bus] inbound enqueued: channel=%s, chat_id=%s, type=%s, queue_size=%d",
            msg.channel_name,
            msg.chat_id,
            msg.msg_type.value,
            self._inbound_queue.qsize(),
        )

    async def get_inbound(self) -> InboundMessage:
        """Block until the next inbound message is available.

        Returns:
            The oldest message still waiting in the inbound queue.
        """
        return await self._inbound_queue.get()

    @property
    def inbound_queue(self) -> asyncio.Queue[InboundMessage]:
        """The underlying inbound queue object (not a copy)."""
        return self._inbound_queue

    # -- outbound ----------------------------------------------------------

    def subscribe_outbound(self, callback: OutboundCallback) -> None:
        """Register an async callback for outbound messages.

        Args:
            callback: Awaitable callable invoked with every outbound message.
                Registering the same callback twice makes it run twice.
        """
        self._outbound_listeners.append(callback)

    def unsubscribe_outbound(self, callback: OutboundCallback) -> None:
        """Remove a previously registered outbound callback.

        Every registration equal to ``callback`` is dropped; unknown callbacks
        are ignored silently.

        Args:
            callback: The callback to remove.
        """
        # Compare by equality, not identity: ``obj.method`` builds a fresh
        # bound-method object on every attribute access, so an ``is`` check
        # never matches a listener that was registered as a bound method.
        # Plain functions still fall back to identity under ``!=``.
        # A new list is built (rather than mutating in place) so a dispatch
        # that is currently in flight is not disturbed.
        self._outbound_listeners = [cb for cb in self._outbound_listeners if cb != callback]

    async def publish_outbound(self, msg: OutboundMessage) -> None:
        """Dispatch an outbound message to all registered listeners.

        Listeners are awaited one after another in registration order. An
        exception raised by one listener is logged and does not prevent the
        remaining listeners from running.

        Args:
            msg: The message to deliver.
        """
        # Snapshot the listeners: each ``await`` below yields to the event
        # loop, and a subscription made meanwhile must not change the set of
        # recipients for the message already being dispatched.
        listeners = tuple(self._outbound_listeners)
        logger.info(
            "[Bus] outbound dispatching: channel=%s, chat_id=%s, listeners=%d, text_len=%d",
            msg.channel_name,
            msg.chat_id,
            len(listeners),
            len(msg.text),
        )
        for callback in listeners:
            try:
                await callback(msg)
            except Exception:
                # Best-effort fan-out: record the failure and keep going.
                logger.exception("Error in outbound callback for channel=%s", msg.channel_name)
|