2026-01-23 18:47:39 +08:00
|
|
|
"""Middleware to inject uploaded files information into agent context."""
|
|
|
|
|
|
2026-03-05 11:16:34 +08:00
|
|
|
import logging
|
2026-01-23 18:47:39 +08:00
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import NotRequired, override
|
|
|
|
|
|
|
|
|
|
from langchain.agents import AgentState
|
|
|
|
|
from langchain.agents.middleware import AgentMiddleware
|
2026-01-28 14:03:43 +08:00
|
|
|
from langchain_core.messages import HumanMessage
|
2026-01-23 18:47:39 +08:00
|
|
|
from langgraph.runtime import Runtime
|
|
|
|
|
|
refactor: split backend into harness (deerflow.*) and app (app.*) (#1131)
* refactor: extract shared utils to break harness→app cross-layer imports
Move _validate_skill_frontmatter to src/skills/validation.py and
CONVERTIBLE_EXTENSIONS + convert_file_to_markdown to src/utils/file_conversion.py.
This eliminates the two reverse dependencies from client.py (harness layer)
into gateway/routers/ (app layer), preparing for the harness/app package split.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* refactor: split backend/src into harness (deerflow.*) and app (app.*)
Physically split the monolithic backend/src/ package into two layers:
- **Harness** (`packages/harness/deerflow/`): publishable agent framework
package with import prefix `deerflow.*`. Contains agents, sandbox, tools,
models, MCP, skills, config, and all core infrastructure.
- **App** (`app/`): unpublished application code with import prefix `app.*`.
Contains gateway (FastAPI REST API) and channels (IM integrations).
Key changes:
- Move 13 harness modules to packages/harness/deerflow/ via git mv
- Move gateway + channels to app/ via git mv
- Rename all imports: src.* → deerflow.* (harness) / app.* (app layer)
- Set up uv workspace with deerflow-harness as workspace member
- Update langgraph.json, config.example.yaml, all scripts, Docker files
- Add build-system (hatchling) to harness pyproject.toml
- Add PYTHONPATH=. to gateway startup commands for app.* resolution
- Update ruff.toml with known-first-party for import sorting
- Update all documentation to reflect new directory structure
Boundary rule enforced: harness code never imports from app.
All 429 tests pass. Lint clean.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* chore: add harness→app boundary check test and update docs
Add test_harness_boundary.py that scans all Python files in
packages/harness/deerflow/ and fails if any `from app.*` or
`import app.*` statement is found. This enforces the architectural
rule that the harness layer never depends on the app layer.
Update CLAUDE.md to document the harness/app split architecture,
import conventions, and the boundary enforcement test.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* feat: add config versioning with auto-upgrade on startup
When config.example.yaml schema changes, developers' local config.yaml
files can silently become outdated. This adds a config_version field and
auto-upgrade mechanism so breaking changes (like src.* → deerflow.*
renames) are applied automatically before services start.
- Add config_version: 1 to config.example.yaml
- Add startup version check warning in AppConfig.from_file()
- Add scripts/config-upgrade.sh with migration registry for value replacements
- Add `make config-upgrade` target
- Auto-run config-upgrade in serve.sh and start-daemon.sh before starting services
- Add config error hints in service failure messages
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* fix comments
* fix: update src.* import in test_sandbox_tools_security to deerflow.*
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* fix: handle empty config and search parent dirs for config.example.yaml
Address Copilot review comments on PR #1131:
- Guard against yaml.safe_load() returning None for empty config files
- Search parent directories for config.example.yaml instead of only
looking next to config.yaml, fixing detection in common setups
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* fix: correct skills root path depth and config_version type coercion
- loader.py: fix get_skills_root_path() to use 5 parent levels (was 3)
after harness split, file lives at packages/harness/deerflow/skills/
so parent×3 resolved to backend/packages/harness/ instead of backend/
- app_config.py: coerce config_version to int() before comparison in
_check_config_version() to prevent TypeError when YAML stores value
as string (e.g. config_version: "1")
- tests: add regression tests for both fixes
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix: update test imports from src.* to deerflow.*/app.* after harness refactor
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-14 22:55:52 +08:00
|
|
|
from deerflow.config.paths import Paths, get_paths
|
2026-01-23 18:47:39 +08:00
|
|
|
|
2026-03-05 11:16:34 +08:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
2026-01-23 18:47:39 +08:00
|
|
|
|
|
|
|
|
class UploadsMiddlewareState(AgentState):
    """State schema for uploads middleware."""

    # Metadata dicts for files uploaded with the current message; optional —
    # absent (or None) until the middleware has processed an upload.
    uploaded_files: NotRequired[list[dict] | None]
|
class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
    """Middleware to inject uploaded files information into the agent context.

    Reads file metadata from the current message's additional_kwargs.files
    (set by the frontend after upload) and prepends an <uploaded_files> block
    to the last human message so the model knows which files are available.
    """

    state_schema = UploadsMiddlewareState

    def __init__(self, base_dir: str | None = None):
        """Initialize the middleware.

        Args:
            base_dir: Base directory for thread data. Defaults to Paths resolution.
        """
        super().__init__()
        self._paths = Paths(base_dir) if base_dir else get_paths()

    @staticmethod
    def _format_size(size_bytes: int) -> str:
        """Render a byte count as a human-readable "N.N KB" / "N.N MB" string."""
        size_kb = size_bytes / 1024
        return f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"

    def _create_files_message(self, new_files: list[dict], historical_files: list[dict]) -> str:
        """Create a formatted message listing uploaded files.

        Args:
            new_files: Files uploaded in the current message.
            historical_files: Files uploaded in previous messages.

        Returns:
            Formatted string inside <uploaded_files> tags.
        """
        lines = ["<uploaded_files>"]

        # The "current message" section is always present; "(empty)" signals
        # to the model that this turn carried no new uploads.
        lines.append("The following files were uploaded in this message:")
        lines.append("")
        if new_files:
            for file in new_files:
                size_str = self._format_size(file["size"])
                lines.append(f"- {file['filename']} ({size_str})")
                lines.append(f" Path: {file['path']}")
                lines.append("")
        else:
            lines.append("(empty)")

        # The historical section is only emitted when older uploads still exist.
        if historical_files:
            lines.append("The following files were uploaded in previous messages and are still available:")
            lines.append("")
            for file in historical_files:
                size_str = self._format_size(file["size"])
                lines.append(f"- {file['filename']} ({size_str})")
                lines.append(f" Path: {file['path']}")
                lines.append("")

        lines.append("You can read these files using the `read_file` tool with the paths shown above.")
        lines.append("</uploaded_files>")

        return "\n".join(lines)

    def _files_from_kwargs(self, message: HumanMessage, uploads_dir: Path | None = None) -> list[dict] | None:
        """Extract file info from message additional_kwargs.files.

        The frontend sends uploaded file metadata in additional_kwargs.files
        after a successful upload. Each entry has: filename, size (bytes),
        path (virtual path), status.

        Args:
            message: The human message to inspect.
            uploads_dir: Physical uploads directory used to verify file existence.
                When provided, entries whose files no longer exist are skipped.

        Returns:
            List of file dicts with virtual paths, or None if the field is absent or empty.
        """
        kwargs_files = (message.additional_kwargs or {}).get("files")
        if not isinstance(kwargs_files, list) or not kwargs_files:
            return None

        files = []
        for f in kwargs_files:
            if not isinstance(f, dict):
                continue
            filename = f.get("filename") or ""
            # Reject path-like names ("../x", "a/b") — only bare filenames are
            # valid upload entries; this also blocks path traversal below.
            if not filename or Path(filename).name != filename:
                continue
            if uploads_dir is not None and not (uploads_dir / filename).is_file():
                continue
            files.append(
                {
                    "filename": filename,
                    "size": int(f.get("size") or 0),
                    # BUGFIX: was the literal f-string "/mnt/user-data/uploads/(unknown)"
                    # (no placeholder), so every new file got the same bogus path.
                    # Mirror the historical-files path format: uploads dir + filename.
                    "path": f"/mnt/user-data/uploads/{filename}",
                    "extension": Path(filename).suffix,
                }
            )
        return files if files else None

    @override
    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime) -> dict | None:
        """Inject uploaded files information before agent execution.

        New files come from the current message's additional_kwargs.files.
        Historical files are scanned from the thread's uploads directory,
        excluding the new ones.

        Prepends <uploaded_files> context to the last human message content.
        The original additional_kwargs (including files metadata) is preserved
        on the updated message so the frontend can read it from the stream.

        Args:
            state: Current agent state.
            runtime: Runtime context containing thread_id.

        Returns:
            State updates including uploaded files list.
        """
        messages = list(state.get("messages", []))
        if not messages:
            return None

        last_message_index = len(messages) - 1
        last_message = messages[last_message_index]

        # Only act when the turn ends with a human message; anything else
        # (tool/AI message) means there is nothing to annotate.
        if not isinstance(last_message, HumanMessage):
            return None

        # Resolve uploads directory for existence checks
        thread_id = (runtime.context or {}).get("thread_id")
        uploads_dir = self._paths.sandbox_uploads_dir(thread_id) if thread_id else None

        # Get newly uploaded files from the current message's additional_kwargs.files
        new_files = self._files_from_kwargs(last_message, uploads_dir) or []

        # Collect historical files from the uploads directory (all except the new ones)
        new_filenames = {f["filename"] for f in new_files}
        historical_files: list[dict] = []
        if uploads_dir and uploads_dir.exists():
            for file_path in sorted(uploads_dir.iterdir()):
                if file_path.is_file() and file_path.name not in new_filenames:
                    stat = file_path.stat()
                    historical_files.append(
                        {
                            "filename": file_path.name,
                            "size": stat.st_size,
                            "path": f"/mnt/user-data/uploads/{file_path.name}",
                            "extension": file_path.suffix,
                        }
                    )

        if not new_files and not historical_files:
            return None

        # Lazy %-formatting: the list comprehensions only run when DEBUG is enabled.
        logger.debug(
            "New files: %s, historical: %s",
            [f["filename"] for f in new_files],
            [f["filename"] for f in historical_files],
        )

        # Create files message and prepend to the last human message content
        files_message = self._create_files_message(new_files, historical_files)

        # Extract original content - handle both string and list formats
        original_content = ""
        if isinstance(last_message.content, str):
            original_content = last_message.content
        elif isinstance(last_message.content, list):
            text_parts = []
            for block in last_message.content:
                if isinstance(block, dict) and block.get("type") == "text":
                    text_parts.append(block.get("text", ""))
            original_content = "\n".join(text_parts)

        # Create new message with combined content.
        # Preserve additional_kwargs (including files metadata) so the frontend
        # can read structured file info from the streamed message.
        updated_message = HumanMessage(
            content=f"{files_message}\n\n{original_content}",
            id=last_message.id,
            additional_kwargs=last_message.additional_kwargs,
        )

        messages[last_message_index] = updated_message

        return {
            "uploaded_files": new_files,
            "messages": messages,
        }
|