feat: implement summarization (#14)

This commit is contained in:
DanielWalnut
2026-01-19 16:17:31 +08:00
committed by GitHub
parent 1ef04c94ee
commit 9a3eaea54e
8 changed files with 555 additions and 5 deletions

View File

@@ -1,4 +1,5 @@
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.runnables import RunnableConfig
from src.agents.lead_agent.prompt import apply_prompt_template
@@ -6,12 +7,66 @@ from src.agents.middlewares.clarification_middleware import ClarificationMiddlew
from src.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
from src.agents.middlewares.title_middleware import TitleMiddleware
from src.agents.thread_state import ThreadState
from src.config.summarization_config import get_summarization_config
from src.models import create_chat_model
from src.sandbox.middleware import SandboxMiddleware
def _create_summarization_middleware() -> SummarizationMiddleware | None:
    """Build the summarization middleware from the global summarization config.

    Returns:
        A configured ``SummarizationMiddleware``, or ``None`` when
        summarization is disabled in the configuration.
    """
    config = get_summarization_config()
    if not config.enabled:
        return None

    # The middleware takes (type, value) tuples. The config may hold a single
    # threshold, a list of thresholds, or no trigger at all.
    trigger = None
    if config.trigger is not None:
        if isinstance(config.trigger, list):
            trigger = [t.to_tuple() for t in config.trigger]
        else:
            trigger = config.trigger.to_tuple()

    if config.model_name:
        # NOTE(review): the name is handed to the middleware as a plain string
        # rather than resolved through create_chat_model(name=...) -- confirm
        # that this is the intended lookup path for configured model names.
        model = config.model_name
    else:
        # No dedicated summarization model configured: use the default chat
        # model with thinking disabled.
        model = create_chat_model(thinking_enabled=False)

    kwargs = {
        "model": model,
        "trigger": trigger,
        "keep": config.keep.to_tuple(),
        # Always forwarded, even when None. None is the documented way to skip
        # trimming; omitting the argument would let the middleware apply its
        # own default limit instead (per the LangChain API reference).
        "trim_tokens_to_summarize": config.trim_tokens_to_summarize,
    }
    # Only override the prompt when one is configured, so the middleware's
    # built-in prompt stays in effect otherwise.
    if config.summary_prompt is not None:
        kwargs["summary_prompt"] = config.summary_prompt

    return SummarizationMiddleware(**kwargs)
# Middleware ordering constraints:
# - ThreadDataMiddleware must come before SandboxMiddleware to ensure thread_id is available.
# - SummarizationMiddleware should be early to reduce context before other processing.
# - ClarificationMiddleware should be last to intercept clarification requests after model calls.
# NOTE(review): this fixed list does not include SummarizationMiddleware; it looks like the
# pre-change form that _build_middlewares() supersedes -- confirm before relying on it.
middlewares = [ThreadDataMiddleware(), SandboxMiddleware(), TitleMiddleware(), ClarificationMiddleware()]
def _build_middlewares():
    """Assemble the lead agent's middleware chain in execution order.

    The chain is ThreadDataMiddleware, SandboxMiddleware, the summarization
    middleware (only when it is enabled), TitleMiddleware, and finally
    ClarificationMiddleware.
    """
    head = [ThreadDataMiddleware(), SandboxMiddleware()]

    # The summarization step is optional; the factory returns None when disabled.
    summarizer = _create_summarization_middleware()
    optional = [summarizer] if summarizer is not None else []

    return [*head, *optional, TitleMiddleware(), ClarificationMiddleware()]
def make_lead_agent(config: RunnableConfig):
@@ -24,7 +79,7 @@ def make_lead_agent(config: RunnableConfig):
return create_agent(
model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
tools=get_available_tools(),
middleware=middlewares,
middleware=_build_middlewares(),
system_prompt=apply_prompt_template(),
state_schema=ThreadState,
)

View File

@@ -89,7 +89,7 @@ You: "Deploying to staging..." [proceed]
You have access to skills that provide optimized workflows for specific tasks. Each skill contains best practices, frameworks, and references to additional resources.
**Progressive Loading Pattern:**
1. When a user query matches a skill's use case, immediately call `view` on the skill's main file using the path attribute provided in the skill tag below
1. When a user query matches a skill's use case, immediately call `read_file` on the skill's main file using the path attribute provided in the skill tag below
2. Read and understand the skill's workflow and instructions
3. The skill file contains references to external resources under the same folder
4. Load referenced resources only when needed during execution

View File

@@ -9,6 +9,7 @@ from pydantic import BaseModel, ConfigDict, Field
from src.config.model_config import ModelConfig
from src.config.sandbox_config import SandboxConfig
from src.config.skills_config import SkillsConfig
from src.config.summarization_config import load_summarization_config_from_dict
from src.config.title_config import load_title_config_from_dict
from src.config.tool_config import ToolConfig, ToolGroupConfig
@@ -75,6 +76,10 @@ class AppConfig(BaseModel):
if "title" in config_data:
load_title_config_from_dict(config_data["title"])
# Load summarization config if present
if "summarization" in config_data:
load_summarization_config_from_dict(config_data["summarization"])
result = cls.model_validate(config_data)
return result

View File

@@ -0,0 +1,74 @@
"""Configuration for conversation summarization."""
from typing import Literal
from pydantic import BaseModel, Field
# How a context size is measured:
# - "fraction": a share of the model's max input tokens
# - "tokens": an absolute token count
# - "messages": a number of messages
ContextSizeType = Literal["fraction", "tokens", "messages"]
class ContextSize(BaseModel):
    """A single size threshold: a measurement kind paired with its amount.

    Used for both the ``trigger`` and ``keep`` settings of the summarization
    configuration.
    """

    # Which unit ``value`` is expressed in (see ContextSizeType).
    type: ContextSizeType = Field(description="Type of context size specification")
    # The amount, interpreted according to ``type``.
    value: int | float = Field(description="Value for the context size specification")

    def to_tuple(self) -> tuple[ContextSizeType, int | float]:
        """Return ``(type, value)``, the pair form passed to SummarizationMiddleware."""
        kind, amount = self.type, self.value
        return kind, amount
class SummarizationConfig(BaseModel):
    """Settings that control automatic summarization of conversation history.

    Summarization is off by default. When enabled, ``trigger`` decides when a
    summary is produced and ``keep`` decides how much history is retained
    afterwards.
    """

    # Master switch; nothing else applies while this is False.
    enabled: bool = Field(
        default=False,
        description="Whether to enable automatic conversation summarization",
    )

    # Optional dedicated model for producing summaries.
    model_name: str | None = Field(
        default=None,
        description="Model name to use for summarization (None = use a lightweight model)",
    )

    # A single threshold, several thresholds, or None for no configured trigger.
    trigger: ContextSize | list[ContextSize] | None = Field(
        default=None,
        description=(
            "One or more thresholds that trigger summarization. "
            "When any threshold is met, summarization runs. "
            "Examples: {'type': 'messages', 'value': 50} triggers at 50 messages, "
            "{'type': 'tokens', 'value': 4000} triggers at 4000 tokens, "
            "{'type': 'fraction', 'value': 0.8} triggers at 80% of model's max input tokens"
        ),
    )

    # Retention policy; defaults to keeping 20 messages.
    keep: ContextSize = Field(
        default_factory=lambda: ContextSize(type="messages", value=20),
        description=(
            "Context retention policy after summarization. "
            "Specifies how much history to preserve. "
            "Examples: {'type': 'messages', 'value': 20} keeps 20 messages, "
            "{'type': 'tokens', 'value': 3000} keeps 3000 tokens, "
            "{'type': 'fraction', 'value': 0.3} keeps 30% of model's max input tokens"
        ),
    )

    # Token cap on the input to the summarizer; None means no trimming.
    trim_tokens_to_summarize: int | None = Field(
        default=4000,
        description="Maximum tokens to keep when preparing messages for summarization. Pass null to skip trimming.",
    )

    # Custom summary prompt; None keeps the default LangChain prompt.
    summary_prompt: str | None = Field(
        default=None,
        description="Custom prompt template for generating summaries. If not provided, uses the default LangChain prompt.",
    )
# Module-wide configuration instance. Starts out with every field at its
# default and is replaced wholesale by set_summarization_config() and
# load_summarization_config_from_dict().
_summarization_config: SummarizationConfig = SummarizationConfig()
def get_summarization_config() -> SummarizationConfig:
    """Return the summarization configuration currently in effect.

    This is the module-level instance itself, not a copy.
    """
    active = _summarization_config
    return active
def set_summarization_config(config: SummarizationConfig) -> None:
    """Install *config* as the module-wide summarization configuration.

    Args:
        config: The configuration object that later calls to
            ``get_summarization_config()`` will return.
    """
    # Rebinds the module-level name; the previous instance is not modified.
    global _summarization_config
    _summarization_config = config
def load_summarization_config_from_dict(config_dict: dict | None) -> None:
    """Replace the global summarization config with one built from *config_dict*.

    Args:
        config_dict: Mapping of ``SummarizationConfig`` field names to values.
            ``None`` or an empty mapping (for example, a section key that is
            present but has no entries) yields the default configuration.

    Raises:
        pydantic.ValidationError: If a supplied value fails model validation.
    """
    global _summarization_config
    # ``**None`` would raise TypeError, so an empty section means "all defaults".
    _summarization_config = SummarizationConfig(**(config_dict or {}))