From a376b0cb4e28a4a17c2ba2b2baa38e55e36e3eb3 Mon Sep 17 00:00:00 2001 From: Willem Jiang Date: Wed, 7 Jan 2026 20:36:15 +0800 Subject: [PATCH] fix: Add runtime parameter to compress_messages method(#803) * fix: Add runtime parameter to compress_messages method(#803) The compress_messages method was being called by PreModelHookMiddleware with both state and runtime parameters, but only accepted state parameter. This caused a TypeError when the middleware executed the pre_model_hook. Added optional runtime parameter to compress_messages signature to match the expected interface while maintaining backward compatibility. * Update the code with the review comments --- src/utils/context_manager.py | 5 ++++- tests/unit/utils/test_context_manager.py | 26 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/utils/context_manager.py b/src/utils/context_manager.py index 72267d6..b05ec89 100644 --- a/src/utils/context_manager.py +++ b/src/utils/context_manager.py @@ -3,6 +3,8 @@ import copy import logging from typing import List +from langgraph.runtime import Runtime + from langchain_core.messages import ( AIMessage, BaseMessage, @@ -144,12 +146,13 @@ class ContextManager: """ return self.count_tokens(messages) > self.token_limit - def compress_messages(self, state: dict) -> List[BaseMessage]: + def compress_messages(self, state: dict, runtime: Runtime | None = None) -> dict: """ Compress messages to fit within token limit Args: state: state with original messages + runtime: Optional runtime parameter (not used but required for middleware compatibility) Returns: Compressed state with compressed messages diff --git a/tests/unit/utils/test_context_manager.py b/tests/unit/utils/test_context_manager.py index 4967c46..1ac88fb 100644 --- a/tests/unit/utils/test_context_manager.py +++ b/tests/unit/utils/test_context_manager.py @@ -181,3 +181,29 @@ class TestContextManager: text = "Hello world 这是一些中文" token_count = context_manager._count_text_tokens(text) assert token_count > 6 + + def test_compress_messages_with_runtime_when_not_over_limit(self): + """compress_messages accepts runtime param when under limit""" + context_manager = ContextManager(token_limit=1000) + messages = [HumanMessage(content="Short message"), AIMessage(content="OK")] + compressed = context_manager.compress_messages({"messages": messages}, runtime=object()) + assert isinstance(compressed, dict) + assert "messages" in compressed + assert len(compressed["messages"]) == len(messages) + + def test_compress_messages_with_runtime_when_over_limit(self): + """compress_messages accepts runtime param and still compresses""" + limited_cm = ContextManager(token_limit=200) + messages = [ + SystemMessage(content="You are a helpful assistant."), + HumanMessage(content="Hello"), + AIMessage(content="Hi there!"), + HumanMessage( + content="Can you tell me a very long story that would exceed token limits? " * 100 + ), + ] + compressed = limited_cm.compress_messages({"messages": messages}, runtime=object()) + assert isinstance(compressed, dict) + assert "messages" in compressed + # Should preserve only what fits; with this setup we expect heavy compression + assert len(compressed["messages"]) == 1