fix: apply context compression to prevent token overflow (Issue #721) (#722)

* fix: apply context compression to prevent token overflow (Issue #721)

- Add token_limit configuration to conf.yaml.example for BASIC_MODEL and REASONING_MODEL
- Implement context compression in _execute_agent_step() before agent invocation
- Preserve first 3 messages (system prompt + context) during compression
- Enhance ContextManager logging with better token count reporting
- Prevent 400 Input tokens exceeded errors by automatically compressing message history

* feat: add model-based token limit inference for Issue #721

- Add smart default token limits based on common LLM models
- Support model name inference when token_limit not explicitly configured
- Models covered: OpenAI (GPT-4o, GPT-4, GPT-3.5), Anthropic Claude, Google Gemini, Bytedance Doubao, DeepSeek, and local models (Qwen, Llama)
- Conservative defaults prevent token overflow even without explicit configuration
- Priority: explicit config > model inference > safe default (100,000 tokens)
- Ensures Issue #721 protection for all users, not just those with token_limit set
This commit is contained in:
Willem Jiang
2025-11-28 18:52:42 +08:00
committed by GitHub
parent 223ec57fe4
commit b24f4d3f38
4 changed files with 110 additions and 8 deletions

View File

@@ -178,23 +178,101 @@ def get_configured_llm_models() -> dict[str, list[str]]:
return {}
def _get_model_token_limit_defaults() -> dict[str, int]:
"""
Get default token limits for common LLM models.
These are conservative limits to prevent token overflow errors (Issue #721).
Users can override by setting token_limit in their config.
"""
return {
# OpenAI models
"gpt-4o": 120000,
"gpt-4-turbo": 120000,
"gpt-4": 8000,
"gpt-3.5-turbo": 4000,
# Anthropic Claude
"claude-3": 180000,
"claude-2": 100000,
# Google Gemini
"gemini-2": 180000,
"gemini-1.5-pro": 180000,
"gemini-1.5-flash": 180000,
"gemini-pro": 30000,
# Bytedance Doubao
"doubao": 200000,
# DeepSeek
"deepseek": 100000,
# Ollama/local
"qwen": 30000,
"llama": 4000,
# Default fallback for unknown models
"default": 100000,
}
def _infer_token_limit_from_model(model_name: str) -> int:
    """
    Infer a conservative token limit from the model name.

    Protects against token-overflow errors (Issue #721) when ``token_limit``
    is not explicitly configured.

    Args:
        model_name: The model name from configuration (may be empty).

    Returns:
        A conservative token limit based on known model capabilities, or the
        table's safe default when the name is empty or unrecognized.
    """
    if not model_name:
        return 100000  # Safe default when no model name is configured.

    model_name_lower = model_name.lower()
    defaults = _get_model_token_limit_defaults()

    # Match by substring, preferring the LONGEST matching key so that a more
    # specific entry (e.g. "gpt-4-turbo") always wins over a generic one
    # (e.g. "gpt-4"), independent of the table's insertion order. The
    # previous first-match loop only worked because the dict happened to
    # list specific keys first.
    matching_keys = [key for key in defaults if key in model_name_lower]
    if matching_keys:
        return defaults[max(matching_keys, key=len)]

    # No fragment matched: fall back to the table's safe default.
    return defaults["default"]
def get_llm_token_limit_by_type(llm_type: str) -> int:
    """
    Get the maximum token limit for a given LLM type.

    Priority order:
      1. Explicitly configured ``token_limit`` in conf.yaml
      2. Inferred from the configured model name (known model capabilities)
      3. Safe default (100,000 tokens)

    This prevents token-overflow errors (Issue #721) even when
    ``token_limit`` is not configured.

    Args:
        llm_type (str): The type of LLM (e.g., 'basic', 'reasoning',
            'vision', 'code').

    Returns:
        int: The maximum token limit for the specified LLM type
            (conservative estimate).
    """
    llm_type_config_keys = _get_llm_type_config_keys()
    config_key = llm_type_config_keys.get(llm_type)
    conf = load_yaml_config(_get_config_file_path())
    # Unknown llm_type yields config_key=None; conf.get(None, {}) is safe
    # and falls through to the default below.
    model_config = conf.get(config_key, {})

    # First priority: explicitly configured token_limit (ignore a null value
    # so an empty YAML entry does not disable the fallback logic).
    configured_limit = model_config.get("token_limit")
    if configured_limit is not None:
        return configured_limit

    # Second priority: infer a conservative limit from the model name.
    model_name = model_config.get("model")
    if model_name:
        return _infer_token_limit_from_model(model_name)

    # Fallback: safe default from the shared limits table.
    return _get_model_token_limit_defaults()["default"]
# In the future, we will use reasoning_llm and vl_llm for different purposes