feat: add context compress (#590)

* feat: Add context compress

* feat: Add unit test

* feat: add unit test for context manager

* feat: add postprocessor param && code format

* feat: add configuration guide

* fix: fix the configuration_guide

* fix: fix the unit test

* fix: fix the default value

* feat: add test and log for context_manager
This commit is contained in:
Fancy-hjyp
2025-09-27 06:42:22 -07:00
committed by GitHub
parent c214999606
commit 5f4eb38fdb
9 changed files with 1032 additions and 7 deletions

View File

@@ -67,6 +67,10 @@ def _create_llm_use_conf(llm_type: LLMType, conf: Dict[str, Any]) -> BaseChatMod
# Merge configurations, with environment variables taking precedence
merged_conf = {**llm_conf, **env_conf}
# Remove unnecessary parameters when initializing the client
if "token_limit" in merged_conf:
merged_conf.pop("token_limit")
if not merged_conf:
raise ValueError(f"No configuration found for LLM type: {llm_type}")
@@ -174,6 +178,25 @@ def get_configured_llm_models() -> dict[str, list[str]]:
return {}
def get_llm_token_limit_by_type(llm_type: str) -> "int | None":
    """
    Get the maximum token limit for a given LLM type.

    Args:
        llm_type (str): The type of LLM.

    Returns:
        int | None: The ``token_limit`` value configured for this LLM type,
        or ``None`` when the type is unknown or no ``token_limit`` is set
        in the configuration file.
    """
    llm_type_config_keys = _get_llm_type_config_keys()
    config_key = llm_type_config_keys.get(llm_type)
    conf = load_yaml_config(_get_config_file_path())
    # For an unrecognized llm_type, config_key is None; conf.get(None, {})
    # then yields {} and the lookup below falls through to None rather
    # than raising — callers must handle the missing-limit case.
    llm_max_token = conf.get(config_key, {}).get("token_limit")
    return llm_max_token
# In the future, we will use reasoning_llm and vl_llm for different purposes
# reasoning_llm = get_llm_by_type("reasoning")
# vl_llm = get_llm_by_type("vision")