2025-04-17 11:34:42 +08:00
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
2025-04-07 16:25:55 +08:00
import json
2025-04-21 19:50:34 +08:00
import logging
2025-05-18 11:37:03 +08:00
import os
2025-10-13 22:35:57 -07:00
from functools import partial
2025-11-24 21:13:15 +08:00
from typing import Any , Annotated , Literal
2025-04-07 16:25:55 +08:00
2025-09-27 06:42:22 -07:00
from langchain_core . messages import AIMessage , HumanMessage , ToolMessage
2025-04-07 16:25:55 +08:00
from langchain_core . runnables import RunnableConfig
2025-04-21 19:50:34 +08:00
from langchain_core . tools import tool
2025-04-23 16:00:01 +08:00
from langchain_mcp_adapters . client import MultiServerMCPClient
2025-08-17 22:57:23 +08:00
from langgraph . types import Command , interrupt
2025-04-23 16:00:01 +08:00
2025-05-17 22:23:52 -07:00
from src . agents import create_agent
2025-04-07 16:25:55 +08:00
from src . config . agents import AGENT_LLM_MAP
from src . config . configuration import Configuration
2025-09-27 06:42:22 -07:00
from src . llms . llm import get_llm_by_type , get_llm_token_limit_by_type
2025-06-04 21:47:17 -07:00
from src . prompts . planner_model import Plan
2025-04-21 19:50:34 +08:00
from src . prompts . template import apply_prompt_template
2025-08-17 22:57:23 +08:00
from src . tools import (
crawl_tool ,
get_retriever_tool ,
get_web_search_tool ,
python_repl_tool ,
)
from src . tools . search import LoggedTavilySearch
2025-10-23 22:08:14 +08:00
from src . utils . context_manager import ContextManager , validate_message_content
2025-10-26 07:24:25 +08:00
from src . utils . json_utils import repair_json_output , sanitize_tool_response
2025-04-21 19:50:34 +08:00
2025-05-17 22:23:52 -07:00
from . . config import SELECTED_SEARCH_ENGINE , SearchEngine
2025-08-17 22:57:23 +08:00
from . types import State
2025-10-22 22:49:07 +08:00
from . utils import (
build_clarified_topic_from_history ,
get_message_content ,
is_user_message ,
reconstruct_clarification_history ,
)
2025-04-07 16:25:55 +08:00
logger = logging . getLogger ( __name__ )
# NOTE: the docstring and the Annotated descriptions below are surfaced to the
# LLM as the tool schema, so their wording is runtime behavior, not mere docs.
@tool
def handoff_to_planner(
    research_topic: Annotated[str, "The topic of the research task to be handed off."],
    locale: Annotated[str, "The user's detected language locale (e.g., en-US, zh-CN)."],
):
    """Handoff to planner agent to do plan."""
    # This tool is not returning anything: we're just using it
    # as a way for LLM to signal that it needs to hand off to planner agent
    return
2025-10-13 22:35:57 -07:00
# NOTE: like handoff_to_planner, the docstring and Annotated descriptions are
# the LLM-facing tool schema; the tool body intentionally does nothing.
@tool
def handoff_after_clarification(
    locale: Annotated[str, "The user's detected language locale (e.g., en-US, zh-CN)."],
    research_topic: Annotated[
        str, "The clarified research topic based on all clarification rounds."
    ],
):
    """Handoff to planner after clarification rounds are complete. Pass all clarification history to planner for analysis."""
    # Signal-only tool: the coordinator node inspects response.tool_calls to
    # decide routing; no value is produced here.
    return
def needs_clarification(state: dict) -> bool:
    """Return True when the clarification loop should keep running.

    Centralized routing predicate: clarification must be enabled, at least one
    round must already have happened, the flow must not be marked complete,
    and the round budget must not be exhausted.

    Args:
        state: Workflow state mapping; only clarification-related keys are read.

    Returns:
        bool: True if another clarification round is expected.
    """
    if not state.get("enable_clarification", False):
        return False

    rounds = state.get("clarification_rounds", 0)
    if rounds <= 0:
        # No question has been asked yet, so there is nothing to wait for.
        return False
    if state.get("is_clarification_complete", False):
        return False

    # <= (not <): after asking the Nth question we still wait for the Nth answer.
    return rounds <= state.get("max_clarification_rounds", 3)
2025-10-28 21:45:29 +08:00
def preserve_state_meta_fields(state: State) -> dict:
    """Collect the meta/config fields that must survive state transitions.

    These fields are critical for workflow continuity; including them in every
    Command.update dict prevents them from silently reverting to defaults.

    Args:
        state: Current state object (read via .get only).

    Returns:
        Dict of meta fields, each falling back to its documented default.
    """
    # Field name -> default used when the state does not carry a value.
    # The dict is rebuilt on every call so mutable defaults are never shared.
    fallbacks = {
        "locale": "en-US",
        "research_topic": "",
        "clarified_research_topic": "",
        "clarification_history": [],
        "enable_clarification": False,
        "max_clarification_rounds": 3,
        "clarification_rounds": 0,
        "resources": [],
    }
    return {field: state.get(field, default) for field, default in fallbacks.items()}
2025-10-22 08:27:06 +08:00
def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
    """Normalize a planner-produced plan dict in place.

    Three passes are applied:
      1. Fill in any missing/empty ``step_type`` on steps (Issue #650 fix).
      2. Optionally guarantee at least one step performs a web search.
      3. Backfill required top-level fields -- ``locale``,
         ``has_enough_context`` and ``title`` (Issue #710 fix).

    Args:
        plan: The plan dict to validate; non-dict inputs are returned untouched.
        enforce_web_search: If True, ensure at least one step has need_search=true.

    Returns:
        The validated/fixed plan (the same, mutated, object for dict inputs).
    """
    if not isinstance(plan, dict):
        return plan

    steps = plan.get("steps", [])

    # ------------------------------------------------------------
    # Pass 1: repair missing step_type fields (Issue #650 fix)
    # ------------------------------------------------------------
    for position, entry in enumerate(steps):
        if not isinstance(entry, dict):
            continue
        if entry.get("step_type"):
            continue
        # Infer the type from whether the step wants a web search.
        guessed = "research" if entry.get("need_search", False) else "processing"
        entry["step_type"] = guessed
        logger.info(
            f"Repaired missing step_type for step {position} ({entry.get('title', 'Untitled')}): "
            f"inferred as '{guessed}' based on need_search={entry.get('need_search', False)}"
        )

    # ------------------------------------------------------------
    # Pass 2: enforce web search requirements
    # ------------------------------------------------------------
    if enforce_web_search:
        # Only dict-shaped steps are inspected for the search flag.
        search_present = any(
            entry.get("need_search", False)
            for entry in steps
            if isinstance(entry, dict)
        )
        if not search_present and steps:
            # Prefer switching on search for an existing research step.
            for position, entry in enumerate(steps):
                if isinstance(entry, dict) and entry.get("step_type") == "research":
                    entry["need_search"] = True
                    logger.info(f"Enforced web search on research step at index {position}")
                    break
            else:
                # Fallback: no research step exists, so convert the first step
                # into a research step with web search enabled. This ensures at
                # least one step will perform a web search as required.
                if isinstance(steps[0], dict):
                    steps[0]["step_type"] = "research"
                    steps[0]["need_search"] = True
                    logger.info(
                        "Converted first step to research with web search enforcement"
                    )
        elif not search_present and not steps:
            # Empty plan: seed it with a single default research step.
            logger.warning("Plan has no steps. Adding default research step.")
            plan["steps"] = [
                {
                    "need_search": True,
                    "title": "Initial Research",
                    "description": "Gather information about the topic",
                    "step_type": "research",
                }
            ]

    # ------------------------------------------------------------
    # Pass 3: ensure required Plan fields are present (Issue #710 fix)
    # ------------------------------------------------------------
    if not plan.get("locale"):
        plan["locale"] = "en-US"  # Default locale
        logger.info("Added missing locale field with default value 'en-US'")
    if "has_enough_context" not in plan:
        plan["has_enough_context"] = False  # Default value
        logger.info("Added missing has_enough_context field with default value 'False'")
    if not plan.get("title"):
        # NOTE: ``steps`` here is the list captured before Pass 2 may have
        # replaced plan["steps"], preserving the original lookup behavior.
        if steps and isinstance(steps[0], dict) and "title" in steps[0]:
            plan["title"] = steps[0]["title"]
            logger.info(f"Inferred missing title from first step: {plan['title']}")
        else:
            plan["title"] = "Research Plan"  # Default title
            logger.info("Added missing title field with default value 'Research Plan'")

    return plan
2025-06-04 21:47:17 -07:00
def background_investigation_node(state: State, config: RunnableConfig):
    """Run a pre-planning web search on the (possibly clarified) research topic.

    Uses Tavily when it is the selected engine, handling its tuple / JSON-string
    / legacy-list response shapes; otherwise delegates to the generic web search
    tool. The result is stored under ``background_investigation_results``.
    """
    logger.info("background investigation node is running.")
    configurable = Configuration.from_runnable_config(config)
    # Prefer the clarified topic when the clarification flow produced one.
    query = state.get("clarified_research_topic") or state.get("research_topic")

    if SELECTED_SEARCH_ENGINE != SearchEngine.TAVILY.value:
        # Non-Tavily engines: hand the query straight to the configured tool.
        findings = get_web_search_tool(configurable.max_search_results).invoke(query)
        return {
            "background_investigation_results": json.dumps(
                findings, ensure_ascii=False
            )
        }

    raw = LoggedTavilySearch(max_results=configurable.max_search_results).invoke(query)
    # check if the searched_content is a tuple, then we need to unpack it
    if isinstance(raw, tuple):
        raw = raw[0]

    # Handle legacy list format: return a markdown digest directly.
    if isinstance(raw, list):
        digest = "\n\n".join(f"## {elem['title']}\n\n{elem['content']}" for elem in raw)
        return {"background_investigation_results": digest}

    findings = []
    if isinstance(raw, str):
        # Handle string JSON response (new format from fixed Tavily tool).
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError:
            logger.error(f"Failed to parse Tavily response as JSON: {raw}")
        else:
            if isinstance(decoded, dict) and "error" in decoded:
                logger.error(f"Tavily search error: {decoded['error']}")
            elif isinstance(decoded, list):
                findings = [
                    f"## {elem.get('title', 'Untitled')}\n\n{elem.get('content', 'No content')}"
                    for elem in decoded
                ]
            else:
                logger.error(f"Unexpected Tavily response format: {raw}")
    else:
        # Neither tuple-wrapped, list, nor string: nothing usable came back.
        logger.error(
            f"Tavily search returned malformed response: {raw}"
        )

    return {
        "background_investigation_results": json.dumps(
            findings, ensure_ascii=False
        )
    }
2025-04-27 20:15:42 +08:00
2025-04-07 16:25:55 +08:00
def planner_node(
    state: State, config: RunnableConfig
) -> Command[Literal["human_feedback", "reporter"]]:
    """Planner node that generate the full plan.

    Builds the planner prompt (substituting the clarified research topic for the
    conversation when clarification is enabled), optionally appends background
    investigation results, invokes the configured planner LLM, and parses its
    JSON plan.

    Routing:
      * iteration budget exhausted               -> "reporter"
      * plan has enough context (validated Plan) -> "reporter"
      * otherwise (raw JSON string for review)   -> "human_feedback"
      * unparseable response, first iteration    -> "__end__"
    """
    logger.info("Planner generating full plan with locale: %s", state.get("locale", "en-US"))
    configurable = Configuration.from_runnable_config(config)
    # Treat a falsy/absent plan_iterations as 0.
    plan_iterations = state["plan_iterations"] if state.get("plan_iterations", 0) else 0

    # For clarification feature: use the clarified research topic (complete history)
    if state.get("enable_clarification", False) and state.get(
        "clarified_research_topic"
    ):
        # Modify state to use clarified research topic instead of full conversation
        # (a shallow copy so the real state is untouched).
        modified_state = state.copy()
        modified_state["messages"] = [
            {"role": "user", "content": state["clarified_research_topic"]}
        ]
        modified_state["research_topic"] = state["clarified_research_topic"]
        messages = apply_prompt_template("planner", modified_state, configurable, state.get("locale", "en-US"))
        logger.info(
            f"Clarification mode: Using clarified research topic: {state['clarified_research_topic']}"
        )
    else:
        # Normal mode: use full conversation history
        messages = apply_prompt_template("planner", state, configurable, state.get("locale", "en-US"))

    # Surface background-investigation findings to the planner as an extra
    # user message, when that feature produced results.
    if state.get("enable_background_investigation") and state.get(
        "background_investigation_results"
    ):
        messages += [
            {
                "role": "user",
                "content": (
                    "background investigation results of user query:\n"
                    + state["background_investigation_results"]
                    + "\n"
                ),
            }
        ]

    # LLM selection: deep thinking forces the reasoning model; otherwise follow
    # the planner entry of AGENT_LLM_MAP.
    if configurable.enable_deep_thinking:
        llm = get_llm_by_type("reasoning")
    elif AGENT_LLM_MAP["planner"] == "basic":
        llm = get_llm_by_type("basic")
    else:
        llm = get_llm_by_type(AGENT_LLM_MAP["planner"])

    # if the plan iterations is greater than the max plan iterations, return the reporter node
    if plan_iterations >= configurable.max_plan_iterations:
        return Command(
            update=preserve_state_meta_fields(state),
            goto="reporter"
        )

    full_response = ""
    if AGENT_LLM_MAP["planner"] == "basic" and not configurable.enable_deep_thinking:
        # Non-streaming invoke; structured responses expose model_dump_json.
        response = llm.invoke(messages)
        if hasattr(response, "model_dump_json"):
            full_response = response.model_dump_json(indent=4, exclude_none=True)
        else:
            full_response = get_message_content(response) or ""
    else:
        # Streaming path: concatenate chunk contents.
        response = llm.stream(messages)
        for chunk in response:
            full_response += chunk.content
    logger.debug(f"Current state messages: {state['messages']}")
    logger.info(f"Planner response: {full_response}")

    # Validate explicitly that response content is valid JSON before proceeding to parse it
    # NOTE(review): this pre-check runs BEFORE repair_json_output, so a response
    # wrapped in a ```json fence is rejected here even if repair_json_output
    # could have recovered it — confirm whether that is intended.
    if not full_response.strip().startswith('{') and not full_response.strip().startswith('['):
        logger.warning("Planner response does not appear to be valid JSON")
        if plan_iterations > 0:
            return Command(
                update=preserve_state_meta_fields(state),
                goto="reporter"
            )
        else:
            return Command(
                update=preserve_state_meta_fields(state),
                goto="__end__"
            )
    try:
        curr_plan = json.loads(repair_json_output(full_response))
    except json.JSONDecodeError:
        logger.warning("Planner response is not a valid JSON")
        if plan_iterations > 0:
            return Command(
                update=preserve_state_meta_fields(state),
                goto="reporter"
            )
        else:
            return Command(
                update=preserve_state_meta_fields(state),
                goto="__end__"
            )

    # Validate and fix plan to ensure web search requirements are met
    if isinstance(curr_plan, dict):
        curr_plan = validate_and_fix_plan(curr_plan, configurable.enforce_web_search)

    if isinstance(curr_plan, dict) and curr_plan.get("has_enough_context"):
        # The plan is complete enough to skip human review entirely.
        logger.info("Planner response has enough context.")
        new_plan = Plan.model_validate(curr_plan)
        return Command(
            update={
                "messages": [AIMessage(content=full_response, name="planner")],
                "current_plan": new_plan,
                **preserve_state_meta_fields(state),
            },
            goto="reporter",
        )
    # Otherwise hand the raw JSON string to the human-feedback node for review.
    return Command(
        update={
            "messages": [AIMessage(content=full_response, name="planner")],
            "current_plan": full_response,
            **preserve_state_meta_fields(state),
        },
        goto="human_feedback",
    )
2025-11-24 21:13:15 +08:00
def extract_plan_content ( plan_data : str | dict | Any ) - > str :
"""
Safely extract plan content from different types of plan data .
Args :
plan_data : The plan data which can be a string , AIMessage , or dict
Returns :
str : The plan content as a string ( JSON string for dict inputs , or
extracted / original string for other types )
"""
if isinstance ( plan_data , str ) :
# If it's already a string, return as is
return plan_data
elif hasattr ( plan_data , ' content ' ) and isinstance ( plan_data . content , str ) :
# If it's an AIMessage or similar object with a content attribute
logger . debug ( f " Extracting plan content from message object of type { type ( plan_data ) . __name__ } " )
return plan_data . content
elif isinstance ( plan_data , dict ) :
# If it's already a dictionary, convert to JSON string
2025-11-27 19:39:25 +08:00
# Need to check if it's dict with content field (AIMessage-like)
if " content " in plan_data :
if isinstance ( plan_data [ " content " ] , str ) :
logger . debug ( " Extracting plan content from dict with content field " )
return plan_data [ " content " ]
if isinstance ( plan_data [ " content " ] , dict ) :
logger . debug ( " Converting content field dict to JSON string " )
return json . dumps ( plan_data [ " content " ] , ensure_ascii = False )
else :
logger . warning ( f " Unexpected type for ' content ' field in plan_data dict: { type ( plan_data [ ' content ' ] ) . __name__ } , converting to string " )
return str ( plan_data [ " content " ] )
else :
logger . debug ( " Converting plan dictionary to JSON string " )
return json . dumps ( plan_data )
2025-11-24 21:13:15 +08:00
else :
# For any other type, try to convert to string
logger . warning ( f " Unexpected plan data type { type ( plan_data ) . __name__ } , attempting to convert to string " )
return str ( plan_data )
2025-04-14 18:01:50 +08:00
def human_feedback_node(
    state: State, config: RunnableConfig
) -> Command[Literal["planner", "research_team", "reporter", "__end__"]]:
    """Pause for human review of the plan, then parse and route it.

    Unless the plan is auto-accepted, interrupts the graph and waits for
    feedback beginning with "[ACCEPTED]" (proceed) or "[EDIT_PLAN]" (return to
    planner with the edit request). An accepted plan is extracted, JSON-repaired,
    parsed, validated, and handed to "research_team"; parse failures route to
    "reporter" (after the first iteration) or "__end__".
    """
    current_plan = state.get("current_plan", "")
    # check if the plan is auto accepted
    auto_accepted_plan = state.get("auto_accepted_plan", False)
    if not auto_accepted_plan:
        # Suspends the graph until the client resumes with a feedback value.
        feedback = interrupt("Please Review the Plan.")

        # Handle None or empty feedback
        if not feedback:
            logger.warning(f"Received empty or None feedback: {feedback}. Returning to planner for new plan.")
            return Command(
                update=preserve_state_meta_fields(state),
                goto="planner"
            )
        # Normalize feedback string (upper-cased for case-insensitive matching).
        feedback_normalized = str(feedback).strip().upper()
        # if the feedback is not accepted, return the planner node
        if feedback_normalized.startswith("[EDIT_PLAN]"):
            logger.info(f"Plan edit requested by user: {feedback}")
            return Command(
                update={
                    "messages": [
                        # The un-normalized feedback is preserved in history.
                        HumanMessage(content=feedback, name="feedback"),
                    ],
                    **preserve_state_meta_fields(state),
                },
                goto="planner",
            )
        elif feedback_normalized.startswith("[ACCEPTED]"):
            logger.info("Plan is accepted by user.")
        else:
            # Unknown format: re-plan rather than guessing the user's intent.
            logger.warning(f"Unsupported feedback format: {feedback}. Please use '[ACCEPTED]' to accept or '[EDIT_PLAN]' to edit.")
            return Command(
                update=preserve_state_meta_fields(state),
                goto="planner"
            )

    # if the plan is accepted, run the following node
    plan_iterations = state["plan_iterations"] if state.get("plan_iterations", 0) else 0
    goto = "research_team"
    try:
        # Safely extract plan content from different types (string, AIMessage, dict)
        original_plan = current_plan
        current_plan_content = extract_plan_content(current_plan)
        logger.debug(f"Extracted plan content type: {type(current_plan_content).__name__}")
        # Repair the JSON output
        current_plan = repair_json_output(current_plan_content)
        # increment the plan iterations
        plan_iterations += 1
        # parse the plan
        new_plan = json.loads(current_plan)
        # Validate and fix plan to ensure web search requirements are met
        configurable = Configuration.from_runnable_config(config)
        new_plan = validate_and_fix_plan(new_plan, configurable.enforce_web_search)
    except (json.JSONDecodeError, AttributeError) as e:
        logger.warning(f"Failed to parse plan: {str(e)}. Plan data type: {type(current_plan).__name__}")
        # NOTE(review): this checks isinstance on current_plan (already
        # reassigned above) but membership on original_plan — looks
        # inconsistent, though it only affects this diagnostic log line.
        if isinstance(current_plan, dict) and "content" in original_plan:
            logger.warning(f"Plan appears to be an AIMessage object with content field")
        if plan_iterations > 1:  # the plan_iterations is increased before this check
            return Command(
                update=preserve_state_meta_fields(state),
                goto="reporter"
            )
        else:
            return Command(
                update=preserve_state_meta_fields(state),
                goto="__end__"
            )

    # Build update dict with safe locale handling
    update_dict = {
        "current_plan": Plan.model_validate(new_plan),
        "plan_iterations": plan_iterations,
        **preserve_state_meta_fields(state),
    }
    # Only override locale if new_plan provides a valid value, otherwise use preserved locale
    if new_plan.get("locale"):
        update_dict["locale"] = new_plan["locale"]
    return Command(
        update=update_dict,
        goto=goto,
    )
2025-04-27 20:15:42 +08:00
def coordinator_node(
    state: State, config: RunnableConfig
) -> Command[Literal["planner", "background_investigator", "coordinator", "__end__"]]:
    """Coordinator node that communicate with customers and handle clarification.

    Two modes:
      * Clarification DISABLED (legacy): force an immediate handoff_to_planner
        tool call and route to the planner.
      * Clarification ENABLED: run up to max_clarification_rounds question/answer
        rounds (each round interrupts back to the user), then hand off with the
        accumulated clarified topic.

    In both modes a missing tool call falls back to "planner", and the planner
    route is rewritten to "background_investigator" when that feature is on.
    """
    logger.info("Coordinator talking.")
    configurable = Configuration.from_runnable_config(config)

    # Check if clarification is enabled
    enable_clarification = state.get("enable_clarification", False)
    initial_topic = state.get("research_topic", "")
    clarified_topic = initial_topic
    # ============================================================
    # BRANCH 1: Clarification DISABLED (Legacy Mode)
    # ============================================================
    if not enable_clarification:
        # Use normal prompt with explicit instruction to skip clarification
        messages = apply_prompt_template("coordinator", state, locale=state.get("locale", "en-US"))
        messages.append(
            {
                "role": "system",
                "content": "CRITICAL: Clarification is DISABLED. You MUST immediately call handoff_to_planner tool with the user's query as-is. Do NOT ask questions or mention needing more information.",
            }
        )
        # Only bind handoff_to_planner tool
        tools = [handoff_to_planner]
        response = (
            get_llm_by_type(AGENT_LLM_MAP["coordinator"])
            .bind_tools(tools)
            .invoke(messages)
        )
        goto = "__end__"
        locale = state.get("locale", "en-US")
        logger.info(f"Coordinator locale: {locale}")
        research_topic = state.get("research_topic", "")

        # Process tool calls for legacy mode
        if response.tool_calls:
            try:
                for tool_call in response.tool_calls:
                    tool_name = tool_call.get("name", "")
                    tool_args = tool_call.get("args", {})
                    if tool_name == "handoff_to_planner":
                        logger.info("Handing off to planner")
                        goto = "planner"
                        # Extract research_topic if provided
                        if tool_args.get("research_topic"):
                            research_topic = tool_args.get("research_topic")
                        break
            except Exception as e:
                # Malformed tool-call payload: still proceed to the planner.
                logger.error(f"Error processing tool calls: {e}")
                goto = "planner"
    # ============================================================
    # BRANCH 2: Clarification ENABLED (New Feature)
    # ============================================================
    else:
        # Load clarification state
        clarification_rounds = state.get("clarification_rounds", 0)
        clarification_history = list(state.get("clarification_history", []) or [])
        # Drop falsy entries (empty strings/None) from the carried history.
        clarification_history = [item for item in clarification_history if item]
        max_clarification_rounds = state.get("max_clarification_rounds", 3)
        # Prepare the messages for the coordinator
        state_messages = list(state.get("messages", []))
        messages = apply_prompt_template("coordinator", state, locale=state.get("locale", "en-US"))

        # Rebuild the user-answer history from the message log, then derive the
        # combined clarified topic from it.
        clarification_history = reconstruct_clarification_history(
            state_messages, clarification_history, initial_topic
        )
        clarified_topic, clarification_history = build_clarified_topic_from_history(
            clarification_history
        )
        logger.debug("Clarification history rebuilt: %s", clarification_history)
        if clarification_history:
            # First entry is the original topic; last is the newest user answer.
            initial_topic = clarification_history[0]
            latest_user_content = clarification_history[-1]
        else:
            latest_user_content = ""
        # Add clarification status for first round
        if clarification_rounds == 0:
            messages.append(
                {
                    "role": "system",
                    "content": "Clarification mode is ENABLED. Follow the 'Clarification Process' guidelines in your instructions.",
                }
            )
        current_response = latest_user_content or "No response"
        logger.info(
            "Clarification round %s/%s | topic: %s | current user response: %s",
            clarification_rounds,
            max_clarification_rounds,
            clarified_topic or initial_topic,
            current_response,
        )

        clarification_context = f"""Continuing clarification (round {clarification_rounds}/{max_clarification_rounds}):
User's latest response: {current_response}
Ask for remaining missing dimensions. Do NOT repeat questions or start new topics."""
        messages.append({"role": "system", "content": clarification_context})

        # Bind both clarification tools - let LLM choose the appropriate one
        tools = [handoff_to_planner, handoff_after_clarification]
        # Check if we've already reached max rounds
        if clarification_rounds >= max_clarification_rounds:
            # Max rounds reached - force handoff by adding system instruction
            logger.warning(
                f"Max clarification rounds ({max_clarification_rounds}) reached. Forcing handoff to planner. Using prepared clarified topic: {clarified_topic}"
            )
            # Add system instruction to force handoff - let LLM choose the right tool
            messages.append(
                {
                    "role": "system",
                    "content": f"MAX ROUNDS REACHED. You MUST call handoff_after_clarification (not handoff_to_planner) with the appropriate locale based on the user's language and research_topic='{clarified_topic}'. Do not ask any more questions.",
                }
            )
        response = (
            get_llm_by_type(AGENT_LLM_MAP["coordinator"])
            .bind_tools(tools)
            .invoke(messages)
        )
        logger.debug(f"Current state messages: {state['messages']}")
        # Initialize response processing variables
        goto = "__end__"
        locale = state.get("locale", "en-US")
        research_topic = (
            clarification_history[0]
            if clarification_history
            else state.get("research_topic", "")
        )
        if not clarified_topic:
            clarified_topic = research_topic
        # --- Process LLM response ---
        # No tool calls - LLM is asking a clarifying question
        if not response.tool_calls and response.content:
            # Check if we've reached max rounds - if so, force handoff to planner
            if clarification_rounds >= max_clarification_rounds:
                logger.warning(
                    f"Max clarification rounds ({max_clarification_rounds}) reached. "
                    "LLM didn't call handoff tool, forcing handoff to planner."
                )
                goto = "planner"
                # Continue to final section instead of early return
            else:
                # Continue clarification process
                clarification_rounds += 1
                # Do NOT add LLM response to clarification_history - only user responses
                logger.info(
                    f"Clarification response: {clarification_rounds}/{max_clarification_rounds}: {response.content}"
                )
                # Append coordinator's question to messages
                updated_messages = list(state_messages)
                if response.content:
                    # NOTE(review): the coordinator's question is recorded as a
                    # HumanMessage (name="coordinator") — presumably so downstream
                    # prompt templates see it; confirm this is intentional.
                    updated_messages.append(
                        HumanMessage(content=response.content, name="coordinator")
                    )
                # Early return: surface the question to the user via __interrupt__
                # and stay in the clarification loop.
                return Command(
                    update={
                        "messages": updated_messages,
                        "locale": locale,
                        "research_topic": research_topic,
                        "resources": configurable.resources,
                        "clarification_rounds": clarification_rounds,
                        "clarification_history": clarification_history,
                        "clarified_research_topic": clarified_topic,
                        "is_clarification_complete": False,
                        "goto": goto,
                        "__interrupt__": [("coordinator", response.content)],
                    },
                    goto=goto,
                )
        else:
            # LLM called a tool (handoff) or has no content - clarification complete
            if response.tool_calls:
                logger.info(
                    f"Clarification completed after {clarification_rounds} rounds. LLM called handoff tool."
                )
            else:
                logger.warning("LLM response has no content and no tool calls.")
            # goto will be set in the final section based on tool calls
    # ============================================================
    # Final: Build and return Command
    # ============================================================
    messages = list(state.get("messages", []) or [])
    if response.content:
        messages.append(HumanMessage(content=response.content, name="coordinator"))
    # Process tool calls for BOTH branches (legacy and clarification)
    if response.tool_calls:
        try:
            for tool_call in response.tool_calls:
                tool_name = tool_call.get("name", "")
                tool_args = tool_call.get("args", {})
                if tool_name in ["handoff_to_planner", "handoff_after_clarification"]:
                    logger.info("Handing off to planner")
                    goto = "planner"
                    # Legacy mode trusts the tool-call topic; clarification mode
                    # keeps the topic assembled from the answer history.
                    if not enable_clarification and tool_args.get("research_topic"):
                        research_topic = tool_args["research_topic"]
                    if enable_clarification:
                        logger.info(
                            "Using prepared clarified topic: %s",
                            clarified_topic or research_topic,
                        )
                    else:
                        logger.info(
                            "Using research topic for handoff: %s", research_topic
                        )
                    break
        except Exception as e:
            logger.error(f"Error processing tool calls: {e}")
            goto = "planner"
    else:
        # No tool calls detected - fallback to planner instead of ending
        logger.warning(
            "LLM didn't call any tools. This may indicate tool calling issues with the model. "
            "Falling back to planner to ensure research proceeds."
        )
        # Log full response for debugging
        logger.debug(f"Coordinator response content: {response.content}")
        logger.debug(f"Coordinator response object: {response}")
        # Fallback to planner to ensure workflow continues
        goto = "planner"
    # Apply background_investigation routing if enabled (unified logic)
    if goto == "planner" and state.get("enable_background_investigation"):
        goto = "background_investigator"
    # Set default values for state variables (in case they're not defined in legacy mode)
    if not enable_clarification:
        clarification_rounds = 0
        clarification_history = []
    clarified_research_topic_value = clarified_topic or research_topic
    # clarified_research_topic: Complete clarified topic with all clarification rounds
    return Command(
        update={
            "messages": messages,
            "locale": locale,
            "research_topic": research_topic,
            "clarified_research_topic": clarified_research_topic_value,
            "resources": configurable.resources,
            "clarification_rounds": clarification_rounds,
            "clarification_history": clarification_history,
            "is_clarification_complete": goto != "coordinator",
            "goto": goto,
        },
        goto=goto,
    )
2025-06-07 20:48:39 +08:00
def reporter_node(state: State, config: RunnableConfig):
    """Write the final report from the current plan and collected observations."""
    logger.info("Reporter write final report")
    configurable = Configuration.from_runnable_config(config)
    current_plan = state.get("current_plan")

    # Seed the prompt with the plan's task title and description.
    report_input = {
        "messages": [
            HumanMessage(
                f"# Research Requirements\n\n## Task\n\n{current_plan.title}\n\n## Description\n\n{current_plan.thought}"
            )
        ],
        "locale": state.get("locale", "en-US"),
    }
    invoke_messages = apply_prompt_template(
        "reporter", report_input, configurable, report_input.get("locale", "en-US")
    )

    # Remind the model about the report format, citation style, and table usage.
    invoke_messages.append(
        HumanMessage(
            content="IMPORTANT: Structure your report according to the format in the prompt. Remember to include:\n\n1. Key Points - A bulleted list of the most important findings\n2. Overview - A brief introduction to the topic\n3. Detailed Analysis - Organized into logical sections\n4. Survey Note (optional) - For more comprehensive reports\n5. Key Citations - List all references at the end\n\nFor citations, DO NOT include inline citations in the text. Instead, place all citations in the 'Key Citations' section at the end using the format: `- [Source Title](URL)`. Include an empty line between each citation for better readability.\n\nPRIORITIZE USING MARKDOWN TABLES for data presentation and comparison. Use tables whenever presenting comparative data, statistics, features, or options. Structure tables with clear headers and aligned columns. Example table format:\n\n| Feature | Description | Pros | Cons |\n|---------|-------------|------|------|\n| Feature 1 | Description 1 | Pros 1 | Cons 1 |\n| Feature 2 | Description 2 | Pros 2 | Cons 2 |",
            name="system",
        )
    )

    # Wrap each research observation as its own human message.
    observation_messages = [
        HumanMessage(
            content=f"Below are some observations for the research task:\n\n{observation}",
            name="observation",
        )
        for observation in state.get("observations", [])
    ]

    # Context compression: shrink observations to fit the reporter model's limit.
    llm_token_limit = get_llm_token_limit_by_type(AGENT_LLM_MAP["reporter"])
    compressed_state = ContextManager(llm_token_limit).compress_messages(
        {"messages": observation_messages}
    )
    invoke_messages.extend(compressed_state.get("messages", []))

    logger.debug(f"Current invoke messages: {invoke_messages}")
    response = get_llm_by_type(AGENT_LLM_MAP["reporter"]).invoke(invoke_messages)
    logger.info(f"reporter response: {response.content}")
    return {"final_report": response.content}
2025-06-04 21:47:17 -07:00
def research_team_node(state: State):
    """Research team node that collaborates on tasks."""
    logger.info("Research team is collaborating on tasks.")
    logger.debug(
        "Entering research_team_node - coordinating research and coder agents"
    )
    # Intentionally a no-op: the actual routing between researcher and coder
    # happens via the graph's conditional edges, not inside this node.
    return None
2025-04-07 16:25:55 +08:00
2025-11-24 20:07:28 +08:00
def validate_web_search_usage(messages: list, agent_name: str = "agent") -> bool:
    """Check whether the web search tool was used during an agent run.

    Args:
        messages: List of messages from the agent execution.
        agent_name: Name of the agent (for logging purposes).

    Returns:
        bool: True if the web_search tool was used, False otherwise.
    """
    for message in messages:
        # A ToolMessage named "web_search" is a direct tool result.
        if isinstance(message, ToolMessage) and message.name == "web_search":
            logger.info(
                f"[VALIDATION] {agent_name} received ToolMessage from web_search tool"
            )
            return True
        # An AI message may carry tool calls requesting web_search.
        tool_calls = getattr(message, "tool_calls", None)
        if tool_calls and any(
            call.get("name") == "web_search" for call in tool_calls
        ):
            logger.info(f"[VALIDATION] {agent_name} called web_search tool")
            return True
        # Any other message explicitly attributed to web_search.
        if getattr(message, "name", None) == "web_search":
            logger.info(f"[VALIDATION] {agent_name} used web_search tool")
            return True
    logger.warning(f"[VALIDATION] {agent_name} did not use web_search tool")
    return False
2025-04-23 16:00:01 +08:00
def _get_recursion_limit(default: int = 25) -> int:
    """Read the agent recursion limit from the AGENT_RECURSION_LIMIT env var.

    Falls back to *default* when the variable is unset, not an integer,
    or not a positive number.
    """
    env_value_str = os.getenv("AGENT_RECURSION_LIMIT", str(default))
    try:
        parsed_limit = int(env_value_str)
    except ValueError:
        raw_env_value = os.getenv("AGENT_RECURSION_LIMIT")
        logger.warning(
            f"Invalid AGENT_RECURSION_LIMIT value: '{raw_env_value}'. "
            f"Using default value {default}."
        )
        return default
    if parsed_limit > 0:
        logger.info(f"Recursion limit set to: {parsed_limit}")
        return parsed_limit
    logger.warning(
        f"AGENT_RECURSION_LIMIT value '{env_value_str}' (parsed as {parsed_limit}) is not positive. "
        f"Using default value {default}."
    )
    return default


async def _execute_agent_step(
    state: State, agent, agent_name: str, config: RunnableConfig = None
) -> Command[Literal["research_team"]]:
    """Helper function to execute a step using the specified agent.

    Finds the first unexecuted step of the current plan, invokes *agent* on it
    (with context from already-completed steps), records the result on the
    step, and routes back to the research team.

    Args:
        state: Current graph state; must contain ``current_plan``.
        agent: Pre-built agent to invoke for this step.
        agent_name: Agent name ("researcher" or "coder"), used for logging and
            for researcher-specific reminders/validation.
        config: Optional runnable config, used to read enforcement settings.

    Returns:
        Command updating messages/observations and routing to "research_team".
    """
    logger.debug(f"[_execute_agent_step] Starting execution for agent: {agent_name}")
    current_plan = state.get("current_plan")
    plan_title = current_plan.title
    observations = state.get("observations", [])
    logger.debug(
        f"[_execute_agent_step] Plan title: {plan_title}, observations count: {len(observations)}"
    )

    # Find the first unexecuted step; everything before it is already done.
    current_step = None
    completed_steps = []
    for idx, step in enumerate(current_plan.steps):
        if not step.execution_res:
            current_step = step
            logger.debug(
                f"[_execute_agent_step] Found unexecuted step at index {idx}: {step.title}"
            )
            break
        completed_steps.append(step)

    if not current_step:
        logger.warning(
            f"[_execute_agent_step] No unexecuted step found in {len(current_plan.steps)} total steps"
        )
        return Command(
            update=preserve_state_meta_fields(state),
            goto="research_team",
        )

    logger.info(
        f"[_execute_agent_step] Executing step: {current_step.title}, agent: {agent_name}"
    )
    logger.debug(f"[_execute_agent_step] Completed steps so far: {len(completed_steps)}")

    # Format completed steps information for the agent's context.
    completed_steps_info = ""
    if completed_steps:
        completed_steps_info = "# Completed Research Steps\n\n"
        for i, step in enumerate(completed_steps):
            completed_steps_info += f"## Completed Step {i + 1}: {step.title}\n\n"
            completed_steps_info += f"<finding>\n{step.execution_res}\n</finding>\n\n"

    # Prepare the input for the agent with completed steps info.
    agent_input = {
        "messages": [
            HumanMessage(
                content=f"# Research Topic\n\n{plan_title}\n\n{completed_steps_info}# Current Step\n\n## Title\n\n{current_step.title}\n\n## Description\n\n{current_step.description}\n\n## Locale\n\n{state.get('locale', 'en-US')}"
            )
        ]
    }

    # Researcher-specific reminders: local resources and citation format.
    if agent_name == "researcher":
        if state.get("resources"):
            resources_info = "**The user mentioned the following resource files:**\n\n"
            for resource in state.get("resources"):
                resources_info += f"- {resource.title} ({resource.description})\n"
            agent_input["messages"].append(
                HumanMessage(
                    content=resources_info
                    + "\n\n"
                    + "You MUST use the **local_search_tool** to retrieve the information from the resource files.",
                )
            )
        agent_input["messages"].append(
            HumanMessage(
                content="IMPORTANT: DO NOT include inline citations in the text. Instead, track all sources and include a References section at the end using link reference format. Include an empty line between each citation for better readability. Use this format for each reference:\n- [Source Title](URL)\n\n- [Another Source](URL)",
                name="system",
            )
        )

    recursion_limit = _get_recursion_limit()
    logger.info(f"Agent input: {agent_input}")

    # Validate message content before invoking the agent; a validation failure
    # is logged but deliberately non-fatal (the original messages are kept).
    try:
        agent_input["messages"] = validate_message_content(agent_input["messages"])
    except Exception as validation_error:
        logger.error(f"Error validating agent input messages: {validation_error}")

    try:
        result = await agent.ainvoke(
            input=agent_input, config={"recursion_limit": recursion_limit}
        )
    except Exception as e:
        import traceback

        error_traceback = traceback.format_exc()
        error_message = f"Error executing {agent_name} agent for step '{current_step.title}': {str(e)}"
        logger.exception(error_message)
        logger.error(f"Full traceback:\n{error_traceback}")

        # Enhanced diagnostics for message-content validation errors.
        if "Field required" in str(e) and "content" in str(e):
            logger.error("Message content validation error detected")
            for i, msg in enumerate(agent_input.get("messages", [])):
                logger.error(
                    f"Message {i}: type={type(msg).__name__}, "
                    f"has_content={hasattr(msg, 'content')}, "
                    f"content_type={type(msg.content).__name__ if hasattr(msg, 'content') else 'N/A'}, "
                    f"content_len={len(str(msg.content)) if hasattr(msg, 'content') and msg.content else 0}"
                )

        # Record the failure on the step so the workflow can continue.
        detailed_error = f"[ERROR] {agent_name.capitalize()} Agent Error\n\nStep: {current_step.title}\n\nError Details:\n{str(e)}\n\nPlease check the logs for more information."
        current_step.execution_res = detailed_error

        return Command(
            update={
                "messages": [
                    HumanMessage(
                        content=detailed_error,
                        name=agent_name,
                    )
                ],
                "observations": observations + [detailed_error],
                **preserve_state_meta_fields(state),
            },
            goto="research_team",
        )

    # Sanitize the final response to remove extra tokens / truncate if needed.
    response_content = sanitize_tool_response(str(result["messages"][-1].content))
    logger.debug(f"{agent_name.capitalize()} full response: {response_content}")

    # Validate web-search usage for the researcher when enforcement is enabled.
    validation_info = ""
    if agent_name == "researcher":
        configurable = (
            Configuration.from_runnable_config(config) if config else Configuration()
        )
        if configurable.enforce_researcher_search and not validate_web_search_usage(
            result["messages"], agent_name
        ):
            logger.warning(
                "[VALIDATION] Researcher did not use web_search tool. Adding reminder to response."
            )
            validation_info = (
                "\n\n[WARNING] This research was completed without using the web_search tool. "
                "Please verify that the information provided is accurate and up-to-date. "
                "\n\n[VALIDATION WARNING] Researcher did not use the web_search tool as recommended. "
            )

    # Update the step with the execution result.
    current_step.execution_res = response_content
    logger.info(f"Step '{current_step.title}' execution completed by {agent_name}")

    # Include all messages from the agent result so intermediate tool
    # calls/results (e.g. multiple web_search calls) reach the stream, not
    # just the final answer.
    agent_messages = result.get("messages", [])
    logger.debug(
        f"{agent_name.capitalize()} returned {len(agent_messages)} messages. "
        f"Message types: {[type(msg).__name__ for msg in agent_messages]}"
    )
    tool_message_count = sum(1 for msg in agent_messages if isinstance(msg, ToolMessage))
    if tool_message_count > 0:
        logger.info(
            f"{agent_name.capitalize()} agent made {tool_message_count} tool calls. "
            f"All tool results will be preserved and streamed to frontend."
        )

    return Command(
        update={
            "messages": agent_messages,
            "observations": observations + [response_content + validation_info],
            **preserve_state_meta_fields(state),
        },
        goto="research_team",
    )
2025-04-23 16:00:01 +08:00
async def _setup_and_execute_agent_step(
    state: State,
    config: RunnableConfig,
    agent_type: str,
    default_tools: list,
) -> Command[Literal["research_team"]]:
    """Helper function to set up an agent with appropriate tools and execute a step.

    This function handles the common logic for both researcher_node and coder_node:
    1. Configures MCP servers and tools based on agent type
    2. Creates an agent with the default tools plus any enabled MCP tools
    3. Executes the agent on the current step

    Args:
        state: The current state
        config: The runnable config
        agent_type: The type of agent ("researcher" or "coder")
        default_tools: The default tools to add to the agent

    Returns:
        Command to update state and go to research_team
    """
    configurable = Configuration.from_runnable_config(config)

    # Extract MCP server configuration for this agent type.
    mcp_servers = {}
    enabled_tools = {}  # tool name -> server name that provides it
    if configurable.mcp_settings:
        for server_name, server_config in configurable.mcp_settings["servers"].items():
            if (
                server_config["enabled_tools"]
                and agent_type in server_config["add_to_agents"]
            ):
                mcp_servers[server_name] = {
                    k: v
                    for k, v in server_config.items()
                    if k in ("transport", "command", "args", "url", "env", "headers")
                }
                for tool_name in server_config["enabled_tools"]:
                    enabled_tools[tool_name] = server_name

    # Start from the default tool set and append any enabled MCP tools.
    # (Previously the MCP and non-MCP paths duplicated the agent-creation
    # logic; they are unified here.)
    loaded_tools = default_tools[:]
    if mcp_servers:
        client = MultiServerMCPClient(mcp_servers)
        # NOTE: loop variable deliberately not named `tool` to avoid shadowing
        # the imported `tool` decorator.
        for mcp_tool in await client.get_tools():
            if mcp_tool.name in enabled_tools:
                mcp_tool.description = (
                    f"Powered by '{enabled_tools[mcp_tool.name]}'.\n{mcp_tool.description}"
                )
                loaded_tools.append(mcp_tool)

    # Compress context to the agent model's token limit before each LLM call.
    llm_token_limit = get_llm_token_limit_by_type(AGENT_LLM_MAP[agent_type])
    pre_model_hook = partial(ContextManager(llm_token_limit, 3).compress_messages)
    agent = create_agent(
        agent_type,
        agent_type,
        loaded_tools,
        agent_type,
        pre_model_hook,
        interrupt_before_tools=configurable.interrupt_before_tools,
    )
    return await _execute_agent_step(state, agent, agent_type, config)
2025-04-23 16:00:01 +08:00
async def researcher_node(
    state: State, config: RunnableConfig
) -> Command[Literal["research_team"]]:
    """Researcher node that performs research via web search, crawl, and
    (optionally) local retrieval tools."""
    logger.info("Researcher node is researching.")
    logger.debug("[researcher_node] Starting researcher agent")

    configurable = Configuration.from_runnable_config(config)
    logger.debug(
        f"[researcher_node] Max search results: {configurable.max_search_results}"
    )

    # Web search and crawling are always available; a retriever is prepended
    # only when the user supplied resource files.
    research_tools = [get_web_search_tool(configurable.max_search_results), crawl_tool]
    retriever_tool = get_retriever_tool(state.get("resources", []))
    if retriever_tool:
        logger.debug("[researcher_node] Adding retriever tool to tools list")
        research_tools.insert(0, retriever_tool)

    tool_names = [t.name if hasattr(t, "name") else str(t) for t in research_tools]
    logger.info(f"[researcher_node] Researcher tools count: {len(research_tools)}")
    logger.debug(f"[researcher_node] Researcher tools: {tool_names}")
    logger.info(
        f"[researcher_node] enforce_researcher_search is set to: {configurable.enforce_researcher_search}"
    )

    return await _setup_and_execute_agent_step(
        state,
        config,
        "researcher",
        research_tools,
    )
2025-04-07 16:25:55 +08:00
2025-04-23 16:00:01 +08:00
async def coder_node(
    state: State, config: RunnableConfig
) -> Command[Literal["research_team"]]:
    """Coder node that performs code analysis with the Python REPL tool."""
    logger.info("Coder node is coding.")
    logger.debug("[coder_node] Starting coder agent with python_repl_tool")
    return await _setup_and_execute_agent_step(
        state, config, "coder", [python_repl_tool]
    )