fix: improve error handling in researcher and coder nodes (#596)

- Wrap agent.ainvoke() calls in try-except blocks
- Log full exception tracebacks for better debugging
- Return detailed error messages to users instead of a generic 'internal error'
- Include step title and agent name in error context
- Allow workflow to continue gracefully when agent execution fails
- Store error details in observations for an audit trail
This commit is contained in:
Willem Jiang
2025-10-19 16:33:14 +08:00
committed by GitHub
parent 497a2a39cf
commit 57c9c2dcd5

View File

@@ -678,9 +678,32 @@ async def _execute_agent_step(
recursion_limit = default_recursion_limit
logger.info(f"Agent input: {agent_input}")
result = await agent.ainvoke(
input=agent_input, config={"recursion_limit": recursion_limit}
)
try:
result = await agent.ainvoke(
input=agent_input, config={"recursion_limit": recursion_limit}
)
except Exception as e:
import traceback
error_traceback = traceback.format_exc()
error_message = f"Error executing {agent_name} agent for step '{current_step.title}': {str(e)}"
logger.exception(error_message)
logger.error(f"Full traceback:\n{error_traceback}")
detailed_error = f"[ERROR] {agent_name.capitalize()} Agent Error\n\nStep: {current_step.title}\n\nError Details:\n{str(e)}\n\nPlease check the logs for more information."
current_step.execution_res = detailed_error
return Command(
update={
"messages": [
HumanMessage(
content=detailed_error,
name=agent_name,
)
],
"observations": observations + [detailed_error],
},
goto="research_team",
)
# Process the result
response_content = result["messages"][-1].content