fix: improve error handling in researcher and coder nodes (#596)

- Wrap agent.ainvoke() calls in try-except blocks
- Log full exception tracebacks for better debugging
- Return detailed error messages to users instead of generic 'internal error'
- Include step title and agent name in error context
- Allow workflow to continue gracefully when agent execution fails
- Store error details in observations for audit trail
2026-04-03 06:12:14 +08:00 · 2025-10-19 16:33:14 +08:00
parent 497a2a39cf
commit 57c9c2dcd5
1 changed files with 26 additions and 3 deletions
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -678,9 +678,32 @@ async def _execute_agent_step(
        recursion_limit = default_recursion_limit

    logger.info(f"Agent input: {agent_input}")
-    result = await agent.ainvoke(
-        input=agent_input, config={"recursion_limit": recursion_limit}
-    )
+    try:
+        result = await agent.ainvoke(
+            input=agent_input, config={"recursion_limit": recursion_limit}
+        )
+    except Exception as e:
+        import traceback
+        error_traceback = traceback.format_exc()
+        error_message = f"Error executing {agent_name} agent for step '{current_step.title}': {str(e)}"
+        logger.exception(error_message)
+        logger.error(f"Full traceback:\n{error_traceback}")
+        
+        detailed_error = f"[ERROR] {agent_name.capitalize()} Agent Error\n\nStep: {current_step.title}\n\nError Details:\n{str(e)}\n\nPlease check the logs for more information."
+        current_step.execution_res = detailed_error
+        
+        return Command(
+            update={
+                "messages": [
+                    HumanMessage(
+                        content=detailed_error,
+                        name=agent_name,
+                    )
+                ],
+                "observations": observations + [detailed_error],
+            },
+            goto="research_team",
+        )

    # Process the result
    response_content = result["messages"][-1].content