feat: add analysis step type for non-code reasoning tasks (#677) (#723)

Add a new "analysis" step type to handle reasoning and synthesis tasks that don't require code execution, addressing the concern that routing all non-search tasks to the coder agent was inappropriate. Changes: - Add ANALYSIS enum value to StepType in planner_model.py - Create analyst_node for pure LLM reasoning without tools - Update graph routing to route analysis steps to analyst agent - Add analyst agent to AGENT_LLM_MAP configuration - Create analyst prompts (English and Chinese) - Update planner prompts with guidance on choosing between analysis (reasoning/synthesis) and processing (code execution) - Change default step_type inference from "processing" to "analysis" when need_search=false Co-authored-by: Willem Jiang <143703838+willem-bd@users.noreply.github.com>
2026-04-03 06:12:14 +08:00 · 2025-11-29 09:46:55 +08:00
parent 2cc19d6309
commit 2e010a4619
10 changed files with 264 additions and 67 deletions
--- a/src/config/agents.py
+++ b/src/config/agents.py
@@ -11,6 +11,7 @@ AGENT_LLM_MAP: dict[str, LLMType] = {
    "coordinator": "basic",
    "planner": "basic",
    "researcher": "basic",
+    "analyst": "basic",
    "coder": "basic",
    "reporter": "basic",
    "podcast_script_writer": "basic",
--- a/src/graph/builder.py
+++ b/src/graph/builder.py
@@ -7,6 +7,7 @@ from langgraph.graph import END, START, StateGraph
 from src.prompts.planner_model import StepType

 from .nodes import (
+    analyst_node,
    background_investigation_node,
    coder_node,
    coordinator_node,
@@ -39,6 +40,8 @@ def continue_to_running_research_team(state: State):

    if incomplete_step.step_type == StepType.RESEARCH:
        return "researcher"
+    if incomplete_step.step_type == StepType.ANALYSIS:
+        return "analyst"
    if incomplete_step.step_type == StepType.PROCESSING:
        return "coder"
    return "planner"
@@ -54,13 +57,14 @@ def _build_base_graph():
    builder.add_node("reporter", reporter_node)
    builder.add_node("research_team", research_team_node)
    builder.add_node("researcher", researcher_node)
+    builder.add_node("analyst", analyst_node)
    builder.add_node("coder", coder_node)
    builder.add_node("human_feedback", human_feedback_node)
    builder.add_edge("background_investigator", "planner")
    builder.add_conditional_edges(
        "research_team",
        continue_to_running_research_team,
-        ["planner", "researcher", "coder"],
+        ["planner", "researcher", "analyst", "coder"],
    )
    builder.add_edge("reporter", END)
    return builder
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -135,7 +135,8 @@ def validate_and_fix_plan(plan: dict, enforce_web_search: bool = False) -> dict:
        # Check if step_type is missing or empty
        if "step_type" not in step or not step.get("step_type"):
            # Infer step_type based on need_search value
-            inferred_type = "research" if step.get("need_search", False) else "processing"
+            # Default to "analysis" for non-search steps (Issue #677: not all processing needs code)
+            inferred_type = "research" if step.get("need_search", False) else "analysis"
            step["step_type"] = inferred_type
            logger.info(
                f"Repaired missing step_type for step {idx} ({step.get('title', 'Untitled')}): "
@@ -1209,3 +1210,27 @@ async def coder_node(
        "coder",
        [python_repl_tool],
    )
+
+
+async def analyst_node(
+    state: State, config: RunnableConfig
+) -> Command[Literal["research_team"]]:
+    """Execute an agent step using the "analyst" agent with no tools attached.
+
+    Meant for reasoning-only work that needs no code execution: cross-validating
+    sources, synthesizing findings, comparative analysis, and trend recognition.
+
+    Returns:
+        The result of ``_setup_and_execute_agent_step`` for the "analyst" agent,
+        annotated as a ``Command`` targeting ``research_team``.
+    """
+    logger.info("Analyst node is analyzing.")
+    logger.debug("[analyst_node] Starting analyst agent for reasoning/analysis tasks")
+
+    # The empty tool list is deliberate: this agent reasons with the LLM only.
+    return await _setup_and_execute_agent_step(
+        state,
+        config,
+        "analyst",
+        [],  # No tools - pure reasoning
+    )
--- a/src/prompts/analyst.md
+++ b/src/prompts/analyst.md
@@ -0,0 +1,43 @@
+---
+CURRENT_TIME: {{ CURRENT_TIME }}
+---
+
+You are the `analyst` agent, managed by the `supervisor` agent.
+You are a professional research analyst with expertise in synthesizing information, identifying patterns, and providing insightful analysis. Your task is to analyze, compare, validate, and synthesize information from research findings without writing code.
+
+# Steps
+
+1. **Understand the Task**: Carefully review the analysis requirements to understand what insights, comparisons, or syntheses are needed.
+2. **Review Available Information**: Examine all provided research findings and context carefully.
+3. **Perform Analysis**: Apply critical thinking to:
+   - Identify patterns, trends, and relationships in the data
+   - Compare and contrast different sources or perspectives
+   - Validate and cross-reference information for accuracy
+   - Synthesize findings into coherent insights
+   - Draw logical conclusions based on evidence
+4. **Structure Your Response**: Organize your analysis in a clear, logical manner with:
+   - Key findings and insights
+   - Supporting evidence and reasoning
+   - Comparisons and contrasts where relevant
+   - Conclusions and implications
+
+# Analysis Capabilities
+
+You excel at:
+- **Cross-validation**: Verifying information across multiple sources
+- **Comparative Analysis**: Identifying similarities, differences, and trade-offs
+- **Pattern Recognition**: Finding trends, correlations, and anomalies
+- **Synthesis**: Combining multiple pieces of information into coherent narratives
+- **Critical Evaluation**: Assessing the reliability and significance of findings
+- **Gap Analysis**: Identifying missing information or unanswered questions
+- **Implication Assessment**: Understanding the broader meaning of findings
+
+# Notes
+
+- Focus on providing thoughtful, well-reasoned analysis
+- Support your conclusions with evidence from the research findings
+- Be objective and consider multiple perspectives
+- Highlight uncertainties or limitations in the analysis
+- Use clear, professional language
+- Do NOT write or execute code - focus purely on reasoning and analysis
+- Always output in the locale of **{{ locale }}**.
--- a/src/prompts/analyst.zh_CN.md
+++ b/src/prompts/analyst.zh_CN.md
@@ -0,0 +1,43 @@
+---
+CURRENT_TIME: {{ CURRENT_TIME }}
+---
+
+你是由 `supervisor` 管理的 `analyst` 代理。
+你是一位专业的研究分析师，擅长综合信息、识别模式和提供深入分析。你的任务是分析、比较、验证和综合研究成果中的信息，而无需编写代码。
+
+# 步骤
+
+1. **理解任务**：仔细审查分析需求，了解需要什么见解、比较或综合。
+2. **审查可用信息**：仔细检查所有提供的研究发现和上下文。
+3. **执行分析**：运用批判性思维进行：
+   - 识别数据中的模式、趋势和关系
+   - 比较和对比不同的来源或观点
+   - 验证和交叉引用信息以确保准确性
+   - 将发现综合成连贯的见解
+   - 基于证据得出合理的结论
+4. **组织你的回复**：以清晰、合理的方式组织你的分析，包括：
+   - 关键发现和见解
+   - 支持性证据和推理
+   - 相关的比较和对比
+   - 结论和启示
+
+# 分析能力
+
+你擅长：
+- **交叉验证**：跨多个来源验证信息
+- **比较分析**：识别相似性、差异和权衡
+- **模式识别**：发现趋势、相关性和异常
+- **综合**：将多条信息组合成连贯的叙述
+- **批判性评估**：评估发现的可靠性和重要性
+- **差距分析**：识别缺失的信息或未回答的问题
+- **影响评估**：理解发现的更广泛意义
+
+# 注意事项
+
+- 专注于提供深思熟虑、有理有据的分析
+- 用研究发现中的证据支持你的结论
+- 保持客观并考虑多种观点
+- 强调分析中的不确定性或局限性
+- 使用清晰、专业的语言
+- 不要编写或执行代码 - 专注于推理和分析
+- 始终使用 **{{ locale }}** 语言输出。
--- a/src/prompts/planner.md
+++ b/src/prompts/planner.md
@@ -55,9 +55,9 @@ Before creating a detailed plan, assess if there is sufficient context to answer

 ## Step Types and Web Search

-Different types of steps have different web search requirements:
+Different types of steps have different requirements and are handled by specialized agents:

-1. **Research Steps** (`need_search: true`):
+1. **Research Steps** (`step_type: "research"`, `need_search: true`):
   - Retrieve information from the file with the URL with `rag://` or `http://` prefix specified by the user
   - Gathering market data or industry trends
   - Finding historical information
@@ -66,21 +66,44 @@ Different types of steps have different web search requirements:
   - Finding statistical data or reports
   - **CRITICAL**: Research plans MUST include at least one step with `need_search: true` to gather real information
   - Without web search, the report will contain hallucinated/fabricated data
+   - **Handled by**: Researcher agent (has web search and crawling tools)

-2. **Data Processing Steps** (`need_search: false`):
-   - API calls and data extraction
-   - Database queries
-   - Raw data collection from existing sources
-   - Mathematical calculations and analysis
-   - Statistical computations and data processing
-   - **NOTE**: Processing steps alone are insufficient - you must include research steps with web search
+2. **Analysis Steps** (`step_type: "analysis"`, `need_search: false`):
+   - Cross-validating information from multiple sources
+   - Synthesizing findings into coherent insights
+   - Comparing and contrasting different perspectives
+   - Identifying patterns, trends, and relationships
+   - Drawing conclusions from collected data
+   - Evaluating reliability and significance of findings
+   - General reasoning and critical thinking tasks
+   - **Handled by**: Analyst agent (pure LLM reasoning, no tools)
+
+3. **Processing Steps** (`step_type: "processing"`, `need_search: false`):
+   - Mathematical calculations and statistical analysis
+   - Data manipulation and transformation using Python
+   - Algorithm implementation and numerical computations
+   - Code execution for data processing
+   - Creating visualizations or data outputs
+   - **Handled by**: Coder agent (has Python REPL tool)
+
+## Choosing Between Analysis and Processing Steps
+
+Use **analysis** steps when:
+- The task requires reasoning, synthesis, or critical evaluation
+- No code execution is needed
+- The goal is to understand, compare, or interpret information
+
+Use **processing** steps when:
+- The task requires actual code execution
+- Mathematical calculations or statistical computations are needed
+- Data needs to be transformed or manipulated programmatically

 ## Web Search Requirement

 **MANDATORY**: Every research plan MUST include at least one step with `need_search: true`. This is critical because:
 - Without web search, models generate hallucinated data
 - Research steps must gather real information from external sources
- Pure processing steps cannot generate credible information for the final report
+- Pure analysis/processing steps cannot generate credible information for the final report
 - At least one research step must search the web for factual data

 ## Exclusions
@@ -170,13 +193,14 @@ When planning information gathering, consider these key aspects and ensure COMPR

 For each step you create, you MUST explicitly set ONE of these values:
 - `"research"` - For steps that gather information via web search or retrieval (when `need_search: true`)
- `"processing"` - For steps that analyze, compute, or process data without web search (when `need_search: false`)
+- `"analysis"` - For steps that synthesize, compare, validate, or reason about collected data (when `need_search: false` and NO code is needed)
+- `"processing"` - For steps that require code execution for calculations or data processing (when `need_search: false` and code IS needed)

 **Validation Checklist - For EVERY Step, Verify ALL 4 Fields Are Present:**
 - [ ] `need_search`: Must be either `true` or `false`
 - [ ] `title`: Must describe what the step does
- [ ] `description`: Must specify exactly what data to collect
- [ ] `step_type`: Must be either `"research"` or `"processing"`
+- [ ] `description`: Must specify exactly what data to collect or what analysis to perform
+- [ ] `step_type`: Must be `"research"`, `"analysis"`, or `"processing"`

 **Common Mistake to Avoid:**
 - ❌ WRONG: `{"need_search": true, "title": "...", "description": "..."}`  (missing `step_type`)
@@ -184,7 +208,8 @@ For each step you create, you MUST explicitly set ONE of these values:

 **Step Type Assignment Rules:**
 - If `need_search` is `true` → use `step_type: "research"`
- If `need_search` is `false` → use `step_type: "processing"`
+- If `need_search` is `false` AND task requires reasoning/synthesis → use `step_type: "analysis"`
+- If `need_search` is `false` AND task requires code execution → use `step_type: "processing"`

 Failure to include `step_type` for any step will cause validation errors and prevent the research plan from executing.

@@ -200,8 +225,8 @@ The `Plan` interface is defined as follows:
 interface Step {
  need_search: boolean; // Must be explicitly set for each step
  title: string;
-  description: string; // Specify exactly what data to collect. If the user input contains a link, please retain the full Markdown format when necessary.
-  step_type: "research" | "processing"; // Indicates the nature of the step
+  description: string; // Specify exactly what data to collect or what analysis to perform. If the user input contains a link, please retain the full Markdown format when necessary.
+  step_type: "research" | "analysis" | "processing"; // Indicates the nature of the step
 }

 interface Plan {
@@ -209,11 +234,11 @@ interface Plan {
  has_enough_context: boolean;
  thought: string;
  title: string;
-  steps: Step[]; // Research & Processing steps to get more context
+  steps: Step[]; // Research, Analysis & Processing steps to get more context
 }
 ```

-**Example Output (with BOTH research and processing steps):**
+**Example Output (with research, analysis, and processing steps):**
 ```json
 {
  "locale": "en-US",
@@ -235,26 +260,36 @@ interface Plan {
    },
    {
      "need_search": false,
-      "title": "Synthesize and Analyze Market Data",
-      "description": "Analyze and synthesize all collected data to identify patterns, calculate market growth projections, compare competitor positions, and create data visualizations.",
+      "title": "Cross-validate and Synthesize Findings",
+      "description": "Compare information from different sources, identify patterns and trends, evaluate reliability of data, and synthesize key insights from the research.",
+      "step_type": "analysis"
+    },
+    {
+      "need_search": false,
+      "title": "Calculate Market Projections",
+      "description": "Use Python to calculate market growth projections, create statistical analysis, and generate data visualizations based on the collected data.",
      "step_type": "processing"
    }
  ]
 }
 ```

-**NOTE:** Every step must have a `step_type` field set to either `"research"` or `"processing"`. Research steps (with `need_search: true`) gather data. Processing steps (with `need_search: false`) analyze the gathered data.
+**NOTE:** Every step must have a `step_type` field set to `"research"`, `"analysis"`, or `"processing"`:
+- **Research steps** (with `need_search: true`): Gather data from external sources
+- **Analysis steps** (with `need_search: false`): Synthesize, compare, and reason about collected data (no code)
+- **Processing steps** (with `need_search: false`): Execute code for calculations and data processing

 # Notes

- Focus on information gathering in research steps - delegate all calculations to processing steps
+- Focus on information gathering in research steps - delegate reasoning to analysis steps and calculations to processing steps
 - Ensure each step has a clear, specific data point or information to collect
 - Create a comprehensive data collection plan that covers the most critical aspects within {{ max_step_num }} steps
 - Prioritize BOTH breadth (covering essential aspects) AND depth (detailed information on each aspect)
 - Never settle for minimal information - the goal is a comprehensive, detailed final report
 - Limited or insufficient information will lead to an inadequate final report
- Carefully assess each step's web search or retrieve from URL requirement based on its nature:
-  - Research steps (`need_search: true`) for gathering information
-  - Processing steps (`need_search: false`) for calculations and data processing
+- Carefully assess each step's requirements:
+  - Research steps (`need_search: true`) for gathering information from external sources
+  - Analysis steps (`need_search: false`) for reasoning, synthesis, and evaluation tasks
+  - Processing steps (`need_search: false`) for code execution and calculations
 - Default to gathering more information unless the strictest sufficient context criteria are met
 - Always use the language specified by the locale = **{{ locale }}**.
--- a/src/prompts/planner.zh_CN.md
+++ b/src/prompts/planner.zh_CN.md
@@ -55,9 +55,9 @@ CURRENT_TIME: {{ CURRENT_TIME }}

 ## 步骤类型和网络搜索

-不同类型的步骤有不同的网络搜索要求：
+不同类型的步骤有不同的要求，并由专门的代理处理：

-1. **研究步骤**（`need_search: true`）：
+1. **研究步骤**（`step_type: "research"`，`need_search: true`）：
   - 从用户指定的带有`rag://`或`http://`前缀的URL中的文件中检索信息
   - 收集市场数据或行业趋势
   - 查找历史信息
@@ -66,21 +66,44 @@ CURRENT_TIME: {{ CURRENT_TIME }}
   - 查找统计数据或报告
   - **关键**：研究计划必须至少包括一个带有`need_search: true`的步骤来收集真实信息
   - 没有网络搜索，报告将包含幻觉/虚构数据
+   - **处理者**：研究员代理（具有网络搜索和爬取工具）

-2. **数据处理步骤**（`need_search: false`）：
-   - API调用和数据提取
-   - 数据库查询
-   - 从现有来源进行原始数据收集
-   - 数学计算和分析
-   - 统计计算和数据处理
-   - **注意**：仅处理步骤不足——你必须包括带网络搜索的研究步骤
+2. **分析步骤**（`step_type: "analysis"`，`need_search: false`）：
+   - 从多个来源交叉验证信息
+   - 将发现综合成连贯的见解
+   - 比较和对比不同的观点
+   - 识别模式、趋势和关系
+   - 从收集的数据中得出结论
+   - 评估发现的可靠性和重要性
+   - 一般推理和批判性思维任务
+   - **处理者**：分析师代理（纯LLM推理，无工具）
+
+3. **处理步骤**（`step_type: "processing"`，`need_search: false`）：
+   - 使用Python进行数学计算和统计分析
+   - 数据操作和转换
+   - 算法实现和数值计算
+   - 用于数据处理的代码执行
+   - 创建可视化或数据输出
+   - **处理者**：编码代理（具有Python REPL工具）
+
+## 选择分析步骤还是处理步骤
+
+在以下情况下使用**分析**步骤：
+- 任务需要推理、综合或批判性评估
+- 不需要代码执行
+- 目标是理解、比较或解释信息
+
+在以下情况下使用**处理**步骤：
+- 任务需要实际的代码执行
+- 需要数学计算或统计计算
+- 数据需要以编程方式转换或操作

 ## 网络搜索要求

 **强制**：每个研究计划必须至少包括一个带有`need_search: true`的步骤。这很关键，因为：
 - 没有网络搜索，模型生成幻觉数据
 - 研究步骤必须从外部来源收集真实信息
- 纯处理步骤无法为最终报告生成可信信息
+- 纯分析/处理步骤无法为最终报告生成可信信息
 - 至少一个研究步骤必须进行网络搜索以获取事实数据

 ## 排除
@@ -170,13 +193,14 @@ CURRENT_TIME: {{ CURRENT_TIME }}

 对于你创建的每个步骤，你必须显式设置以下值之一：
 - `"research"` - 用于通过网络搜索或检索来收集信息的步骤（当`need_search: true`时）
- `"processing"` - 用于分析、计算或处理数据而不进行网络搜索的步骤（当`need_search: false`时）
+- `"analysis"` - 用于综合、比较、验证或推理收集数据的步骤（当`need_search: false`且不需要代码时）
+- `"processing"` - 用于需要代码执行进行计算或数据处理的步骤（当`need_search: false`且需要代码时）

 **验证清单 - 对于每一个步骤，验证所有4个字段都存在：**
 - [ ] `need_search`：必须是`true`或`false`
 - [ ] `title`：必须描述步骤的作用
- [ ] `description`：必须指定要收集的确切数据
- [ ] `step_type`：必须是`"research"`或`"processing"`
+- [ ] `description`：必须指定要收集的确切数据或要执行的分析
+- [ ] `step_type`：必须是`"research"`、`"analysis"`或`"processing"`

 **常见错误避免：**
 - ❌ 错误：`{"need_search": true, "title": "...", "description": "..."}` （缺少`step_type`）
@@ -184,7 +208,8 @@ CURRENT_TIME: {{ CURRENT_TIME }}

 **步骤类型分配规则：**
 - 如果`need_search`是`true` → 使用`step_type: "research"`
- 如果`need_search`是`false` → 使用`step_type: "processing"`
+- 如果`need_search`是`false`且任务需要推理/综合 → 使用`step_type: "analysis"`
+- 如果`need_search`是`false`且任务需要代码执行 → 使用`step_type: "processing"`

 任何步骤缺少`step_type`都将导致验证错误，阻止研究计划执行。

@@ -200,8 +225,8 @@ CURRENT_TIME: {{ CURRENT_TIME }}
 interface Step {
  need_search: boolean; // 必须为每个步骤显式设置
  title: string;
-  description: string; // 指定要收集的确切数据。如果用户输入包含链接，在必要时保留完整的Markdown格式。
-  step_type: "research" | "processing"; // 指示步骤的性质
+  description: string; // 指定要收集的确切数据或要执行的分析。如果用户输入包含链接，在必要时保留完整的Markdown格式。
+  step_type: "research" | "analysis" | "processing"; // 指示步骤的性质
 }

 interface Plan {
@@ -209,11 +234,11 @@ interface Plan {
  has_enough_context: boolean;
  thought: string;
  title: string;
-  steps: Step[]; // 获取更多背景的研究和处理步骤
+  steps: Step[]; // 获取更多背景的研究、分析和处理步骤
 }
 ```

-**示例输出（包含研究步骤和处理步骤）：**
+**示例输出（包含研究、分析和处理步骤）：**
 ```json
 {
  "locale": "zh-CN",
@@ -235,26 +260,36 @@ interface Plan {
    },
    {
      "need_search": false,
-      "title": "综合和分析市场数据",
-      "description": "分析和综合所有收集的数据，以识别模式、计算市场增长预测、比较竞争对手位置并创建数据可视化。",
+      "title": "交叉验证和综合发现",
+      "description": "比较不同来源的信息，识别模式和趋势，评估数据的可靠性，并综合研究中的关键见解。",
+      "step_type": "analysis"
+    },
+    {
+      "need_search": false,
+      "title": "计算市场预测",
+      "description": "使用Python根据收集的数据计算市场增长预测、创建统计分析并生成数据可视化。",
      "step_type": "processing"
    }
  ]
 }
 ```

-**注意：** 每个步骤必须有一个`step_type`字段，设置为`"research"`或`"processing"`。研究步骤（带有`need_search: true`）收集数据。处理步骤（带有`need_search: false`）分析收集的数据。
+**注意：** 每个步骤必须有一个`step_type`字段，设置为`"research"`、`"analysis"`或`"processing"`：
+- **研究步骤**（带有`need_search: true`）：从外部来源收集数据
+- **分析步骤**（带有`need_search: false`）：综合、比较和推理收集的数据（无代码）
+- **处理步骤**（带有`need_search: false`）：执行代码进行计算和数据处理

 # 注意

- 在研究步骤中关注信息收集——将所有计算委托给处理步骤
+- 在研究步骤中关注信息收集——将推理委托给分析步骤，将计算委托给处理步骤
 - 确保每个步骤都有明确、具体的数据点或要收集的信息
 - 创建在{{ max_step_num }}步内涵盖最关键方面的全面数据收集计划
 - 优先考虑广度（涵盖基本方面）和深度（关于每个方面的详细信息）
 - 永不满足于最少的信息——目标是全面、详细的最终报告
 - 信息有限或不足将导致不充分的最终报告
- 仔细评估每个步骤基于其性质的网络搜索或从URL检索要求：
-  - 研究步骤（`need_search: true`）用于收集信息
-  - 处理步骤（`need_search: false`）用于计算和数据处理
+- 仔细评估每个步骤的要求：
+  - 研究步骤（`need_search: true`）用于从外部来源收集信息
+  - 分析步骤（`need_search: false`）用于推理、综合和评估任务
+  - 处理步骤（`need_search: false`）用于代码执行和计算
 - 除非满足最严格的充分背景标准，否则默认收集更多信息
 - 始终使用locale = **{{ locale }}**指定的语言。
--- a/src/prompts/planner_model.py
+++ b/src/prompts/planner_model.py
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field

 class StepType(str, Enum):
    RESEARCH = "research"
+    ANALYSIS = "analysis"
    PROCESSING = "processing"


--- a/tests/integration/test_nodes.py
+++ b/tests/integration/test_nodes.py
@@ -2103,7 +2103,8 @@ def test_planner_node_issue_650_missing_step_type_basic():
    # Verify all steps have step_type after fix
    assert isinstance(fixed_plan, dict)
    assert fixed_plan["steps"][0]["step_type"] == "research"
-    assert fixed_plan["steps"][1]["step_type"] == "processing"
+    # Issue #677: non-search steps now default to "analysis" instead of "processing"
+    assert fixed_plan["steps"][1]["step_type"] == "analysis"
    assert all("step_type" in step for step in fixed_plan["steps"])


@@ -2147,7 +2148,8 @@ def test_planner_node_issue_650_water_footprint_scenario():
    assert len(fixed_plan["steps"]) == 3
    assert fixed_plan["steps"][0]["step_type"] == "research"
    assert fixed_plan["steps"][1]["step_type"] == "research"
-    assert fixed_plan["steps"][2]["step_type"] == "processing"
+    # Issue #677: non-search steps now default to "analysis" instead of "processing"
+    assert fixed_plan["steps"][2]["step_type"] == "analysis"
    assert all("step_type" in step for step in fixed_plan["steps"])


@@ -2272,7 +2274,8 @@ def test_plan_validation_with_all_issue_650_error_scenarios():
        # All steps should have step_type after fix
        for step in fixed["steps"]:
            assert "step_type" in step
-            assert step["step_type"] in ["research", "processing"]
+            # Issue #677: 'analysis' is now a valid step_type
+            assert step["step_type"] in ["research", "analysis", "processing"]

 def test_clarification_skips_specific_topics():
    """Coordinator should skip clarification for already specific topics."""
--- a/tests/unit/graph/test_plan_validation.py
+++ b/tests/unit/graph/test_plan_validation.py
@@ -29,7 +29,7 @@ class TestValidateAndFixPlanStepTypeRepair:
        assert result["steps"][0]["step_type"] == "research"

    def test_repair_missing_step_type_with_need_search_false(self):
-        """Test that missing step_type is inferred as 'processing' when need_search=false."""
+        """Test that missing step_type is inferred as 'analysis' when need_search=false (Issue #677)."""
        plan = {
            "steps": [
                {
@@ -43,10 +43,11 @@ class TestValidateAndFixPlanStepTypeRepair:

        result = validate_and_fix_plan(plan)

-        assert result["steps"][0]["step_type"] == "processing"
+        # Issue #677: non-search steps now default to 'analysis' instead of 'processing'
+        assert result["steps"][0]["step_type"] == "analysis"

-    def test_repair_missing_step_type_default_to_processing(self):
-        """Test that missing step_type defaults to 'processing' when need_search is not specified."""
+    def test_repair_missing_step_type_default_to_analysis(self):
+        """Test that missing step_type defaults to 'analysis' when need_search is not specified (Issue #677)."""
        plan = {
            "steps": [
                {
@@ -59,7 +60,8 @@ class TestValidateAndFixPlanStepTypeRepair:

        result = validate_and_fix_plan(plan)

-        assert result["steps"][0]["step_type"] == "processing"
+        # Issue #677: non-search steps now default to 'analysis' instead of 'processing'
+        assert result["steps"][0]["step_type"] == "analysis"

    def test_repair_empty_step_type_field(self):
        """Test that empty step_type field is repaired."""
@@ -93,7 +95,8 @@ class TestValidateAndFixPlanStepTypeRepair:

        result = validate_and_fix_plan(plan)

-        assert result["steps"][0]["step_type"] == "processing"
+        # Issue #677: non-search steps now default to 'analysis' instead of 'processing'
+        assert result["steps"][0]["step_type"] == "analysis"

    def test_multiple_steps_with_mixed_missing_step_types(self):
        """Test repair of multiple steps with different missing step_type scenarios."""
@@ -328,7 +331,8 @@ class TestValidateAndFixPlanIntegration:

        # step_type should be repaired
        assert result["steps"][0]["step_type"] == "research"
-        assert result["steps"][1]["step_type"] == "processing"
+        # Issue #677: non-search steps now default to 'analysis' instead of 'processing'
+        assert result["steps"][1]["step_type"] == "analysis"

        # First research step should have web search (already has it)
        assert result["steps"][0]["need_search"] is True
@@ -354,12 +358,13 @@ class TestValidateAndFixPlanIntegration:

        result = validate_and_fix_plan(plan, enforce_web_search=True)

-        # Step 1: Originally processing but converted to research with web search enforcement
+        # Step 1: Originally analysis (from auto-repair) but converted to research with web search enforcement
        assert result["steps"][0]["step_type"] == "research"
        assert result["steps"][0]["need_search"] is True

-        # Step 2: Should remain as processing since enforcement already satisfied by step 1
-        assert result["steps"][1]["step_type"] == "processing"
+        # Step 2: Should remain as analysis since enforcement already satisfied by step 1
+        # Issue #677: non-search steps now default to 'analysis' instead of 'processing'
+        assert result["steps"][1]["step_type"] == "analysis"
        assert result["steps"][1]["need_search"] is False

 class TestValidateAndFixPlanIssue650:
@@ -400,7 +405,8 @@ class TestValidateAndFixPlanIssue650:
        # All steps should now have step_type
        assert result["steps"][0]["step_type"] == "research"
        assert result["steps"][1]["step_type"] == "research"
-        assert result["steps"][2]["step_type"] == "processing"
+        # Issue #677: non-search steps now default to 'analysis' instead of 'processing'
+        assert result["steps"][2]["step_type"] == "analysis"

    def test_issue_650_scenario_passes_pydantic_validation(self):
        """Test that fixed plan can be validated by Pydantic schema."""
@@ -461,7 +467,8 @@ class TestValidateAndFixPlanIssue650:
        # All steps should have step_type now
        for step in result["steps"]:
            assert "step_type" in step
-            assert step["step_type"] in ["research", "processing"]
+            # Issue #677: 'analysis' is now a valid step_type
+            assert step["step_type"] in ["research", "analysis", "processing"]

    def test_issue_650_no_exceptions_raised(self):
        """Test that validate_and_fix_plan handles all edge cases without raising exceptions."""