src/graph/utils.py

# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT

from typing import Any

# Lower-case speaker names that ``is_user_message`` compares a message's
# (lower-cased) ``name`` against; a match means the message is not treated
# as authored by the end user.
ASSISTANT_SPEAKER_NAMES = {
    "coordinator",
    "planner",
    "researcher",
    "coder",
    "reporter",
    "background_investigator",
}


def get_message_content(message: Any) -> str:
    """Extract message content from a dict or a message object.

    Args:
        message: Either a dict with a ``"content"`` key or any object
            exposing a ``content`` attribute (e.g. a LangChain message).

    Returns:
        The stored content, or ``""`` when the key/attribute is missing or
        explicitly ``None``. Any other content value is returned unchanged.
    """
    if isinstance(message, dict):
        content = message.get("content")
    else:
        content = getattr(message, "content", None)
    # An explicit ``None`` would otherwise leak out despite the ``str``
    # return annotation; normalise it to the same value as "missing".
    return "" if content is None else content


def is_user_message(message: Any) -> bool:
    """Return True if the message originated from the end user.

    Dict messages are classified by their ``role`` and ``name`` keys. Any
    other object is inspected through its ``type``, ``name``, ``role`` and
    ``additional_kwargs`` attributes, each of which may be absent.

    Args:
        message: A dict-style message or a message object.

    Returns:
        True when the message is attributed to a user/human, False otherwise.
    """
    user_roles = {"user", "human"}

    if isinstance(message, dict):
        role = (message.get("role") or "").lower()
        if role in user_roles:
            return True
        if role in {"assistant", "system"}:
            return False
        name = (message.get("name") or "").lower()
        if name and name in ASSISTANT_SPEAKER_NAMES:
            return False
        # Known assistant speakers were rejected above, so only a missing
        # role still counts as user-authored; any other role does not.
        return role == ""

    message_type = (getattr(message, "type", "") or "").lower()
    name = (getattr(message, "name", "") or "").lower()
    if message_type == "human":
        # A "human"-typed message carrying an assistant speaker name is not
        # treated as user-authored.
        return not (name and name in ASSISTANT_SPEAKER_NAMES)

    role_attr = getattr(message, "role", None)
    if isinstance(role_attr, str) and role_attr.lower() in user_roles:
        return True

    # ``additional_kwargs`` may be present but set to None; fall back to an
    # empty mapping instead of calling ``.get`` on None.
    extra_kwargs = getattr(message, "additional_kwargs", None) or {}
    additional_role = extra_kwargs.get("role")
    return isinstance(additional_role, str) and additional_role.lower() in user_roles


def get_latest_user_message(messages: list[Any]) -> tuple[Any, str]:
    """Find the most recent user message that has non-empty content.

    Args:
        messages: Conversation messages, oldest first. ``None`` or an empty
            list yields the "not found" result.

    Returns:
        A ``(message, content)`` pair for the last user-authored message
        whose content is truthy, or ``(None, "")`` when there is none.
    """
    # Walk backwards so the first hit is the newest one.
    for candidate in reversed(messages or []):
        if not is_user_message(candidate):
            continue
        text = get_message_content(candidate)
        if not text:
            continue
        return candidate, text
    return None, ""


def build_clarified_topic_from_history(
    clarification_history: list[str],
) -> tuple[str, list[str]]:
    """Construct clarified topic string from an ordered clarification history."""
    sequence = [item for item in clarification_history if item]
    if not sequence:
        return "", []
    if len(sequence) == 1:
        return sequence[0], sequence
    head, *tail = sequence
    clarified_string = f"{head} - {', '.join(tail)}"
    return clarified_string, sequence


def reconstruct_clarification_history(
    messages: list[Any],
    fallback_history: list[str] | None = None,
    base_topic: str = "",
) -> list[str]:
    """Rebuild clarification history from user-authored messages, with fallback.

    Args:
        messages: Conversation messages in chronological order.
        fallback_history: Optional existing history to use if no user messages found.
        base_topic: Optional topic to use when no user messages are available.

    Returns:
        A cleaned clarification history containing unique consecutive user contents.
    """
    sequence: list[str] = []
    for message in messages or []:
        if not is_user_message(message):
            continue
        content = get_message_content(message)
        if not content:
            continue
        if sequence and sequence[-1] == content:
            continue
        sequence.append(content)

    if sequence:
        return sequence

    fallback = [item for item in (fallback_history or []) if item]
    if fallback:
        return fallback

    base_topic = (base_topic or "").strip()
    return [base_topic] if base_topic else []
fix: Refine clarification workflow state handling (#641) * fix: support local models by making thought field optional in Plan model - Make thought field optional in Plan model to fix Pydantic validation errors with local models - Add Ollama configuration example to conf.yaml.example - Update documentation to include local model support - Improve planner prompt with better JSON format requirements Fixes local model integration issues where models like qwen3:14b would fail due to missing thought field in JSON output. * feat: Add intelligent clarification feature for research queries - Add multi-turn clarification process to refine vague research questions - Implement three-dimension clarification standard (Tech/App, Focus, Scope) - Add clarification state management in coordinator node - Update coordinator prompt with detailed clarification guidelines - Add UI settings to enable/disable clarification feature (disabled by default) - Update workflow to handle clarification rounds recursively - Add comprehensive test coverage for clarification functionality - Update documentation with clarification feature usage guide Key components: - src/graph/nodes.py: Core clarification logic and state management - src/prompts/coordinator.md: Detailed clarification guidelines - src/workflow.py: Recursive clarification handling - web/: UI settings integration - tests/: Comprehensive test coverage - docs/: Updated configuration guide * fix: Improve clarification conversation continuity - Add comprehensive conversation history to clarification context - Include previous exchanges summary in system messages - Add explicit guidelines for continuing rounds in coordinator prompt - Prevent LLM from starting new topics during clarification - Ensure topic continuity across clarification rounds Fixes issue where LLM would restart clarification instead of building upon previous exchanges. 
* fix: Add conversation history to clarification context * fix: resolve clarification feature message to planner, prompt, test issues - Optimize coordinator.md prompt template for better clarification flow - Simplify final message sent to planner after clarification - Fix API key assertion issues in test_search.py * fix: Add configurable max_clarification_rounds and comprehensive tests - Add max_clarification_rounds parameter for external configuration - Add comprehensive test cases for clarification feature in test_app.py - Fixes issues found during interactive mode testing where: - Recursive call failed due to missing initial_state parameter - Clarification exited prematurely at max rounds - Incorrect logging of max rounds reached * Move clarification tests to test_nodes.py and add max_clarification_rounds to zh.json * fix: add max_clarification_rounds parameter passing from frontend to backend - Add max_clarification_rounds parameter in store.ts sendMessage function - Add max_clarification_rounds type definition in chat.ts - Ensure frontend settings page clarification rounds are correctly passed to backend * fix: refine clarification workflow state handling and coverage - Add clarification history reconstruction - Fix clarified topic accumulation - Add clarified_research_topic state field - Preserve clarification state in recursive calls - Add comprehensive test coverage * refactor: optimize coordinator logic and type annotations - Simplify handoff topic logic in coordinator_node - Update type annotations from Tuple to tuple - Improve code readability and maintainability --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> 2025-10-22 22:49:07 +08:00			`# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates`
			`# SPDX-License-Identifier: MIT`

			`from typing import Any`

			`ASSISTANT_SPEAKER_NAMES = {`
			`"coordinator",`
			`"planner",`
			`"researcher",`
			`"coder",`
			`"reporter",`
			`"background_investigator",`
			`}`


			`def get_message_content(message: Any) -> str:`
			`"""Extract message content from dict or LangChain message."""`
			`if isinstance(message, dict):`
			`return message.get("content", "")`
			`return getattr(message, "content", "")`


			`def is_user_message(message: Any) -> bool:`
			`"""Return True if the message originated from the end user."""`
			`if isinstance(message, dict):`
			`role = (message.get("role") or "").lower()`
			`if role in {"user", "human"}:`
			`return True`
			`if role in {"assistant", "system"}:`
			`return False`
			`name = (message.get("name") or "").lower()`
			`if name and name in ASSISTANT_SPEAKER_NAMES:`
			`return False`
			`return role == "" and name not in ASSISTANT_SPEAKER_NAMES`

			`message_type = (getattr(message, "type", "") or "").lower()`
			`name = (getattr(message, "name", "") or "").lower()`
			`if message_type == "human":`
			`return not (name and name in ASSISTANT_SPEAKER_NAMES)`

			`role_attr = getattr(message, "role", None)`
			`if isinstance(role_attr, str) and role_attr.lower() in {"user", "human"}:`
			`return True`

			`additional_role = getattr(message, "additional_kwargs", {}).get("role")`
			`if isinstance(additional_role, str) and additional_role.lower() in {`
			`"user",`
			`"human",`
			`}:`
			`return True`

			`return False`


			`def get_latest_user_message(messages: list[Any]) -> tuple[Any, str]:`
			`"""Return the latest user-authored message and its content."""`
			`for message in reversed(messages or []):`
			`if is_user_message(message):`
			`content = get_message_content(message)`
			`if content:`
			`return message, content`
			`return None, ""`


			`def build_clarified_topic_from_history(`
			`clarification_history: list[str],`
			`) -> tuple[str, list[str]]:`
			`"""Construct clarified topic string from an ordered clarification history."""`
			`sequence = [item for item in clarification_history if item]`
			`if not sequence:`
			`return "", []`
			`if len(sequence) == 1:`
			`return sequence[0], sequence`
			`head, *tail = sequence`
			`clarified_string = f"{head} - {', '.join(tail)}"`
			`return clarified_string, sequence`


			`def reconstruct_clarification_history(`
			`messages: list[Any],`
			`fallback_history: list[str] \| None = None,`
			`base_topic: str = "",`
			`) -> list[str]:`
			`"""Rebuild clarification history from user-authored messages, with fallback.`

			`Args:`
			`messages: Conversation messages in chronological order.`
			`fallback_history: Optional existing history to use if no user messages found.`
			`base_topic: Optional topic to use when no user messages are available.`

			`Returns:`
			`A cleaned clarification history containing unique consecutive user contents.`
			`"""`
			`sequence: list[str] = []`
			`for message in messages or []:`
			`if not is_user_message(message):`
			`continue`
			`content = get_message_content(message)`
			`if not content:`
			`continue`
			`if sequence and sequence[-1] == content:`
			`continue`
			`sequence.append(content)`

			`if sequence:`
			`return sequence`

			`fallback = [item for item in (fallback_history or []) if item]`
			`if fallback:`
			`return fallback`

			`base_topic = (base_topic or "").strip()`
			`return [base_topic] if base_topic else []`