backend/packages/harness/deerflow/community/image_search/tools.py

"""
Image Search Tool - Search images using DuckDuckGo for reference in image generation.
"""

import json
import logging

from langchain.tools import tool

from deerflow.config import get_app_config

logger = logging.getLogger(__name__)


def _search_images(
    query: str,
    max_results: int = 5,
    region: str = "wt-wt",
    safesearch: str = "moderate",
    size: str | None = None,
    color: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
    license_image: str | None = None,
) -> list[dict]:
    """
    Execute image search using DuckDuckGo.

    This helper is best-effort: a missing ``ddgs`` package, a failure while
    creating the client, or a failure during the search itself is logged and
    reported to the caller as an empty list instead of being raised.

    Args:
        query: Search keywords
        max_results: Maximum number of results
        region: Search region
        safesearch: Safe search level
        size: Image size (Small/Medium/Large/Wallpaper)
        color: Color filter
        type_image: Image type (photo/clipart/gif/transparent/line)
        layout: Layout (Square/Tall/Wide)
        license_image: License filter

    Returns:
        List of search results as returned by ``DDGS.images``; an empty list
        when nothing was found or when any error occurred.
    """
    try:
        # Imported inside the function so a missing ddgs package only
        # disables this search instead of failing at module import time.
        from ddgs import DDGS
    except ImportError:
        logger.error("ddgs library not installed. Run: pip install ddgs")
        return []

    # Optional filters are forwarded only when the caller supplied a truthy
    # value; unset (None / empty) filters are omitted from the call entirely.
    optional_filters = {
        "size": size,
        "color": color,
        "type_image": type_image,
        "layout": layout,
        "license_image": license_image,
    }
    kwargs = {
        "region": region,
        "safesearch": safesearch,
        "max_results": max_results,
        **{name: value for name, value in optional_filters.items() if value},
    }

    try:
        # The client is created inside the guarded region so that a
        # constructor failure is handled like any other search failure.
        ddgs = DDGS(timeout=30)
        results = ddgs.images(query, **kwargs)
        return list(results) if results else []
    except Exception as e:
        # Broad catch is intentional: this is the boundary where search
        # errors are converted into "no results" for the calling tool.
        logger.exception("Failed to search images: %s", e)
        return []


# NOTE: the docstring below is parsed by the decorator (parse_docstring=True),
# so it is runtime text and must stay in sync with the function parameters.
@tool("image_search", parse_docstring=True)
def image_search_tool(
    query: str,
    max_results: int = 5,
    size: str | None = None,
    type_image: str | None = None,
    layout: str | None = None,
) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    **When to use:**
    - Before generating character/portrait images: search for similar poses, expressions, styles
    - Before generating specific objects/products: search for accurate visual references
    - Before generating scenes/locations: search for architectural or environmental references
    - Before generating fashion/clothing: search for style and detail references

    The returned image URLs can be used as reference images in image generation to significantly improve quality.

    Args:
        query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").
        max_results: Maximum number of images to return. Default is 5.
        size: Image size filter. Options: "Small", "Medium", "Large", "Wallpaper". Use "Large" for reference images.
        type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.
        layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.
    """
    config = get_app_config().get_tool_config("image_search")

    # A max_results value present in the tool config overrides the value
    # passed by the caller. model_extra can be None (pydantic returns None
    # when the model does not allow extra fields), so fall back to an empty
    # mapping rather than failing on the membership test.
    extra_settings = (config.model_extra or {}) if config is not None else {}
    if "max_results" in extra_settings:
        max_results = extra_settings["max_results"]

    results = _search_images(
        query=query,
        max_results=max_results,
        size=size,
        type_image=type_image,
        layout=layout,
    )

    # _search_images returns [] both for "nothing found" and for failures;
    # either way the caller gets a JSON error payload echoing the query.
    if not results:
        return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)

    # Keep only the fields the caller needs. Both URL fields are populated
    # from the result's "thumbnail" entry, exactly as before.
    normalized_results = [
        {
            "title": r.get("title", ""),
            "image_url": r.get("thumbnail", ""),
            "thumbnail_url": r.get("thumbnail", ""),
        }
        for r in results
    ]

    output = {
        "query": query,
        "total_results": len(normalized_results),
        "results": normalized_results,
        "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",
    }

    # ensure_ascii=False keeps non-ASCII titles and queries readable in the output.
    return json.dumps(output, indent=2, ensure_ascii=False)
feat: add image search builtin tool 2026-01-29 08:23:50 +08:00			`"""`
			`Image Search Tool - Search images using DuckDuckGo for reference in image generation.`
			`"""`

			`import json`
			`import logging`

			`from langchain.tools import tool`

refactor: split backend into harness (deerflow.) and app (app.) (#1131) * refactor: extract shared utils to break harness→app cross-layer imports Move _validate_skill_frontmatter to src/skills/validation.py and CONVERTIBLE_EXTENSIONS + convert_file_to_markdown to src/utils/file_conversion.py. This eliminates the two reverse dependencies from client.py (harness layer) into gateway/routers/ (app layer), preparing for the harness/app package split. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * refactor: split backend/src into harness (deerflow.) and app (app.) Physically split the monolithic backend/src/ package into two layers: - Harness (`packages/harness/deerflow/`): publishable agent framework package with import prefix `deerflow.`. Contains agents, sandbox, tools, models, MCP, skills, config, and all core infrastructure. - App* (`app/`): unpublished application code with import prefix `app.`. Contains gateway (FastAPI REST API) and channels (IM integrations). Key changes: - Move 13 harness modules to packages/harness/deerflow/ via git mv - Move gateway + channels to app/ via git mv - Rename all imports: src. → deerflow.* (harness) / app.* (app layer) - Set up uv workspace with deerflow-harness as workspace member - Update langgraph.json, config.example.yaml, all scripts, Docker files - Add build-system (hatchling) to harness pyproject.toml - Add PYTHONPATH=. to gateway startup commands for app.* resolution - Update ruff.toml with known-first-party for import sorting - Update all documentation to reflect new directory structure Boundary rule enforced: harness code never imports from app. All 429 tests pass. Lint clean. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: add harness→app boundary check test and update docs Add test_harness_boundary.py that scans all Python files in packages/harness/deerflow/ and fails if any `from app.` or `import app.` statement is found. 
This enforces the architectural rule that the harness layer never depends on the app layer. Update CLAUDE.md to document the harness/app split architecture, import conventions, and the boundary enforcement test. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * feat: add config versioning with auto-upgrade on startup When config.example.yaml schema changes, developers' local config.yaml files can silently become outdated. This adds a config_version field and auto-upgrade mechanism so breaking changes (like src.* → deerflow.* renames) are applied automatically before services start. - Add config_version: 1 to config.example.yaml - Add startup version check warning in AppConfig.from_file() - Add scripts/config-upgrade.sh with migration registry for value replacements - Add `make config-upgrade` target - Auto-run config-upgrade in serve.sh and start-daemon.sh before starting services - Add config error hints in service failure messages Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix comments * fix: update src.* import in test_sandbox_tools_security to deerflow.* Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: handle empty config and search parent dirs for config.example.yaml Address Copilot review comments on PR #1131: - Guard against yaml.safe_load() returning None for empty config files - Search parent directories for config.example.yaml instead of only looking next to config.yaml, fixing detection in common setups Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * fix: correct skills root path depth and config_version type coercion - loader.py: fix get_skills_root_path() to use 5 parent levels (was 3) after harness split, file lives at packages/harness/deerflow/skills/ so parent×3 resolved to backend/packages/harness/ instead of backend/ - app_config.py: coerce config_version to int() before comparison in _check_config_version() to prevent TypeError when YAML stores value as string (e.g. 
config_version: "1") - tests: add regression tests for both fixes Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: update test imports from src.* to deerflow./app. after harness refactor Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> 2026-03-14 22:55:52 +08:00			`from deerflow.config import get_app_config`
feat: add image search builtin tool 2026-01-29 08:23:50 +08:00
			`logger = logging.getLogger(__name__)`


			`def _search_images(`
			`query: str,`
			`max_results: int = 5,`
			`region: str = "wt-wt",`
			`safesearch: str = "moderate",`
			`size: str \| None = None,`
			`color: str \| None = None,`
			`type_image: str \| None = None,`
			`layout: str \| None = None,`
			`license_image: str \| None = None,`
			`) -> list[dict]:`
			`"""`
			`Execute image search using DuckDuckGo.`

			`Args:`
			`query: Search keywords`
			`max_results: Maximum number of results`
			`region: Search region`
			`safesearch: Safe search level`
			`size: Image size (Small/Medium/Large/Wallpaper)`
			`color: Color filter`
			`type_image: Image type (photo/clipart/gif/transparent/line)`
			`layout: Layout (Square/Tall/Wide)`
			`license_image: License filter`

			`Returns:`
			`List of search results`
			`"""`
			`try:`
			`from ddgs import DDGS`
			`except ImportError:`
			`logger.error("ddgs library not installed. Run: pip install ddgs")`
			`return []`

feat: optimize vision tools and image handling - Add model-aware vision tool loading based on supports_vision flag - Move view_image_tool from config to builtin tools for dynamic inclusion - Add timeout to image search to prevent hanging requests - Optimize image search results format using thumbnails - Add image validation for reference images in generation - Improve error handling with detailed messages Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> 2026-01-29 14:57:26 +08:00			`ddgs = DDGS(timeout=30)`
feat: add image search builtin tool 2026-01-29 08:23:50 +08:00
			`try:`
			`kwargs = {`
			`"region": region,`
			`"safesearch": safesearch,`
			`"max_results": max_results,`
			`}`

			`if size:`
			`kwargs["size"] = size`
			`if color:`
			`kwargs["color"] = color`
			`if type_image:`
			`kwargs["type_image"] = type_image`
			`if layout:`
			`kwargs["layout"] = layout`
			`if license_image:`
			`kwargs["license_image"] = license_image`

			`results = ddgs.images(query, **kwargs)`
			`return list(results) if results else []`

			`except Exception as e:`
			`logger.error(f"Failed to search images: {e}")`
			`return []`


			`@tool("image_search", parse_docstring=True)`
			`def image_search_tool(`
			`query: str,`
			`max_results: int = 5,`
			`size: str \| None = None,`
			`type_image: str \| None = None,`
			`layout: str \| None = None,`
			`) -> str:`
			`"""Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.`

			`When to use:`
			`- Before generating character/portrait images: search for similar poses, expressions, styles`
			`- Before generating specific objects/products: search for accurate visual references`
			`- Before generating scenes/locations: search for architectural or environmental references`
			`- Before generating fashion/clothing: search for style and detail references`

			`The returned image URLs can be used as reference images in image generation to significantly improve quality.`

			`Args:`
			`query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").`
			`max_results: Maximum number of images to return. Default is 5.`
			`size: Image size filter. Options: "Small", "Medium", "Large", "Wallpaper". Use "Large" for reference images.`
			`type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.`
			`layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.`
			`"""`
			`config = get_app_config().get_tool_config("image_search")`

			`# Override max_results from config if set`
			`if config is not None and "max_results" in config.model_extra:`
			`max_results = config.model_extra.get("max_results", max_results)`

			`results = _search_images(`
			`query=query,`
			`max_results=max_results,`
			`size=size,`
			`type_image=type_image,`
			`layout=layout,`
			`)`

			`if not results:`
			`return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)`

			`normalized_results = [`
			`{`
			`"title": r.get("title", ""),`
feat: optimize vision tools and image handling - Add model-aware vision tool loading based on supports_vision flag - Move view_image_tool from config to builtin tools for dynamic inclusion - Add timeout to image search to prevent hanging requests - Optimize image search results format using thumbnails - Add image validation for reference images in generation - Improve error handling with detailed messages Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> 2026-01-29 14:57:26 +08:00			`"image_url": r.get("thumbnail", ""),`
feat: add image search builtin tool 2026-01-29 08:23:50 +08:00			`"thumbnail_url": r.get("thumbnail", ""),`
			`}`
			`for r in results`
			`]`

			`output = {`
			`"query": query,`
			`"total_results": len(normalized_results),`
			`"results": normalized_results,`
			`"usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",`
			`}`

			`return json.dumps(output, indent=2, ensure_ascii=False)`