feat: add firecrawl community package with web_search and web_fetch tools

Add web_search_tool and web_fetch_tool implementations using the official firecrawl-py SDK as an alternative to Tavily/Jina AI integrations.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-04-14 10:44:46 +08:00 · 2026-01-26 19:58:08 +08:00
parent 0cc7cc08e9
commit ce7f7258ba
4 changed files with 571 additions and 0 deletions
--- a/backend/src/community/firecrawl/tools.py
+++ b/backend/src/community/firecrawl/tools.py
@@ -0,0 +1,67 @@
+import json
+
+from firecrawl import FirecrawlApp
+from langchain.tools import tool
+
+from src.config import get_app_config
+
+
+def _get_firecrawl_client() -> FirecrawlApp:
+    """Return a FirecrawlApp whose key is ``model_extra["api_key"]`` of the ``web_search`` tool config."""
+    tool_cfg = get_app_config().get_tool_config("web_search")
+    # With no "web_search" tool config the key stays None and is passed through as-is.
+    key = None if tool_cfg is None else tool_cfg.model_extra.get("api_key")
+    return FirecrawlApp(api_key=key)  # type: ignore[arg-type]
+
+
+@tool("web_search", parse_docstring=True)
+def web_search_tool(query: str) -> str:
+    """Search the web.
+
+    Args:
+        query: The query to search for.
+    """
+    # parse_docstring=True feeds the docstring above to @tool, so it is left verbatim.
+    tool_cfg = get_app_config().get_tool_config("web_search")
+    # Result cap: the "max_results" extra when a tool config exists, otherwise 5.
+    limit = 5 if tool_cfg is None else tool_cfg.model_extra.get("max_results", 5)
+
+    response = _get_firecrawl_client().search(query, limit=limit)
+
+    # (output key, attribute read from each entry of response.web)
+    fields = (
+        ("title", "title"),
+        ("url", "url"),
+        ("snippet", "description"),
+    )
+    hits = [
+        # A missing or falsy attribute is emitted as "".
+        {key: getattr(hit, attr, "") or "" for key, attr in fields}
+        for hit in (response.web or [])
+    ]
+    # Pretty-print with a 2-space indent and keep non-ASCII characters unescaped.
+    return json.dumps(hits, indent=2, ensure_ascii=False)
+
+
+@tool("web_fetch", parse_docstring=True)
+def web_fetch_tool(url: str) -> str:
+    """Fetch the contents of a web page at a given URL.
+    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
+    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
+    Do NOT add www. to URLs that do NOT have them.
+    URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.
+
+    Args:
+        url: The URL to fetch the contents of.
+    """
+    # parse_docstring=True feeds the docstring above to @tool, so it is left verbatim.
+    page = _get_firecrawl_client().scrape(url, formats=["markdown"])
+    body = page.markdown or ""
+    meta = page.metadata
+    # Heading is the page title, or "Untitled" when metadata or its title is missing/empty.
+    if meta and meta.title:
+        heading = meta.title
+    else:
+        heading = "Untitled"
+    # Empty markdown yields an error string; otherwise the title is prepended as an H1.
+    return f"# {heading}\n\n{body}" if body else "Error: No content found"