# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # SPDX-License-Identifier: MIT import logging import os import requests logger = logging.getLogger(__name__) class JinaClient: def crawl(self, url: str, return_format: str = "html") -> str: headers = { "Content-Type": "application/json", "X-Return-Format": return_format, } if os.getenv("JINA_API_KEY"): headers["Authorization"] = f"Bearer {os.getenv('JINA_API_KEY')}" else: logger.warning( "Jina API key is not set. Provide your own key to access a higher rate limit. See https://jina.ai/reader for more information." ) data = {"url": url} response = requests.post("https://r.jina.ai/", headers=headers, json=data) if response.status_code != 200: raise ValueError(f"Jina API returned status {response.status_code}: {response.text}") if not response.text or not response.text.strip(): raise ValueError("Jina API returned empty response") return response.text