"""Tests for readability extraction fallback behavior.""" import subprocess import pytest from deerflow.utils.readability import ReadabilityExtractor def test_extract_article_falls_back_when_readability_js_fails(monkeypatch): """When Node-based readability fails, extraction should fall back to Python mode.""" calls: list[bool] = [] def _fake_simple_json_from_html_string(html: str, use_readability: bool = False): calls.append(use_readability) if use_readability: raise subprocess.CalledProcessError( returncode=1, cmd=["node", "ExtractArticle.js"], stderr="boom", ) return {"title": "Fallback Title", "content": "
Fallback Content
"} monkeypatch.setattr( "deerflow.utils.readability.simple_json_from_html_string", _fake_simple_json_from_html_string, ) article = ReadabilityExtractor().extract_article("test") assert calls == [True, False] assert article.title == "Fallback Title" assert article.html_content == "Fallback Content
" def test_extract_article_re_raises_unexpected_exception(monkeypatch): """Unexpected errors should be surfaced instead of silently falling back.""" calls: list[bool] = [] def _fake_simple_json_from_html_string(html: str, use_readability: bool = False): calls.append(use_readability) if use_readability: raise RuntimeError("unexpected parser failure") return {"title": "Should Not Reach Fallback", "content": "Fallback
"} monkeypatch.setattr( "deerflow.utils.readability.simple_json_from_html_string", _fake_simple_json_from_html_string, ) with pytest.raises(RuntimeError, match="unexpected parser failure"): ReadabilityExtractor().extract_article("test") assert calls == [True]