Files
deer-flow/src/crawler/readability_extractor.py

13 lines
364 B
Python
Raw Normal View History

from readabilipy import simple_json_from_html_string
from .article import Article
class ReadabilityExtractor:
    """Build ``Article`` objects from raw HTML via readabilipy."""

    def extract_article(self, html: str) -> Article:
        """Parse *html* and wrap its title and content in an ``Article``.

        The markup is handed to ``simple_json_from_html_string`` with
        ``use_readability=True``; the ``"title"`` and ``"content"`` entries
        of the result are then forwarded to ``Article``.

        Args:
            html: The HTML document to process, as a string.

        Returns:
            An ``Article`` constructed with ``title`` and ``html_content``
            taken from the parsed result.
        """
        parsed = simple_json_from_html_string(html, use_readability=True)

        # ``.get`` is used for both lookups, so a missing key is passed on
        # as ``None`` rather than raising.
        # NOTE(review): confirm ``Article`` tolerates ``None`` for either field.
        title = parsed.get("title")
        body_html = parsed.get("content")

        return Article(title=title, html_content=body_html)