mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-24 14:44:46 +08:00
feat: lite deep researcher implementation
This commit is contained in:
12
src/crawler/readability_extractor.py
Normal file
12
src/crawler/readability_extractor.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from readabilipy import simple_json_from_html_string
|
||||
|
||||
from .article import Article
|
||||
|
||||
|
||||
class ReadabilityExtractor:
    """Extract the main article from raw HTML using readabilipy."""

    def extract_article(self, html: str) -> Article:
        """Parse ``html`` and wrap the extracted title and content in an Article.

        Args:
            html: Raw HTML markup of the page to process.

        Returns:
            An ``Article`` built from the ``"title"`` and ``"content"``
            entries of the readabilipy result. Either value is ``""`` when
            readabilipy did not provide it.
        """
        parsed = simple_json_from_html_string(html, use_readability=True)
        # readabilipy reports fields it could not extract as None; normalise
        # them to empty strings so Article always receives str values.
        return Article(
            title=parsed.get("title") or "",
            html_content=parsed.get("content") or "",
        )
|
||||
Reference in New Issue
Block a user