mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-17 11:44:44 +08:00
test: add unit tests of crawler (#292)
* test: add unit tests of crawler
* test: polish the code of crawler unit tests
This commit is contained in:
@@ -3,8 +3,7 @@
|
||||
|
||||
from .article import Article
|
||||
from .crawler import Crawler
|
||||
from .jina_client import JinaClient
|
||||
from .readability_extractor import ReadabilityExtractor
|
||||
|
||||
__all__ = [
|
||||
"Article",
|
||||
"Crawler",
|
||||
]
|
||||
__all__ = ["Article", "Crawler", "JinaClient", "ReadabilityExtractor"]
|
||||
|
||||
@@ -26,13 +26,3 @@ class Crawler:
|
||||
article = extractor.extract_article(html)
|
||||
article.url = url
|
||||
return article
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) == 2:
|
||||
url = sys.argv[1]
|
||||
else:
|
||||
url = "https://fintel.io/zh-hant/s/br/nvdc34"
|
||||
crawler = Crawler()
|
||||
article = crawler.crawl(url)
|
||||
print(article.to_markdown())
|
||||
|
||||
Reference in New Issue
Block a user