test: add unit tests for the crawler (#292)

* test: add unit tests for the crawler

* test: polish the crawler unit test code
This commit is contained in:
Willem Jiang
2025-06-07 21:51:05 +08:00
committed by GitHub
parent 0e22c373af
commit c6ed423021
4 changed files with 149 additions and 14 deletions

View File

@@ -3,8 +3,7 @@
from .article import Article
from .crawler import Crawler
from .jina_client import JinaClient
from .readability_extractor import ReadabilityExtractor
__all__ = [
"Article",
"Crawler",
]
__all__ = ["Article", "Crawler", "JinaClient", "ReadabilityExtractor"]

View File

@@ -26,13 +26,3 @@ class Crawler:
article = extractor.extract_article(html)
article.url = url
return article
if __name__ == "__main__":
if len(sys.argv) == 2:
url = sys.argv[1]
else:
url = "https://fintel.io/zh-hant/s/br/nvdc34"
crawler = Crawler()
article = crawler.crawl(url)
print(article.to_markdown())