mirror of
https://gitee.com/wanwujie/deer-flow
synced 2026-04-30 01:04:48 +08:00
fix(metrics): update the polynomial regular expression used on uncontrolled data (#784)
This commit is contained in:
@@ -125,7 +125,7 @@ def count_words(text: str) -> int:
|
|||||||
|
|
||||||
def count_citations(text: str) -> int:
|
def count_citations(text: str) -> int:
|
||||||
"""Count markdown-style citations [text](url)."""
|
"""Count markdown-style citations [text](url)."""
|
||||||
pattern = r"\[.+?\]\(https?://[^\s\)]+\)"
|
pattern = r"\[[^\]]*\]\(https?://[^\s\)]+\)"
|
||||||
return len(re.findall(pattern, text))
|
return len(re.findall(pattern, text))
|
||||||
|
|
||||||
|
|
||||||
@@ -148,7 +148,7 @@ def extract_domains(text: str) -> List[str]:
|
|||||||
|
|
||||||
def count_images(text: str) -> int:
|
def count_images(text: str) -> int:
|
||||||
"""Count markdown images ![alt](url)."""
|
"""Count markdown images ![alt](url)."""
|
||||||
pattern = r"!\[.*?\]\(.+?\)"
|
pattern = r"!\[[^\]]*\]\([^)]+\)"
|
||||||
return len(re.findall(pattern, text))
|
return len(re.findall(pattern, text))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user