From 5a79f896c4aeb5ff981619188dc59038dee760aa Mon Sep 17 00:00:00 2001 From: Willem Jiang Date: Fri, 26 Dec 2025 10:10:12 +0800 Subject: [PATCH] fix(metrics): update the polynomial regular expression used on uncontrolled data (#784) --- src/eval/metrics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/eval/metrics.py b/src/eval/metrics.py index 3b61b70..bbcc171 100644 --- a/src/eval/metrics.py +++ b/src/eval/metrics.py @@ -125,7 +125,7 @@ def count_words(text: str) -> int: def count_citations(text: str) -> int: """Count markdown-style citations [text](url).""" - pattern = r"\[.+?\]\(https?://[^\s\)]+\)" + pattern = r"\[[^\]]*\]\(https?://[^\s\)]+\)" return len(re.findall(pattern, text)) @@ -148,7 +148,7 @@ def extract_domains(text: str) -> List[str]: def count_images(text: str) -> int: """Count markdown images ![alt](url).""" - pattern = r"!\[.*?\]\(.+?\)" + pattern = r"!\[[^\]]*\]\([^)]+\)" return len(re.findall(pattern, text))