fix: unescape markdown-escaped characters in math formulas to fix #608 (#637)

When editing reports, tiptap-markdown escapes special characters (*, _, [, ])
which corrupts LaTeX formulas. This fix:

1. Adds unescapeLatexInMath() function to reverse markdown escaping within
   math delimiters ($...$ and $$...$$)
2. Applies the unescape function in the editor's onChange callback to clean
   the markdown before storing it
3. Adds comprehensive tests covering edge cases and round-trip scenarios

The fix ensures formulas like $(f * g)[n]$ remain unescaped when editing,
preventing display errors after save/reload.
This commit is contained in:
Willem Jiang
2025-10-21 10:06:31 +08:00
committed by GitHub
parent cb5c477371
commit 1a16677d1a
3 changed files with 127 additions and 3 deletions

View File

@@ -2,6 +2,42 @@ export function autoFixMarkdown(markdown: string): string {
return autoCloseTrailingLink(markdown);
}
/**
 * Unescape markdown-escaped characters within math delimiters.
 *
 * tiptap-markdown escapes special characters like *, _, [, ] which corrupts
 * math formulas. This function restores the original LaTeX by unescaping the
 * content of `$...$` (inline) and `$$...$$` (display) spans; text outside
 * math spans is returned untouched.
 *
 * Every span is unescaped exactly once. Inline and display math are matched
 * in a single left-to-right scan: running them as two separate passes lets
 * the inline pattern match the inside of a `$$...$$` span first, so display
 * math would be unescaped twice (turning an escaped LaTeX line break `\\\\`
 * into `\` instead of `\\`).
 *
 * NOTE: an inline span is any run of non-`$` characters between two `$`,
 * including newlines, so two unrelated dollar signs in prose are treated as
 * a math span as well.
 *
 * @param markdown - Markdown text as produced by the editor.
 * @returns The markdown with escaping removed inside math spans.
 */
export function unescapeLatexInMath(markdown: string): string {
  // The `$$...$$` alternative is listed first so that, at any position where
  // both could start, display math wins over inline math.
  const mathSpan = /\$\$([\s\S]+?)\$\$|\$([^$]+?)\$/g;

  return markdown.replace(
    mathSpan,
    (span: string, displayMath?: string, inlineMath?: string): string => {
      if (displayMath !== undefined) {
        return `$$${unescapeMarkdownSpecialChars(displayMath)}$$`;
      }
      if (inlineMath !== undefined) {
        return `$${unescapeMarkdownSpecialChars(inlineMath)}$`;
      }
      // Unreachable in practice: one of the two groups always participates.
      return span;
    },
  );
}
/**
 * Remove the backslash that markdown escaping places in front of `*`, `_`,
 * `[`, `]` and `\` itself.
 *
 * The rules are applied one after another in the order listed. The
 * backslash rule must stay last: collapsing `\\` into `\` any earlier would
 * leave a lone backslash that a later rule could misread as an escape for
 * the character following it.
 *
 * @param text - Text containing markdown backslash escapes.
 * @returns The text with those escapes removed.
 */
function unescapeMarkdownSpecialChars(text: string): string {
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/\\\*/g, '*'], // \* → *
    [/\\_/g, '_'], // \_ → _
    [/\\\[/g, '['], // \[ → [
    [/\\\]/g, ']'], // \] → ]
    [/\\\\/g, '\\'], // \\ → \
  ];

  let output = text;
  for (const [pattern, replacement] of rules) {
    output = output.replace(pattern, replacement);
  }
  return output;
}
/**
* Normalize math delimiters for editor consumption
* Converts display delimiters (\[...\], \\[...\\]) to $$ format