Convert Unicode-formatted text back to Markdown syntax in cleanText

- Add UNICODE_TO_STYLE reverse map to classify Unicode chars by font style
- Rewrite cleanText to group consecutive same-style characters into runs and wrap each run in Markdown (**bold**, _italic_, **_bold-italic_**)
This commit is contained in:
2026-04-29 22:51:48 +02:00
parent e5c3417a83
commit 783278058b
2 changed files with 32 additions and 2 deletions
+23 -2
View File
@@ -1,4 +1,4 @@
import { BOLD_MAP, ITALIC_MAP, BOLD_ITALIC_MAP, UNICODE_TO_ASCII_MAP } from "./unicode-maps";
import { BOLD_MAP, ITALIC_MAP, BOLD_ITALIC_MAP, UNICODE_TO_ASCII_MAP, UNICODE_TO_STYLE } from "./unicode-maps";
export type FormatStyle = "bold" | "italic" | "bold-italic";
@@ -20,7 +20,28 @@ export function cleanText(text: string): string {
line = line.replace(/^\(\d+\) /, "").replace(/^\d+\/ /, "");
return [...line].map(ch => UNICODE_TO_ASCII_MAP.get(ch) ?? ch).join("");
const output: string[] = [];
let currentStyle: string | null = null;
let buf: string[] = [];
const flush = () => {
if (buf.length === 0) return;
const s = buf.join("");
if (currentStyle === "bold") output.push(`**${s}**`);
else if (currentStyle === "italic") output.push(`_${s}_`);
else if (currentStyle === "bold-italic") output.push(`**_${s}_**`);
else output.push(s);
buf = [];
};
for (const ch of line) {
const style = UNICODE_TO_STYLE.get(ch) ?? null;
const ascii = UNICODE_TO_ASCII_MAP.get(ch) ?? ch;
if (style !== currentStyle) { flush(); currentStyle = style; }
buf.push(ascii);
}
flush();
return output.join("");
}).join("\n");
}
+9
View File
@@ -31,3 +31,12 @@ export const UNICODE_TO_ASCII_MAP: Map<string, string> = new Map(
Object.entries(m).map(([ascii, unicode]) => [unicode, ascii] as [string, string])
)
);
/** Font styles that a styled Unicode symbol can be classified as. */
export type FontStyle = "bold" | "italic" | "bold-italic";

/**
 * Reverse lookup: styled Unicode symbol → the font style whose table contains it.
 *
 * Entries are generated table by table (bold, then italic, then bold-italic)
 * from the values of each style map. Should one symbol occur in more than one
 * table, the style of the later table is the one stored, as with any `Map`
 * built from an entry list containing a repeated key.
 */
export const UNICODE_TO_STYLE: Map<string, FontStyle> = new Map(
  (
    [
      ["bold", BOLD_MAP],
      ["italic", ITALIC_MAP],
      ["bold-italic", BOLD_ITALIC_MAP],
    ] as const
  ).flatMap(([style, glyphTable]) =>
    Object.values(glyphTable).map(
      (glyph): [string, FontStyle] => [glyph, style]
    )
  )
);