Files
obsidian-unicode-formatter/src/thread-splitter.ts
T

175 lines
5.0 KiB
TypeScript

/** Options accepted by `splitIntoThreads`. */
export interface ThreadOptions {
/** Upper bound on the weighted length (see `countChars`) of every returned post, numbering prefix included. */
maxChars: number;
/** When true and the result has more than one post, each post is prefixed with "<index>/<total> ". */
addNumbering: boolean;
}
/** Values used by `splitIntoThreads` for any option the caller does not supply. */
const DEFAULT_OPTIONS: ThreadOptions = {
maxChars: 280,
addNumbering: true,
};
/**
 * Characters subtracted from `maxChars` when numbering is enabled, leaving
 * room for the "<index>/<total> " prefix. Eight characters cover prefixes up
 * to "999/999 "; longer threads would produce a prefix that exceeds the reserve.
 */
const NUMBERING_RESERVE = 8;
/**
 * Returns the weighted length of `text`.
 *
 * Ordinary text contributes its UTF-16 length (`String.prototype.length`).
 * Every `http://` / `https://` URL (scheme followed by a run of
 * non-whitespace) contributes a flat 23 regardless of how long it really is
 * (presumably mirroring link shortening on the target platform; confirm).
 *
 * @param text - Text to measure.
 * @returns Weighted character count.
 */
function countChars(text: string): number {
  const urlWeight = 23;
  // A fresh regex per call: a shared /g regex would carry lastIndex state.
  const urlPattern = /https?:\/\/\S+/g;

  // Start from the raw length, then swap each URL's real length for its weight.
  let weighted = text.length;
  for (let hit = urlPattern.exec(text); hit !== null; hit = urlPattern.exec(text)) {
    weighted += urlWeight - hit[0].length;
  }
  return weighted;
}
/**
 * Tells whether `text` stays within a weighted-length limit.
 *
 * @param text - Text to measure with `countChars`.
 * @param max - Inclusive upper bound on the weighted length.
 * @returns `true` when the weighted length of `text` does not exceed `max`.
 */
function textFits(text: string, max: number): boolean {
  const weighted = countChars(text);
  return max >= weighted;
}
/**
 * Breaks `text` at whitespace into chunks, marking every chunk except the
 * last with a trailing "..." to signal that the text continues.
 *
 * Words are packed greedily and re-joined with single spaces. Room for the
 * ellipsis is reserved in every chunk, so a chunk's words are limited to
 * `max - 3` weighted characters (see `countChars`).
 *
 * A single token longer than that budget (for example text written without
 * spaces) is cut into budget-sized pieces, each also marked with "...". A
 * token that cannot be shortened by cutting is emitted whole: this happens
 * when its weighted length exceeds its real length, or when the budget is
 * not positive.
 *
 * @param text - Text to split; surrounding and repeated whitespace is ignored.
 * @param max - Maximum weighted length of a chunk, ellipsis included.
 * @returns Chunks in original order; empty when `text` has no non-whitespace.
 */
function splitByWords(text: string, max: number): string[] {
  const ellipsis = "...";
  const budget = max - ellipsis.length;
  // Leading/trailing whitespace makes split() yield empty tokens; drop them so
  // they cannot add stray spaces or force a spurious "..." on the final chunk.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  const chunks: string[] = [];
  let current = "";

  for (const word of words) {
    const candidate = current ? `${current} ${word}` : word;
    if (countChars(candidate) <= budget) {
      current = candidate;
      continue;
    }

    if (current) chunks.push(current + ellipsis);
    current = word;

    // The token alone may still be too long: peel off budget-sized pieces.
    while (countChars(current) > budget) {
      let cut = Math.min(budget, current.length);
      if (cut > 0 && cut < current.length) {
        // Do not separate a high surrogate from its low surrogate.
        const unit = current.charCodeAt(cut - 1);
        if (unit >= 0xd800 && unit <= 0xdbff) cut -= 1;
      }
      if (cut <= 0 || cut >= current.length) break;
      const piece = current.slice(0, cut);
      // Cutting through a URL can leave a piece that is still over budget.
      if (countChars(piece) > budget) break;
      chunks.push(piece + ellipsis);
      current = current.slice(cut);
    }
  }

  if (current) chunks.push(current);
  return chunks;
}
/**
 * Packs the sentences of `text` into chunks that each satisfy
 * `textFits(chunk, max)` where possible.
 *
 * A sentence ends at a run of `.`, `!` or `?` that is followed by whitespace
 * or by the end of the text. Punctuation inside a token ("example.com",
 * "3.14") therefore does not end a sentence, and leading punctuation stays
 * attached to its sentence instead of being dropped.
 *
 * Consecutive sentences that fit together are joined with a single space.
 * A sentence that does not fit on its own is handed to `splitByWords`, and
 * its pieces are emitted as separate chunks.
 *
 * @param text - Text to split (callers in this file pass a single trimmed line).
 * @param max - Maximum weighted length of a chunk.
 * @returns Chunks in original order; empty when `text` has no non-whitespace.
 */
function splitBySentences(text: string, max: number): string[] {
  // Lazily extend from the first non-space character up to terminal
  // punctuation that is followed by whitespace/end, or up to the end of text.
  const sentencePattern = /\S[\s\S]*?(?:[.!?]+(?=\s|$)|$)/g;
  const sentences = (text.match(sentencePattern) ?? []).map((sentence) => sentence.trim());

  const chunks: string[] = [];
  let current = "";

  for (const sentence of sentences) {
    if (current) {
      // The whitespace between sentences is not part of either match, so it
      // has to be put back explicitly when two sentences share a chunk.
      const candidate = `${current} ${sentence}`;
      if (textFits(candidate, max)) {
        current = candidate;
        continue;
      }
      chunks.push(current);
      current = "";
    }

    if (textFits(sentence, max)) {
      current = sentence;
    } else {
      chunks.push(...splitByWords(sentence, max));
    }
  }

  if (current) chunks.push(current);
  return chunks;
}
/**
 * Splits one block of text into chunks, preferring to break at line ends.
 *
 * Blank lines are skipped and every remaining line is trimmed. Lines are
 * accumulated, joined with "\n", for as long as the accumulated text passes
 * `textFits`. A line that does not fit on its own is delegated to
 * `splitBySentences`, and the pieces it returns become chunks of their own.
 *
 * @param text - Block to split.
 * @param max - Maximum weighted length of a chunk.
 * @returns The chunks in original order, or `[text]` when nothing was produced.
 */
function splitBlock(text: string, max: number): string[] {
  const result: string[] = [];
  let pending = "";

  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (line === "") continue;

    if (pending !== "") {
      const joined = pending + "\n" + line;
      if (textFits(joined, max)) {
        pending = joined;
        continue;
      }
      // The accumulated lines are full: flush them before placing this line.
      result.push(pending);
      pending = "";
    }

    if (textFits(line, max)) {
      pending = line;
    } else {
      result.push(...splitBySentences(line, max));
    }
  }

  if (pending !== "") result.push(pending);
  return result.length === 0 ? [text] : result;
}
/**
 * Splits `text` into an ordered list of posts, each within `maxChars`
 * weighted characters (see `countChars`).
 *
 * Paragraphs are separated by two or more newlines. They are trimmed, empty
 * ones are dropped, and they are packed greedily, joined by a blank line. A
 * paragraph too long for one post is broken up by `splitBlock`; its last
 * piece stays open so the following paragraph may still join it.
 *
 * With `addNumbering` enabled and more than one post, every post is prefixed
 * with "<index>/<total> " and `NUMBERING_RESERVE` characters are held back
 * for that prefix. A single post is never numbered, so text that fits
 * `maxChars` as a whole is returned as one post without the reserve applied.
 *
 * @param text - Source text.
 * @param options - Overrides for `DEFAULT_OPTIONS`.
 * @returns The posts in order; empty when `text` has no non-blank paragraph.
 */
export function splitIntoThreads(text: string, options?: Partial<ThreadOptions>): string[] {
  const opts: ThreadOptions = { ...DEFAULT_OPTIONS, ...options };

  const paragraphs = text
    .split(/\n{2,}/)
    .map((para) => para.trim())
    .filter((para) => para.length > 0);
  if (paragraphs.length === 0) return [];

  // A lone post gets no "1/1" prefix, so measure it against the full limit.
  // Otherwise text just under maxChars is split in two for no reason.
  const whole = paragraphs.join("\n\n");
  if (textFits(whole, opts.maxChars)) return [whole];

  const effectiveMax = opts.addNumbering ? opts.maxChars - NUMBERING_RESERVE : opts.maxChars;
  const rawTweets: string[] = [];
  let current = "";

  for (const para of paragraphs) {
    if (current) {
      const candidate = `${current}\n\n${para}`;
      if (textFits(candidate, effectiveMax)) {
        current = candidate;
        continue;
      }
      rawTweets.push(current);
    }

    if (textFits(para, effectiveMax)) {
      current = para;
      continue;
    }

    // Oversized paragraph: emit all pieces but the last, and keep the last
    // one open so the next paragraph can still be appended to it.
    const chunks = splitBlock(para, effectiveMax);
    rawTweets.push(...chunks.slice(0, -1));
    current = chunks[chunks.length - 1] ?? "";
  }

  if (current) rawTweets.push(current);

  if (opts.addNumbering && rawTweets.length > 1) {
    const total = rawTweets.length;
    return rawTweets.map((tweet, i) => `${i + 1}/${total} ${tweet}`);
  }
  return rawTweets;
}