it-tools/src/tools/markdown-toc-generator/markdown-toc-generator.service.ts
2024-12-15 20:59:48 +01:00

183 lines
4.9 KiB
TypeScript

import sanitizeHtml from 'sanitize-html';
/**
 * Removes every character that is not an ASCII letter, a Latin-1 letter
 * (U+00C0–U+00FF except U+00D7 and U+00F7), one of U+02BB / U+02EE / U+A78C,
 * a digit, whitespace, an underscore or a hyphen.
 *
 * @param text - Text to filter.
 * @returns The text with all other characters dropped.
 */
function stripNonLatinCharacters(text: string): string {
  const disallowedCharacters = /[^A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u02BB\u02EE\uA78C\d\s_-]/g;
  return text.replace(disallowedCharacters, '');
}
/**
 * Applies `transform` to the content of every backtick-delimited inline code
 * span, leaving the surrounding backticks and all other text untouched.
 *
 * @param text - Markdown text that may contain inline code spans.
 * @param transform - Called with the text between each pair of backticks.
 * @returns The text with each span's content replaced by the transform result.
 */
function transformInlineCode(text: string, transform: (s: string) => string) {
  const inlineCodeSpan = /`(.*?)`/g;
  const rewriteSpan = (_wholeSpan: string, spanContent: string) => '`' + transform(spanContent) + '`';
  return text.replace(inlineCodeSpan, rewriteSpan);
}
/**
 * Replaces each individual whitespace character with a hyphen.
 * Runs of whitespace therefore become runs of hyphens.
 *
 * @param text - Text to convert.
 * @returns The text with every whitespace character turned into `-`.
 */
function spacesToDash(text: string): string {
  const anyWhitespace = /\s/g;
  return text.replace(anyWhitespace, () => '-');
}
/**
 * Runs the text through `sanitize-html` with no tags and no attributes
 * allowed, so that HTML markup is removed.
 *
 * NOTE(review): the exact text returned (e.g. whether characters such as `&`
 * come back HTML-escaped) is decided by `sanitize-html` — confirm against its
 * documentation if callers depend on it.
 *
 * @param text - Text that may contain HTML.
 * @returns The sanitized text.
 */
function stripHtmlTags(text: string) {
  const allowNothing = {
    allowedTags: [],
    allowedAttributes: {},
  };
  return sanitizeHtml(text, allowNothing);
}
/**
 * Rewrites every inline markdown link of the form `[label](target)`.
 *
 * @param text - Markdown text.
 * @param replacement - Replacement string for each link; `$1` refers to the
 *   link label. Defaults to `$1`, i.e. the link is replaced by its label.
 * @returns The text with each link substituted.
 */
function stripMarkdownLinks(text: string, replacement: string = '$1') {
  const inlineLink = /\[([^\]]*)\]\([^\)]*\)/g; // NOSONAR
  return text.replace(inlineLink, replacement);
}
/**
 * Collapses every run of two or more consecutive hyphens into a single hyphen.
 *
 * @param text - Text to normalize.
 * @returns The text with hyphen runs collapsed.
 */
function concatDashes(text: string): string {
  const repeatedDashes = /--+/g;
  return text.replace(repeatedDashes, '-');
}
/**
 * Unwraps underscore-based emphasis (`__bold__` and `_italic_`), keeping only
 * the emphasized text. Markers must sit on word boundaries, so underscores
 * inside identifiers such as `snake_case` are left alone.
 *
 * @param text - Markdown text.
 * @returns The text with underscore emphasis markers removed.
 */
function removeUnderscoreBoldAndItalics(text: string) {
  // Bold (`__`) is processed before italics (`_`).
  return ['__', '_'].reduce((current, marker) => {
    const emphasis = new RegExp(`\\b${marker}([^_\\s]|[^_\\s].*?[^_\\s])${marker}\\b`, 'g');
    return current.replace(emphasis, '$1');
  }, text);
}
/**
 * Derives an anchor id from the markdown content of a heading.
 *
 * Steps, in order: lowercase; strip disallowed characters inside inline code
 * spans; unwrap underscore emphasis; remove HTML; replace markdown links by
 * their label; trim; strip disallowed characters everywhere; turn whitespace
 * into hyphens; optionally collapse hyphen runs.
 *
 * @param text - Heading content (the text after the `#` run).
 * @param concatSpaces - When true, runs of hyphens are collapsed into one.
 * @returns The generated anchor id.
 */
function genericAnchorGenerator(text: string, concatSpaces: boolean) {
  const lowered = text.toLowerCase();
  const withCleanInlineCode = transformInlineCode(lowered, code => stripNonLatinCharacters(code));
  const withoutEmphasis = removeUnderscoreBoldAndItalics(withCleanInlineCode);
  const withoutHtml = stripHtmlTags(withoutEmphasis);
  const withoutLinks = stripMarkdownLinks(withoutHtml);
  const latinOnly = stripNonLatinCharacters(withoutLinks.trim());
  const dashed = spacesToDash(latinOnly);
  return concatSpaces ? concatDashes(dashed) : dashed;
}
/**
 * Escapes the regular-expression metacharacters `. * + ? ^ $ { } ( ) | [ ] \`
 * so the string can be embedded in a `RegExp` source and match literally.
 *
 * @param string - Literal text to embed in a pattern.
 * @returns The text with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(string: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // Prefix each matched metacharacter with a backslash.
  return string.replace(metaCharacters, special => '\\' + special);
}
/** A heading line found in a markdown document. */
interface Title {
  /** Number of `#` characters at the start of the line. */
  level: number
  /** Identifier produced by the id generator for this heading. */
  id: string
  /** Heading content (everything after the `#` run), trimmed. */
  name: string
  /** The complete heading line exactly as matched in the source. */
  md: string
}

/**
 * Collects every line that starts with one or more `#` characters, ignoring
 * lines inside ``` or ~~~ fenced blocks.
 *
 * @param markdown - Markdown source to scan.
 * @param idGenerator - Called once per heading, in document order, with the
 *   untrimmed text that follows the `#` run; its result becomes the title id.
 * @returns The headings in document order.
 */
function getTitles(markdown: string, idGenerator: (titleMarkdownContent: string) => string) {
  // Drop fenced code blocks first so `#` lines inside them are not picked up.
  const withoutFences = markdown
    .replace(/^```[\s\S]*?\n```/mg, '')
    .replace(/^~~~[\s\S]*?\n~~~/mg, '');

  const found: Title[] = [];
  for (const [headingLine, hashes, content] of withoutFences.matchAll(/^(#+)(.*$)/mg)) { // NOSONAR
    found.push({
      md: headingLine,
      level: hashes.length,
      id: idGenerator(content),
      name: content.trim(),
    });
  }
  return found;
}
/**
 * Generates a table of contents for a markdown document and returns the
 * document with the TOC written in place of each `[TOC]` placeholder line.
 *
 * A TOC region and anchor lines written by a previous run (recognised by the
 * `TOC START` / `TOC END` / `TOC ANCHOR` comment markers of the selected
 * `commentStyle`) are removed first, so the function can be re-applied to its
 * own output.
 *
 * Level-1 headings are never listed; level-2 headings form the outermost TOC
 * level.
 *
 * @param options.markdown - Markdown source.
 * @param options.generateAnchors - When true, an `<a name="…">` anchor line is
 *   inserted before every listed heading. The anchor name is the bare heading
 *   id, without `anchorPrefix`.
 * @param options.indentChars - Bullet character for each nesting depth; depths
 *   beyond the string's length reuse its last character (`-` if it is empty).
 * @param options.indentSpaces - Indentation width per nesting depth.
 * @param options.maxLevel - Deepest heading level to list; values of 0 or less
 *   disable the limit.
 * @param options.anchorPrefix - Text prepended to the heading id in TOC link
 *   targets.
 * @param options.concatSpaces - Passed to the anchor generator: collapse runs
 *   of hyphens in generated ids.
 * @param options.commentStyle - Marker syntax: HTML comments or Liquid tags.
 * @returns The markdown with TOC (and optionally anchors) inserted.
 */
export function getTocMarkdown({
  markdown,
  generateAnchors = true,
  indentChars = '-*+',
  indentSpaces = 2,
  maxLevel = -1,
  anchorPrefix = '',
  concatSpaces = true,
  commentStyle = 'html',
}: {
  markdown: string
  generateAnchors?: boolean
  indentChars?: string
  indentSpaces?: number
  maxLevel?: number
  anchorPrefix?: string
  concatSpaces?: boolean
  commentStyle?: 'html' | 'liquid'
}): string {
  // A Map instead of a plain object: an id such as "constructor" would
  // otherwise resolve to a property inherited from Object.prototype and be
  // treated as already seen, producing a garbage id.
  const idCounts = new Map<string, number>();
  const getFinalId = (id: string) => {
    const previous = idCounts.get(id);
    if (previous === undefined) {
      idCounts.set(id, 0);
      return id;
    }
    // Repeated ids get a numeric suffix: "id", "id-1", "id-2", …
    const occurrence = previous + 1;
    idCounts.set(id, occurrence);
    return `${id}-${occurrence}`;
  };

  const titles = getTitles(markdown, titleContent => getFinalId(genericAnchorGenerator(titleContent, concatSpaces)));

  const createLink = (linkText: string, url: string) => {
    const escapedText = linkText.replace(/\[/g, '\\[').replace(/\]/g, '\\]');
    // Both parentheses must be encoded, otherwise a `)` in the URL would end
    // the link target early.
    const encodedUrl = url.replace(/\(/g, '%28').replace(/\)/g, '%29');
    return `[${escapedText}](${encodedUrl})`;
  };

  const commentOpen = commentStyle === 'html' ? '<!--' : '{%- #';
  const commentClose = commentStyle === 'html' ? '-->' : '-%}';
  const openPattern = escapeRegExp(commentOpen);
  const closePattern = escapeRegExp(commentClose);

  let markdownTOC = '';
  let resultMarkdown = markdown;

  // Collapse a previously generated TOC region back into its placeholder.
  resultMarkdown = resultMarkdown.replace(
    new RegExp(`\n${openPattern} TOC START.*?TOC END ${closePattern}\n`, 'smg'),
    '\n[TOC]\n',
  );
  // Remove previously generated anchor lines.
  resultMarkdown = resultMarkdown.replace(
    new RegExp(`^${openPattern} TOC ANCHOR.*?\n`, 'mg'),
    '',
  );

  for (const title of titles) {
    if (title.level === 1) {
      continue;
    }
    if (maxLevel > 0 && title.level > maxLevel) {
      continue;
    }

    // Nesting depth: level-2 headings sit at depth 0.
    const level = title.level - 2;
    let offset = '';
    if (level) {
      // Joining N empty slots yields N - 1 spaces; the trailing space makes N.
      offset = `${Array.from({ length: level * indentSpaces }).join(' ')} `;
    }
    const bulletChar = indentChars.charAt(level) || indentChars.slice(-1) || '-';
    const anchorName = `${anchorPrefix}${title.id}`;
    markdownTOC += `${offset}${bulletChar} ${createLink(stripMarkdownLinks(title.name), `#${anchorName}`)}\n`;

    if (generateAnchors) {
      const anchoredHeading = `${commentOpen} TOC ANCHOR ${commentClose}<a name="${title.id}"></a>\n${title.md}`;
      // Target the first occurrence of this heading line that is not already
      // preceded by an anchor line. A replacer function is used so `$`
      // sequences in the heading text are inserted literally.
      resultMarkdown = resultMarkdown.replace(
        new RegExp(`(?<!^${openPattern} TOC ANCHOR.*\n)^${escapeRegExp(title.md)}`, 'm'),
        () => anchoredHeading,
      );
    }
  }

  const tocBlock = `${commentOpen} TOC START ${commentClose}\n${markdownTOC}${commentOpen} TOC END ${commentClose}\n`;
  // Replacer function for the same reason: heading names may contain `$`.
  return resultMarkdown.replace(/^\[TOC\]\n/mg, () => tocBlock);
}