// Mirror of https://github.com/CorentinTh/it-tools.git
// Synced 2025-04-24 16:56:14 -04:00
// 183 lines · 4.9 KiB · TypeScript
import sanitizeHtml from 'sanitize-html';
|
|
|
|
/**
 * Removes every character that is not an ASCII or Latin-1 letter, one of the
 * modifier letters U+02BB / U+02EE / U+A78C, a digit, whitespace, an
 * underscore or a hyphen.
 *
 * @param text - The string to filter.
 * @returns `text` with all other characters dropped.
 */
function stripNonLatinCharacters(text: string): string {
  const disallowedCharacters = /[^A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u02BB\u02EE\uA78C\d\s_-]/g;

  return text.replace(disallowedCharacters, '');
}
|
|
|
|
/**
 * Applies `transform` to the content of every backtick-delimited span found
 * on a single line and puts the result back between backticks.
 *
 * @param text - The string to scan.
 * @param transform - Called with the text between each pair of backticks.
 * @returns `text` with each inline-code span rewritten.
 */
function transformInlineCode(text: string, transform: (s: string) => string): string {
  const inlineCodeSpan = /`(.*?)`/g;

  return text.replace(
    inlineCodeSpan,
    (_wholeMatch: string, codeContent: string) => '`' + transform(codeContent) + '`',
  );
}
|
|
|
|
/**
 * Replaces each individual whitespace character with a hyphen.
 * Consecutive whitespace characters produce consecutive hyphens.
 *
 * @param text - The string to convert.
 * @returns `text` with every whitespace character turned into `-`.
 */
function spacesToDash(text: string): string {
  const pieces = text.split(/\s/);

  return pieces.join('-');
}
|
|
|
|
/**
 * Runs `text` through sanitize-html configured with an empty tag allow-list
 * and an empty attribute allow-list.
 *
 * @param text - The string that may contain HTML markup.
 * @returns The sanitized string returned by sanitize-html.
 */
function stripHtmlTags(text: string) {
  const allowNothing = {
    allowedTags: [],
    allowedAttributes: {},
  };

  return sanitizeHtml(text, allowNothing);
}
|
|
|
|
/**
 * Rewrites every inline Markdown link of the form `[label](target)`.
 *
 * @param text - The string to scan.
 * @param replacement - Replacement pattern for each link; `$1` refers to the
 *   link label. Defaults to `$1`, i.e. the link is replaced by its label.
 * @returns `text` with all inline links replaced.
 */
function stripMarkdownLinks(text: string, replacement = '$1'): string {
  const inlineLink = /\[([^\]]*)\]\([^\)]*\)/g; // NOSONAR

  return text.replace(inlineLink, replacement);
}
|
|
|
|
/**
 * Collapses every run of two or more hyphens into a single hyphen.
 *
 * @param text - The string to normalize.
 * @returns `text` without consecutive hyphens.
 */
function concatDashes(text: string): string {
  const hyphenRun = /--+/g;

  return text.replace(hyphenRun, '-');
}
|
|
|
|
/**
 * Removes underscore-based emphasis markers (`__bold__` and `_italic_`) that
 * sit on word boundaries, keeping the emphasized text.
 *
 * @param text - The string to clean.
 * @returns `text` without the matched emphasis delimiters.
 */
function removeUnderscoreBoldAndItalics(text: string): string {
  // The double-underscore delimiter is processed first, then the single one.
  const delimiters = ['__', '_'];

  return delimiters.reduce((current, delimiter) => {
    const emphasis = new RegExp(`\\b${delimiter}([^_\\s]|[^_\\s].*?[^_\\s])${delimiter}\\b`, 'g');
    return current.replace(emphasis, '$1');
  }, text);
}
|
|
|
|
/**
 * Derives an anchor identifier from the Markdown text of a heading.
 *
 * The steps, in order: lowercase; filter the content of inline-code spans;
 * drop underscore emphasis markers; sanitize HTML; replace inline links by
 * their labels; trim; filter disallowed characters; turn whitespace into
 * hyphens; optionally collapse hyphen runs.
 *
 * @param text - Heading text (everything after the leading `#` characters).
 * @param concatSpaces - When true, runs of hyphens are collapsed into one.
 * @returns The generated anchor identifier.
 */
function genericAnchorGenerator(text: string, concatSpaces: boolean): string {
  const lowered = text.toLowerCase();
  const withCleanInlineCode = transformInlineCode(lowered, stripNonLatinCharacters);
  const withoutEmphasis = removeUnderscoreBoldAndItalics(withCleanInlineCode);
  const withoutHtml = stripHtmlTags(withoutEmphasis);
  const withoutLinks = stripMarkdownLinks(withoutHtml);
  const dashed = spacesToDash(stripNonLatinCharacters(withoutLinks.trim()));

  return concatSpaces ? concatDashes(dashed) : dashed;
}
|
|
|
|
/**
 * Escapes the characters that have a special meaning in a regular expression
 * so the input can be embedded literally in a `RegExp` source.
 *
 * @param string - The raw text to escape.
 * @returns The text with each special character prefixed by a backslash.
 */
function escapeRegExp(string: string): string {
  const regExpSpecialCharacters = /[.*+?^${}()|[\]\\]/g;

  // In the replacement pattern, `$&` stands for the whole matched substring.
  return string.replace(regExpSpecialCharacters, '\\$&');
}
|
|
|
|
/** A heading line found in a Markdown document. */
interface Title {
  /** Number of leading `#` characters of the heading. */
  level: number;
  /** Identifier returned by the id generator for this heading. */
  id: string;
  /** Heading text after the leading `#` characters, trimmed. */
  name: string;
  /** The complete heading line exactly as matched in the document. */
  md: string;
}

/**
 * Collects every line that starts with one or more `#` characters, ignoring
 * anything inside fenced code blocks (``` or ~~~ fences starting at the
 * beginning of a line).
 *
 * @param markdown - The Markdown document to scan.
 * @param idGenerator - Called once per heading, in document order, with the
 *   untrimmed text that follows the `#` characters; its result becomes the
 *   heading's `id`.
 * @returns The headings in document order.
 */
function getTitles(markdown: string, idGenerator: (titleMarkdownContent: string) => string): Title[] {
  const withoutFencedCode = markdown
    .replace(/^```[\s\S]*?\n```/mg, '')
    .replace(/^~~~[\s\S]*?\n~~~/mg, '');

  const headings: Title[] = [];

  for (const [headingLine, hashes, content] of withoutFencedCode.matchAll(/^(#+)(.*$)/mg)) { // NOSONAR
    headings.push({
      md: headingLine,
      level: hashes.length,
      id: idGenerator(content),
      name: content.trim(),
    });
  }

  return headings;
}
|
|
|
|
/**
 * Generates a table of contents for a Markdown document and writes it in
 * place of every `[TOC]` marker line.
 *
 * Before generating, a TOC block produced by a previous run (delimited by
 * `TOC START` / `TOC END` comments) is collapsed back into a `[TOC]` marker
 * and previously inserted `TOC ANCHOR` comment lines are removed, so the
 * function can be re-applied to its own output.
 *
 * Level-1 headings are never listed. Level-2 headings form the top level of
 * the generated list.
 *
 * @param options - Generation options.
 * @param options.markdown - The Markdown document.
 * @param options.generateAnchors - When true (default), an
 *   `<a name="…"></a>` anchor line wrapped in a `TOC ANCHOR` comment is
 *   inserted before each listed heading.
 * @param options.indentChars - Bullet characters, one per nesting depth; the
 *   last character is reused for deeper levels. Defaults to `-*+`. If empty,
 *   `-` is used.
 * @param options.indentSpaces - Spaces of indentation per nesting depth
 *   (default 2). Nested entries always get at least one space.
 * @param options.maxLevel - When greater than 0, headings with more `#`
 *   characters than this are omitted. Defaults to -1 (no limit).
 * @param options.anchorPrefix - Text prepended to each id in the TOC links.
 * @param options.concatSpaces - Forwarded to the anchor id generator; when
 *   true (default), runs of hyphens in ids are collapsed.
 * @param options.commentStyle - `'html'` (default) wraps markers in
 *   `<!-- … -->`; `'liquid'` wraps them in `{%- # … -%}`.
 * @returns The document with the TOC (and, optionally, anchors) inserted.
 */
export function getTocMarkdown({
  markdown,
  generateAnchors = true,
  indentChars = '-*+',
  indentSpaces = 2,
  maxLevel = -1,
  anchorPrefix = '',
  concatSpaces = true,
  commentStyle = 'html',
}: {
  markdown: string
  generateAnchors?: boolean
  indentChars?: string
  indentSpaces?: number
  maxLevel?: number
  anchorPrefix?: string
  concatSpaces?: boolean
  commentStyle?: 'html' | 'liquid'
}): string {
  // A Map rather than a plain object: ids such as "constructor" must not be
  // mistaken for members inherited from Object.prototype.
  const idOccurrences = new Map<string, number>();

  // Returns a unique id: the first use keeps the id as is, later uses get a
  // "-<n>" suffix. The suffixed id is registered too, so it cannot clash with
  // a heading whose own id happens to look the same.
  const getFinalId = (id: string) => {
    let candidate = id;
    while (idOccurrences.has(candidate)) {
      const count = (idOccurrences.get(id) ?? 0) + 1;
      idOccurrences.set(id, count);
      candidate = `${id}-${count}`;
    }
    idOccurrences.set(candidate, 0);
    return candidate;
  };

  const titles = getTitles(markdown, titleContent => getFinalId(genericAnchorGenerator(titleContent, concatSpaces)));

  // Builds a Markdown link, escaping the characters that would otherwise
  // terminate the label (square brackets) or the destination (parentheses).
  const createLink = (linkText: string, url: string) => {
    const escapedText = linkText.replace(/\[/g, '\\[').replace(/\]/g, '\\]');
    const escapedUrl = url.replace(/\(/g, '%28').replace(/\)/g, '%29');
    return `[${escapedText}](${escapedUrl})`;
  };

  const commentOpen = commentStyle === 'html' ? '<!--' : '{%- #';
  const commentClose = commentStyle === 'html' ? '-->' : '-%}';
  const escapedOpen = escapeRegExp(commentOpen);
  const escapedClose = escapeRegExp(commentClose);

  let markdownTOC = '';
  let resultMarkdown = markdown;

  // Collapse a previously generated TOC block back into its marker.
  resultMarkdown = resultMarkdown.replace(
    new RegExp(`\n${escapedOpen} TOC START.*?TOC END ${escapedClose}\n`, 'smg'),
    '\n[TOC]\n',
  );
  // Drop previously generated anchor lines.
  resultMarkdown = resultMarkdown.replace(
    new RegExp(`^${escapedOpen} TOC ANCHOR.*?\n`, 'mg'),
    '',
  );

  for (const title of titles) {
    if (title.level === 1) {
      continue;
    }

    if (maxLevel > 0 && title.level > maxLevel) {
      continue;
    }

    // Depth relative to level-2 headings, which are the top of the list.
    const level = title.level - 2;
    const offset = level > 0 ? ' '.repeat(Math.max(1, level * indentSpaces)) : '';
    const bulletChar = indentChars[level] ?? indentChars[indentChars.length - 1] ?? '-';

    const anchorName = `${anchorPrefix}${title.id}`;

    markdownTOC += `${offset}${bulletChar} ${createLink(stripMarkdownLinks(title.name), `#${anchorName}`)}\n`;

    if (generateAnchors) {
      // First occurrence of this exact heading line that is not already
      // preceded by an anchor line.
      const headingWithoutAnchor = new RegExp(
        `(?<!^${escapedOpen} TOC ANCHOR.*\n)^${escapeRegExp(title.md)}$`,
        'm',
      );
      // NOTE(review): the anchor uses title.id while the TOC link uses
      // anchorPrefix + title.id — confirm whether the prefix should apply here.
      const anchoredHeading = `${commentOpen} TOC ANCHOR ${commentClose}<a name="${title.id}"></a>\n${title.md}`;

      // Function replacer: heading text may contain `$` sequences that a
      // string replacement would interpret as substitution patterns.
      resultMarkdown = resultMarkdown.replace(headingWithoutAnchor, () => anchoredHeading);
    }
  }

  const tocBlock = `${commentOpen} TOC START ${commentClose}\n${markdownTOC}${commentOpen} TOC END ${commentClose}\n`;

  // Function replacer for the same reason as above: the TOC contains heading text.
  return resultMarkdown.replace(/^\[TOC\]\n/mg, () => tocBlock);
}
|