Highlighting now takes account of character set width

This commit is contained in:
n1474335 2022-09-16 16:00:39 +01:00
parent 08b91fd7ff
commit a141873db8
2 changed files with 57 additions and 5 deletions

View file

@ -6,6 +6,8 @@
* @license Apache-2.0
*/
import cptable from "codepage";
/**
* Character encoding format mappings.
*/
@ -175,6 +177,46 @@ for (const name in CHR_ENC_CODE_PAGES) {
CHR_ENC_SIMPLE_REVERSE_LOOKUP[CHR_ENC_CODE_PAGES[name]] = simpleName;
}
/**
* Returns the width of the character set for the given codepage.
* For example, UTF-8 is a Single Byte Character Set, whereas
* UTF-16 is a Double Byte Character Set.
*
* @param {number} page - The codepage number
* @returns {number}
*/
export function chrEncWidth(page) {
if (typeof page !== "number") return 0;
// Raw Bytes have a width of 1
if (page === 0) return 1;
const pageStr = page.toString();
// Confirm this page is legitimate
if (!Object.prototype.hasOwnProperty.call(CHR_ENC_SIMPLE_REVERSE_LOOKUP, pageStr))
return 0;
// Statically defined code pages
if (Object.prototype.hasOwnProperty.call(cptable, pageStr))
return cptable[pageStr].dec.length > 256 ? 2 : 1;
// Cached code pages
if (cptable.utils.cache.sbcs.includes(pageStr))
return 1;
if (cptable.utils.cache.dbcs.includes(pageStr))
return 2;
// Dynamically generated code pages
if (Object.prototype.hasOwnProperty.call(cptable.utils.magic, pageStr)) {
// Generate a single character and measure it
const a = cptable.utils.encode(page, "a");
return a.length;
}
return 0;
}
/**
* Unicode Normalisation Forms
*