From 6df8c1004f1fb6e522716922d9b31427bf00f126 Mon Sep 17 00:00:00 2001
From: "Glenn R. Martin" <222487+grmartin@users.noreply.github.com>
Date: Tue, 3 Jun 2025 09:22:20 -0400
Subject: [PATCH 1/9] Adding AI Token Counter

(cherry picked from commit 8b20dcbf1b52e3accc430655c2d1d94f058171fe)
---
 package-lock.json                     |   6 ++
 package.json                          |   1 +
 src/core/operations/CountAITokens.mjs | 100 ++++++++++++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 src/core/operations/CountAITokens.mjs

diff --git a/package-lock.json b/package-lock.json
index b374df4b..6ef3b9f1 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -46,6 +46,7 @@
                 "file-saver": "^2.0.5",
                 "flat": "^6.0.1",
                 "geodesy": "1.1.3",
+                "gpt-tokenizer": "^2.9.0",
                 "handlebars": "^4.7.8",
                 "hash-wasm": "^4.12.0",
                 "highlight.js": "^11.9.0",
@@ -10361,6 +10362,11 @@
                 "url": "https://github.com/sponsors/ljharb"
             }
         },
+        "node_modules/gpt-tokenizer": {
+            "version": "2.9.0",
+            "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.9.0.tgz",
+            "integrity": "sha512-YSpexBL/k4bfliAzMrRqn3M6+it02LutVyhVpDeMKrC/O9+pCe/5s8U2hYKa2vFLD5/vHhsKc8sOn/qGqII8Kg=="
+        },
         "node_modules/graceful-fs": {
             "version": "4.2.11",
             "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
diff --git a/package.json b/package.json
index 9191ab6f..aab5da6a 100644
--- a/package.json
+++ b/package.json
@@ -132,6 +132,7 @@
         "file-saver": "^2.0.5",
         "flat": "^6.0.1",
         "geodesy": "1.1.3",
+        "gpt-tokenizer": "^2.9.0",
         "handlebars": "^4.7.8",
         "hash-wasm": "^4.12.0",
         "highlight.js": "^11.9.0",
diff --git a/src/core/operations/CountAITokens.mjs b/src/core/operations/CountAITokens.mjs
new file mode 100644
index 00000000..ecba26ee
--- /dev/null
+++ b/src/core/operations/CountAITokens.mjs
@@ -0,0 +1,100 @@
+/**
+ * @author grmartin [grmartin]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+
+// This mapping returns a Promise that resolves to the correct countTokens function for the model.
+const MODEL_TO_COUNT_TOKENS = {
+    // cl100k_base models
+    "gpt-4": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-4-32k": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-4-turbo": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-4o": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-4-0125-preview": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-4-1106-preview": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-3.5-turbo": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-3.5-turbo-16k": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-3.5-turbo-instruct": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-3.5-turbo-0125": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "gpt-3.5-turbo-1106": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
+    "text-embedding-ada-002": () => import("gpt-tokenizer/model/text-embedding-ada-002").then(m => m.countTokens),
+    "text-embedding-3-large": () => import("gpt-tokenizer/model/text-embedding-3-large").then(m => m.countTokens),
+    "text-embedding-3-small": () => import("gpt-tokenizer/model/text-embedding-3-small").then(m => m.countTokens),
+
+    // p50k_base models
+    "code-davinci-002": () => import("gpt-tokenizer/model/code-davinci-002").then(m => m.countTokens),
+    "code-davinci-001": () => import("gpt-tokenizer/model/code-davinci-002").then(m => m.countTokens),
+    "code-cushman-002": () => import("gpt-tokenizer/model/code-cushman-002").then(m => m.countTokens),
+    "code-cushman-001": () => import("gpt-tokenizer/model/code-cushman-002").then(m => m.countTokens),
+    "text-davinci-002": () => import("gpt-tokenizer/model/text-davinci-002").then(m => m.countTokens),
+    "text-davinci-003": () => import("gpt-tokenizer/model/text-davinci-003").then(m => m.countTokens),
+
+    // p50k_edit models
+    "text-davinci-edit-001": () => import("gpt-tokenizer/model/text-davinci-edit-001").then(m => m.countTokens),
+    "code-davinci-edit-001": () => import("gpt-tokenizer/model/code-davinci-edit-001").then(m => m.countTokens),
+
+    // r50k_base models
+    "davinci": () => import("gpt-tokenizer/model/davinci").then(m => m.countTokens),
+    "curie": () => import("gpt-tokenizer/model/curie").then(m => m.countTokens),
+    "babbage": () => import("gpt-tokenizer/model/babbage").then(m => m.countTokens),
+    "ada": () => import("gpt-tokenizer/model/ada").then(m => m.countTokens),
+};
+
+
+/**
+ * Count AI Tokens operation
+ */
+class CountAITokens extends Operation {
+
+    /**
+     * Count AI Tokens constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Count AI Tokens";
+        this.module = "AI";
+        this.infoURL = "https://github.com/niieani/gpt-tokenizer";
+        this.description = "Counts the number of GPT tokens in the input text using niieani/gpt-tokenizer. Select the model to use the correct encoding.";
+        this.inputType = "string";
+        this.outputType = "string";
+        this.args = [
+            {
+                name: "Model",
+                type: "option",
+                value: Object.keys(MODEL_TO_COUNT_TOKENS),
+            }
+        ];
+    }
+
+    /**
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {string}
+     */
+    async run(input, args) {
+        if (!input) return "";
+        // const [model] = args;
+        // // Use the mapping, fallback to cl100k_base if not found
+        // const encoding = MODEL_TO_ENCODING[model] || cl100k_base;
+        // const tokenCount = encoding.;
+        // return tokenCount.toString();
+        const [model] = args;
+        let countTokensFn;
+        if (MODEL_TO_COUNT_TOKENS[model]) {
+            countTokensFn = await MODEL_TO_COUNT_TOKENS[model]();
+        } else {
+            // fallback to default (gpt-3.5-turbo encoding)
+            countTokensFn = (await import("gpt-tokenizer/model/gpt-3.5-turbo")).countTokens;
+        }
+        const tokenCount = countTokensFn(input);
+        return tokenCount.toString();
+    }
+
+}
+
+export default CountAITokens;
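Note: the mapping above resolves a model-specific countTokens function lazily via dynamic import(), so only the selected model's encoding data is ever loaded. As a minimal illustrative sketch (not part of the patch), assuming gpt-tokenizer's model entry points export countTokens(text) as the patch relies on:

    import { countTokens } from "gpt-tokenizer/model/gpt-3.5-turbo";

    // "hello world" encodes to two cl100k_base tokens: "hello" and " world"
    console.log(countTokens("hello world")); // 2
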
From 233eb3d452699a3bef117e6187cd361cf6cc6624 Mon Sep 17 00:00:00 2001
From: "Glenn R. Martin" <222487+grmartin@users.noreply.github.com>
Date: Tue, 3 Jun 2025 10:44:23 -0400
Subject: [PATCH 2/9] Adding AI Category

(cherry picked from commit 83381be9c7346467919620ecee7a1ba2eb058811)
---
 src/core/config/Categories.json | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 434c8bb6..8d835921 100644
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -3,6 +3,10 @@
         "name": "Favourites",
         "ops": []
     },
+    {
+        "name": "AI",
+        "ops": ["Count AI Tokens"]
+    },
     {
         "name": "Data format",
         "ops": [

From 0df7ac0bad9566852b5e2b66ca914330be671d6d Mon Sep 17 00:00:00 2001
From: "Glenn R. Martin" <222487+grmartin@users.noreply.github.com>
Date: Tue, 3 Jun 2025 21:17:53 -0400
Subject: [PATCH 3/9] Making the tokenizer into a library.
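Note: an illustrative sketch (not part of the commit) of how a consumer can resolve a tokenizer from the mapping this patch introduces, assuming only the MODEL_TO_MODULES/defaultValue shape shown below; unknown models fall back to the Symbol-keyed default thunk:

    import { defaultValue, MODEL_TO_MODULES } from "./src/core/lib/GPTTokenizer.mjs"; // path as in the tree below

    async function resolveTokenizer(model) {
        // The default entry is keyed by a Symbol, so it never collides with a model name.
        const thunk = MODEL_TO_MODULES[model] || MODEL_TO_MODULES[defaultValue];
        return await thunk(); // resolves to { countTokens, encode, decode }
    }
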
---
 src/core/lib/GPTTokenizer.mjs         | 191 ++++++++++++++++++++++++++
 src/core/operations/CountAITokens.mjs |  49 +------
 2 files changed, 197 insertions(+), 43 deletions(-)
 create mode 100644 src/core/lib/GPTTokenizer.mjs

diff --git a/src/core/lib/GPTTokenizer.mjs b/src/core/lib/GPTTokenizer.mjs
new file mode 100644
index 00000000..ba03727c
--- /dev/null
+++ b/src/core/lib/GPTTokenizer.mjs
@@ -0,0 +1,191 @@
+// noinspection SpellCheckingInspection
+
+/**
+ * @author grmartin [grmartin@engineer.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+/**
+ * Convert an imported module into a solid type
+ * @param m an imported module
+ * @returns {TokenizerModule}
+ */
+const exportModule = (m) => {
+    return {
+        countTokens: m.countTokens, // # of tokens
+        encode: m.encode, // tokens
+        decode: m.decode, // token ids
+    };
+};
+
+export const defaultValue = Symbol("*");
+
+// Tokenizer module constants
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const GPT_35_TURBO_TOKENIZER = () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const TEXT_EMBEDDING_ADA_002_TOKENIZER = () => import("gpt-tokenizer/model/text-embedding-ada-002").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const TEXT_EMBEDDING_3_LARGE_TOKENIZER = () => import("gpt-tokenizer/model/text-embedding-3-large").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const TEXT_EMBEDDING_3_SMALL_TOKENIZER = () => import("gpt-tokenizer/model/text-embedding-3-small").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const CODE_DAVINCI_002_TOKENIZER = () => import("gpt-tokenizer/model/code-davinci-002").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const CODE_CUSHMAN_002_TOKENIZER = () => import("gpt-tokenizer/model/code-cushman-002").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const TEXT_DAVINCI_002_TOKENIZER = () => import("gpt-tokenizer/model/text-davinci-002").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const TEXT_DAVINCI_003_TOKENIZER = () => import("gpt-tokenizer/model/text-davinci-003").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const TEXT_DAVINCI_EDIT_001_TOKENIZER = () => import("gpt-tokenizer/model/text-davinci-edit-001").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const CODE_DAVINCI_EDIT_001_TOKENIZER = () => import("gpt-tokenizer/model/code-davinci-edit-001").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const DAVINCI_TOKENIZER = () => import("gpt-tokenizer/model/davinci").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const CURIE_TOKENIZER = () => import("gpt-tokenizer/model/curie").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const BABBAGE_TOKENIZER = () => import("gpt-tokenizer/model/babbage").then(m => exportModule(m));
+
+/**
+ * @returns {Promise<TokenizerModule>}
+ * @constructor
+ */
+const ADA_TOKENIZER = () => import("gpt-tokenizer/model/ada").then(m => exportModule(m));
+
+// This mapping returns a Promise that resolves to the correct countTokens function for the model.
+export const MODEL_TO_MODULES = {
+    // cl100k_base models
+    [defaultValue]: GPT_35_TURBO_TOKENIZER,
+    "gpt-4": GPT_35_TURBO_TOKENIZER,
+    "gpt-4-32k": GPT_35_TURBO_TOKENIZER,
+    "gpt-4-turbo": GPT_35_TURBO_TOKENIZER,
+    "gpt-4o": GPT_35_TURBO_TOKENIZER,
+    "gpt-4-0125-preview": GPT_35_TURBO_TOKENIZER,
+    "gpt-4-1106-preview": GPT_35_TURBO_TOKENIZER,
+    "gpt-3.5-turbo": GPT_35_TURBO_TOKENIZER,
+    "gpt-3.5-turbo-16k": GPT_35_TURBO_TOKENIZER,
+    "gpt-3.5-turbo-instruct": GPT_35_TURBO_TOKENIZER,
+    "gpt-3.5-turbo-0125": GPT_35_TURBO_TOKENIZER,
+    "gpt-3.5-turbo-1106": GPT_35_TURBO_TOKENIZER,
+    "text-embedding-ada-002": TEXT_EMBEDDING_ADA_002_TOKENIZER,
+    "text-embedding-3-large": TEXT_EMBEDDING_3_LARGE_TOKENIZER,
+    "text-embedding-3-small": TEXT_EMBEDDING_3_SMALL_TOKENIZER,
+
+    // p50k_base models
+    "code-davinci-002": CODE_DAVINCI_002_TOKENIZER,
+    "code-davinci-001": CODE_DAVINCI_002_TOKENIZER,
+    "code-cushman-002": CODE_CUSHMAN_002_TOKENIZER,
+    "code-cushman-001": CODE_CUSHMAN_002_TOKENIZER,
+    "text-davinci-002": TEXT_DAVINCI_002_TOKENIZER,
+    "text-davinci-003": TEXT_DAVINCI_003_TOKENIZER,
+
+    // p50k_edit models
+    "text-davinci-edit-001": TEXT_DAVINCI_EDIT_001_TOKENIZER,
+    "code-davinci-edit-001": CODE_DAVINCI_EDIT_001_TOKENIZER,
+
+    // r50k_base models
+    "davinci": DAVINCI_TOKENIZER,
+    "curie": CURIE_TOKENIZER,
+    "babbage": BABBAGE_TOKENIZER,
+    "ada": ADA_TOKENIZER,
+};
+
+/**
+ * @typedef {Object} EncodeOptions
+ * @property {Set<string>|'all'} [allowedSpecial] - A list of special tokens that are allowed in the input.
+ * If set to 'all', all special tokens are allowed except those in disallowedSpecial.
+ * @default undefined
+ * @property {Set<string>|'all'} [disallowedSpecial] - A list of special tokens that are disallowed in the input.
+ * If set to 'all', all special tokens are disallowed except those in allowedSpecial.
+ * @default 'all'
+ */
+
+/**
+ * @typedef {Object} ChatMessage
+ * @property {'system'|'user'|'assistant'} [role] - The role of the message sender.
+ * @property {string} [name] - The name of the message sender.
+ * @property {string} content - The content of the message.
+ */
+
+/**
+ * @func EncodeFn
+ * @param {string} lineToEncode - The string to encode.
+ * @param {EncodeOptions} [encodeOptions] - Optional encoding options.
+ * @returns {number[]} An array of numbers representing the encoded result.
+ */
+
+/**
+ * @func DecodeFn
+ * @param {Iterable<number>} inputTokensToDecode - An iterable collection of numbers to decode.
+ * @returns {string} The decoded string.
+ */
+
+/**
+ * A function that counts tokens.
+ *
+ * @func CountTokensFn
+ * @param {string | Iterable<ChatMessage>} input - The input string or an iterable of ChatMessage objects.
+ * @param {EncodeOptions} [encodeOptions] - Optional encoding options to customize the token counting process.
+ * @returns {number} The total number of tokens counted.
+ */ + +/** + * @typedef {Object} TokenizerModule + * @property {CountTokensFn} countTokens - Function to count tokens in input + * @property {DecodeFn} decode - Function to convert token IDs back to text + * @property {EncodeFn} encode - Function to convert text to token IDs + */ diff --git a/src/core/operations/CountAITokens.mjs b/src/core/operations/CountAITokens.mjs index ecba26ee..c1876fba 100644 --- a/src/core/operations/CountAITokens.mjs +++ b/src/core/operations/CountAITokens.mjs @@ -1,48 +1,11 @@ /** - * @author grmartin [grmartin] + * @author grmartin [grmartin@engineer.com] * @copyright Crown Copyright 2016 * @license Apache-2.0 */ import Operation from "../Operation.mjs"; - -// This mapping returns a Promise that resolves to the correct countTokens function for the model. -const MODEL_TO_COUNT_TOKENS = { - // cl100k_base models - "gpt-4": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-4-32k": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-4-turbo": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-4o": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-4-0125-preview": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-4-1106-preview": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-3.5-turbo": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-3.5-turbo-16k": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-3.5-turbo-instruct": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-3.5-turbo-0125": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "gpt-3.5-turbo-1106": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens), - "text-embedding-ada-002": () => import("gpt-tokenizer/model/text-embedding-ada-002").then(m => m.countTokens), - "text-embedding-3-large": () => import("gpt-tokenizer/model/text-embedding-3-large").then(m => m.countTokens), - "text-embedding-3-small": () => import("gpt-tokenizer/model/text-embedding-3-small").then(m => m.countTokens), - - // p50k_base models - "code-davinci-002": () => import("gpt-tokenizer/model/code-davinci-002").then(m => m.countTokens), - "code-davinci-001": () => import("gpt-tokenizer/model/code-davinci-002").then(m => m.countTokens), - "code-cushman-002": () => import("gpt-tokenizer/model/code-cushman-002").then(m => m.countTokens), - "code-cushman-001": () => import("gpt-tokenizer/model/code-cushman-002").then(m => m.countTokens), - "text-davinci-002": () => import("gpt-tokenizer/model/text-davinci-002").then(m => m.countTokens), - "text-davinci-003": () => import("gpt-tokenizer/model/text-davinci-003").then(m => m.countTokens), - - // p50k_edit models - "text-davinci-edit-001": () => import("gpt-tokenizer/model/text-davinci-edit-001").then(m => m.countTokens), - "code-davinci-edit-001": () => import("gpt-tokenizer/model/code-davinci-edit-001").then(m => m.countTokens), - - // r50k_base models - "davinci": () => import("gpt-tokenizer/model/davinci").then(m => m.countTokens), - "curie": () => import("gpt-tokenizer/model/curie").then(m => m.countTokens), - "babbage": () => import("gpt-tokenizer/model/babbage").then(m => m.countTokens), - "ada": () => import("gpt-tokenizer/model/ada").then(m => m.countTokens), -}; - +import {defaultValue, MODEL_TO_MODULES} from "../lib/GPTTokenizer.mjs"; /** * 
  * Count AI Tokens operation
@@ -65,7 +28,7 @@ class CountAITokens extends Operation {
             {
                 name: "Model",
                 type: "option",
-                value: Object.keys(MODEL_TO_COUNT_TOKENS),
+                value: Object.keys(MODEL_TO_MODULES),
             }
         ];
     }
@@ -84,11 +47,11 @@ class CountAITokens extends Operation {
         const [model] = args;
         let countTokensFn;
-        if (MODEL_TO_COUNT_TOKENS[model]) {
-            countTokensFn = await MODEL_TO_COUNT_TOKENS[model]();
+        if (MODEL_TO_MODULES[model]) {
+            countTokensFn = (await MODEL_TO_MODULES[model]()).countTokens;
         } else {
             // fallback to default (gpt-3.5-turbo encoding)
-            countTokensFn = (await import("gpt-tokenizer/model/gpt-3.5-turbo")).countTokens;
+            countTokensFn = (await MODEL_TO_MODULES[defaultValue]()).countTokens;
         }
         const tokenCount = countTokensFn(input);
         return tokenCount.toString();

From 8443330abd94ff111dddeea568866de64fe92a1d Mon Sep 17 00:00:00 2001
From: "Glenn R. Martin" <222487+grmartin@users.noreply.github.com>
Date: Tue, 3 Jun 2025 21:19:28 -0400
Subject: [PATCH 4/9] Removing some unneeded comments

---
 src/core/operations/CountAITokens.mjs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/core/operations/CountAITokens.mjs b/src/core/operations/CountAITokens.mjs
index c1876fba..72b92090 100644
--- a/src/core/operations/CountAITokens.mjs
+++ b/src/core/operations/CountAITokens.mjs
@@ -40,11 +40,7 @@ class CountAITokens extends Operation {
      */
     async run(input, args) {
         if (!input) return "";
-        // const [model] = args;
-        // // Use the mapping, fallback to cl100k_base if not found
-        // const encoding = MODEL_TO_ENCODING[model] || cl100k_base;
-        // const tokenCount = encoding.;
-        // return tokenCount.toString();
+
         const [model] = args;
         let countTokensFn;
From dd583a4943826943171e3db2afff39fbab8d68ce Mon Sep 17 00:00:00 2001
From: "Glenn R. Martin" <222487+grmartin@users.noreply.github.com>
Date: Sun, 8 Jun 2025 19:17:23 -0400
Subject: [PATCH 5/9] Adding GPT Token Parser display
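Note: an illustrative round trip (not part of the commit) showing how encode() and decodeGenerator() pair up so each token ID can be rendered back as its own text fragment, assuming gpt-tokenizer's generator API that the new operation calls:

    import { encode, decodeGenerator } from "gpt-tokenizer/model/gpt-3.5-turbo";

    const ids = encode("hello world");       // token IDs, e.g. [15339, 1917]
    const parts = [...decodeGenerator(ids)]; // per-token text, e.g. ["hello", " world"]
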
---
 src/core/config/Categories.json       |   2 +-
 src/core/lib/GPTTokenizer.mjs         | 124 +---------------------
 src/core/operations/ParseAITokens.mjs | 147 ++++++++++++++++++++++++++
 3 files changed, 150 insertions(+), 123 deletions(-)
 create mode 100644 src/core/operations/ParseAITokens.mjs

diff --git a/src/core/config/Categories.json b/src/core/config/Categories.json
index 8d835921..010e72a1 100644
--- a/src/core/config/Categories.json
+++ b/src/core/config/Categories.json
@@ -5,7 +5,7 @@
     },
     {
         "name": "AI",
-        "ops": ["Count AI Tokens"]
+        "ops": ["Count AI Tokens", "Parse AI Tokens"]
     },
     {
         "name": "Data format",
diff --git a/src/core/lib/GPTTokenizer.mjs b/src/core/lib/GPTTokenizer.mjs
index ba03727c..e2c57f0a 100644
--- a/src/core/lib/GPTTokenizer.mjs
+++ b/src/core/lib/GPTTokenizer.mjs
@@ -6,104 +6,30 @@
  * @license Apache-2.0
  */
 
-/**
- * Convert an imported module into a solid type
- * @param m an imported module
- * @returns {TokenizerModule}
- */
 const exportModule = (m) => {
     return {
         countTokens: m.countTokens, // # of tokens
-        encode: m.encode, // tokens
-        decode: m.decode, // token ids
+        encode: m.encode, // token ids
+        decodeGenerator: m.decodeGenerator, // tokens
     };
 };
 
 export const defaultValue = Symbol("*");
 
 // Tokenizer module constants
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const GPT_35_TURBO_TOKENIZER = () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const TEXT_EMBEDDING_ADA_002_TOKENIZER = () => import("gpt-tokenizer/model/text-embedding-ada-002").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const TEXT_EMBEDDING_3_LARGE_TOKENIZER = () => import("gpt-tokenizer/model/text-embedding-3-large").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const TEXT_EMBEDDING_3_SMALL_TOKENIZER = () => import("gpt-tokenizer/model/text-embedding-3-small").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const CODE_DAVINCI_002_TOKENIZER = () => import("gpt-tokenizer/model/code-davinci-002").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const CODE_CUSHMAN_002_TOKENIZER = () => import("gpt-tokenizer/model/code-cushman-002").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const TEXT_DAVINCI_002_TOKENIZER = () => import("gpt-tokenizer/model/text-davinci-002").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const TEXT_DAVINCI_003_TOKENIZER = () => import("gpt-tokenizer/model/text-davinci-003").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const TEXT_DAVINCI_EDIT_001_TOKENIZER = () => import("gpt-tokenizer/model/text-davinci-edit-001").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const CODE_DAVINCI_EDIT_001_TOKENIZER = () => import("gpt-tokenizer/model/code-davinci-edit-001").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const DAVINCI_TOKENIZER = () => import("gpt-tokenizer/model/davinci").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const CURIE_TOKENIZER = () => import("gpt-tokenizer/model/curie").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const BABBAGE_TOKENIZER = () => import("gpt-tokenizer/model/babbage").then(m => exportModule(m));
-
-/**
- * @returns {Promise<TokenizerModule>}
- * @constructor
- */
 const ADA_TOKENIZER = () => import("gpt-tokenizer/model/ada").then(m => exportModule(m));
 
 // This mapping returns a Promise that resolves to the correct countTokens function for the model.
@@ -143,49 +69,3 @@ export const MODEL_TO_MODULES = {
     "babbage": BABBAGE_TOKENIZER,
     "ada": ADA_TOKENIZER,
 };
-
-/**
- * @typedef {Object} EncodeOptions
- * @property {Set<string>|'all'} [allowedSpecial] - A list of special tokens that are allowed in the input.
- * If set to 'all', all special tokens are allowed except those in disallowedSpecial.
- * @default undefined
- * @property {Set<string>|'all'} [disallowedSpecial] - A list of special tokens that are disallowed in the input.
- * If set to 'all', all special tokens are disallowed except those in allowedSpecial.
- * @default 'all'
- */
-
-/**
- * @typedef {Object} ChatMessage
- * @property {'system'|'user'|'assistant'} [role] - The role of the message sender.
- * @property {string} [name] - The name of the message sender.
- * @property {string} content - The content of the message.
- */
-
-/**
- * @func EncodeFn
- * @param {string} lineToEncode - The string to encode.
- * @param {EncodeOptions} [encodeOptions] - Optional encoding options.
- * @returns {number[]} An array of numbers representing the encoded result.
- */
-
-/**
- * @func DecodeFn
- * @param {Iterable<number>} inputTokensToDecode - An iterable collection of numbers to decode.
- * @returns {string} The decoded string.
- */
-
-/**
- * A function that counts tokens.
- *
- * @func CountTokensFn
- * @param {string | Iterable<ChatMessage>} input - The input string or an iterable of ChatMessage objects.
- * @param {EncodeOptions} [encodeOptions] - Optional encoding options to customize the token counting process.
- * @returns {number} The total number of tokens counted.
- */
-
-/**
- * @typedef {Object} TokenizerModule
- * @property {CountTokensFn} countTokens - Function to count tokens in input
- * @property {DecodeFn} decode - Function to convert token IDs back to text
- * @property {EncodeFn} encode - Function to convert text to token IDs
- */
diff --git a/src/core/operations/ParseAITokens.mjs b/src/core/operations/ParseAITokens.mjs
new file mode 100644
index 00000000..b56d85d8
--- /dev/null
+++ b/src/core/operations/ParseAITokens.mjs
@@ -0,0 +1,147 @@
+/**
+ * @author grmartin [grmartin@engineer.com]
+ * @copyright Crown Copyright 2016
+ * @license Apache-2.0
+ */
+
+import Operation from "../Operation.mjs";
+import {defaultValue, MODEL_TO_MODULES} from "../lib/GPTTokenizer.mjs";
+
+const pastelColors = [
+    "rgba(102,197,204,.4)",
+    "rgba(246,207,113,.4)",
+    "rgba(248,156,116,.4)",
+    "rgba(239,65,70,.4)",
+    "rgba(220,176,242,.4)",
+    "rgba(135,197,95,.4)",
+    "rgba(158,185,243,.4)",
+    "rgba(254,136,177,.4)",
+    "rgba(201,219,116,.4)",
+    "rgba(139,224,164,.4)",
+    "rgba(180,151,231,.4)",
+];
+
+/**
+ * Parse AI Tokens operation
+ */
+class ParseAITokens extends Operation {
+
+    /**
+     * Parse AI Tokens constructor
+     */
+    constructor() {
+        super();
+
+        this.name = "Parse AI Tokens";
+        this.module = "AI";
+        this.infoURL = "https://github.com/niieani/gpt-tokenizer";
+        this.description = "Parses the GPT tokens in the input text using niieani/gpt-tokenizer. Select the model to use the correct encoding.";
+        this.inputType = "string";
+        this.outputType = "html";
+        this.args = [
+            {
+                name: "Model",
+                type: "option",
+                value: Object.keys(MODEL_TO_MODULES),
+            },
+            {
+                name: "Show Token IDs",
+                type: "boolean",
+                value: false
+            }
+        ];
+    }
+
+    /**
+     * @param {string} input
+     * @param {Object[]} args
+     * @returns {string}
+     */
+    async run(input, args) {
+        if (!input) return "";
+
+        const [model, showIds] = args;
+        let fns;
+        if (MODEL_TO_MODULES[model]) {
+            fns = (await MODEL_TO_MODULES[model]());
+        } else {
+            // fallback to default (gpt-3.5-turbo encoding)
+            fns = (await MODEL_TO_MODULES[defaultValue]());
+        }
+
+        const encodedTokens = fns.encode(input); // IDs
+
+        let displayTokens = [];
+        if (showIds) {
+            displayTokens = encodedTokens.map((x)=> x.toString());
+        } else {
+            const tokens = [];
+            for (const token of fns.decodeGenerator(encodedTokens)) {
+                tokens.push(token);
+            }
+            displayTokens = tokens;
+        }
+
+        return this.format(input, displayTokens);
+
+    };
+
+    /**
+     * Format HTML
+     * @param {string} input
+     * @param {string[]} tokens
+     */
+    format(input, tokens) {
+
+        const tokenHtml = tokens.map((t, i) => {
+            const tok =
+                t.replaceAll(" ", "\u00A0")
+                    .replaceAll("\n", "");
+
+            const css = [
+                `background-color:${pastelColors[i % pastelColors.length]}`,
+                "padding: 0 0",
+                "border-radius: 3px",
+                "margin-right: 0",
+                "margin-bottom: 4px",
+                "display: inline-block",
+                "height: 1.5em"
+            ];
+
+            return `<span style="${css.join(";")}">${tok}</span>`;
+        });
+
+        return this.replaceSpacesOutsideTags(`
+            <div>
+                <h1>Tokens</h1>
+                <div>
+                    ${tokenHtml.join("")}
+                </div>
+                <ul>
+                    <li>Characters: ${input.length}</li>
+                    <li>Tokens: ${tokens.length}</li>
+                </ul>
+            </div>
+        `
+        );
+    };
+
+    /**
+     * Replace all space not starting within the HTML tag.
+     * @param {string} htmlString
+     * @returns {string}
+     */
+    replaceSpacesOutsideTags(htmlString) {
+        return htmlString.replace(/(<[^>]*?>)|(\s+)/g, function(match, tag, spaces) {
+            if (tag) {
+                return tag;
+            } else if (spaces) {
+                return "";
+            }
+        }).replace(/[\r\n]/g, "");
+    };
+
+}
+
+export default ParseAITokens;

From 0b913d070a87bb7f02666610a7e41c1b45133dd1 Mon Sep 17 00:00:00 2001
From: "Glenn R. Martin" <222487+grmartin@users.noreply.github.com>
Date: Mon, 9 Jun 2025 00:02:01 -0400
Subject: [PATCH 6/9] Encoding HTML entities as well as ensuring no script
 tags slip by

---
 src/core/operations/ParseAITokens.mjs | 35 +++++++++++++++++----------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/core/operations/ParseAITokens.mjs b/src/core/operations/ParseAITokens.mjs
index b56d85d8..06d4c5f6 100644
--- a/src/core/operations/ParseAITokens.mjs
+++ b/src/core/operations/ParseAITokens.mjs
@@ -95,7 +95,8 @@ class ParseAITokens extends Operation {
         const tokenHtml = tokens.map((t, i) => {
             const tok =
-                t.replaceAll(" ", "\u00A0")
+                t.replace(/[\u00A0-\u9999<>&]/g, i => "&#"+i.charCodeAt(0)+";")
+                    .replaceAll(" ", "\u00A0")
                     .replaceAll("\n", "");
@@ -127,21 +128,29 @@
     };
 
     /**
-     * Replace all space not starting within the HTML tag.
-     * @param {string} htmlString
-     * @returns {string}
+     * Replace spaces outside HTML tags and sanitize
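Note: an illustrative sketch (not part of the series) of the escaping PATCH 6 applies before token text is interpolated into the HTML output. Entity-encoding &, <, > and the U+00A0-U+9999 range is what keeps injected markup such as <script> inert:

    const escapeToken = (t) =>
        t.replace(/[\u00A0-\u9999<>&]/g, (c) => "&#" + c.charCodeAt(0) + ";");

    escapeToken("<script>alert(1)</script>");
    // => "&#60;script&#62;alert(1)&#60;/script&#62;"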