Adding AI Token Counter

(cherry picked from commit 8b20dcbf1b52e3accc430655c2d1d94f058171fe)
2025-06-14 10:14:53 -04:00 · 2025-06-03 09:22:20 -04:00 · 2025-06-03 09:22:20 -04:00 · 6df8c1004f
commit 6df8c1004f
parent c57556f49f
3 changed files with 107 additions and 0 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -46,6 +46,7 @@
        "file-saver": "^2.0.5",
        "flat": "^6.0.1",
        "geodesy": "1.1.3",
        "gpt-tokenizer": "^2.9.0",
        "handlebars": "^4.7.8",
        "hash-wasm": "^4.12.0",
        "highlight.js": "^11.9.0",
@ -10361,6 +10362,11 @@
        "url": "https://github.com/sponsors/ljharb"
      }
    },
    "node_modules/gpt-tokenizer": {
      "version": "2.9.0",
      "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-2.9.0.tgz",
      "integrity": "sha512-YSpexBL/k4bfliAzMrRqn3M6+it02LutVyhVpDeMKrC/O9+pCe/5s8U2hYKa2vFLD5/vHhsKc8sOn/qGqII8Kg=="
    },
    "node_modules/graceful-fs": {
      "version": "4.2.11",
      "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
--- a/package.json
+++ b/package.json
@ -132,6 +132,7 @@
    "file-saver": "^2.0.5",
    "flat": "^6.0.1",
    "geodesy": "1.1.3",
    "gpt-tokenizer": "^2.9.0",
    "handlebars": "^4.7.8",
    "hash-wasm": "^4.12.0",
    "highlight.js": "^11.9.0",
--- a/src/core/operations/CountAITokens.mjs
+++ b/src/core/operations/CountAITokens.mjs
@ -0,0 +1,100 @@
 /**
 * @author grmartin [grmartin]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 */
 import Operation from "../Operation.mjs";
 /**
 * Maps each selectable model name to a loader function. Calling a loader
 * dynamically imports the matching gpt-tokenizer model module and returns a
 * Promise that resolves to that module's countTokens function.
 *
 * The import specifiers are kept as string literals so a bundler can resolve
 * each module statically. Key order is preserved because the keys are exposed
 * as the operation's option list.
 */
 const MODEL_TO_COUNT_TOKENS = {
    // cl100k_base models
    "gpt-4": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-4-32k": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-4-turbo": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    // gpt-4o uses the o200k_base encoding, unlike its cl100k_base neighbours,
    // so it must load its own model module to produce correct counts.
    "gpt-4o": () => import("gpt-tokenizer/model/gpt-4o").then(m => m.countTokens),
    "gpt-4-0125-preview": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-4-1106-preview": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-3.5-turbo": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-3.5-turbo-16k": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-3.5-turbo-instruct": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-3.5-turbo-0125": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "gpt-3.5-turbo-1106": () => import("gpt-tokenizer/model/gpt-3.5-turbo").then(m => m.countTokens),
    "text-embedding-ada-002": () => import("gpt-tokenizer/model/text-embedding-ada-002").then(m => m.countTokens),
    "text-embedding-3-large": () => import("gpt-tokenizer/model/text-embedding-3-large").then(m => m.countTokens),
    "text-embedding-3-small": () => import("gpt-tokenizer/model/text-embedding-3-small").then(m => m.countTokens),
    // p50k_base models
    "code-davinci-002": () => import("gpt-tokenizer/model/code-davinci-002").then(m => m.countTokens),
    "code-davinci-001": () => import("gpt-tokenizer/model/code-davinci-002").then(m => m.countTokens),
    "code-cushman-002": () => import("gpt-tokenizer/model/code-cushman-002").then(m => m.countTokens),
    "code-cushman-001": () => import("gpt-tokenizer/model/code-cushman-002").then(m => m.countTokens),
    "text-davinci-002": () => import("gpt-tokenizer/model/text-davinci-002").then(m => m.countTokens),
    "text-davinci-003": () => import("gpt-tokenizer/model/text-davinci-003").then(m => m.countTokens),
    // p50k_edit models
    "text-davinci-edit-001": () => import("gpt-tokenizer/model/text-davinci-edit-001").then(m => m.countTokens),
    "code-davinci-edit-001": () => import("gpt-tokenizer/model/code-davinci-edit-001").then(m => m.countTokens),
    // r50k_base models
    "davinci": () => import("gpt-tokenizer/model/davinci").then(m => m.countTokens),
    "curie": () => import("gpt-tokenizer/model/curie").then(m => m.countTokens),
    "babbage": () => import("gpt-tokenizer/model/babbage").then(m => m.countTokens),
    "ada": () => import("gpt-tokenizer/model/ada").then(m => m.countTokens),
 };
 /**
 * Count AI Tokens operation
 *
 * Reports how many tokens the input text occupies under the tokenizer that
 * gpt-tokenizer associates with the selected model.
 */
 class CountAITokens extends Operation {
    /**
     * Count AI Tokens constructor
     *
     * Declares the operation metadata and a single "Model" option whose
     * choices are the keys of MODEL_TO_COUNT_TOKENS.
     */
    constructor() {
        super();
        this.name = "Count AI Tokens";
        this.module = "AI";
        this.infoURL = "https://github.com/niieani/gpt-tokenizer";
        this.description = "Counts the number of GPT tokens in the input text using niieani/gpt-tokenizer. Select the model to use the correct encoding.";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Model",
                type: "option",
                value: Object.keys(MODEL_TO_COUNT_TOKENS),
            }
        ];
    }
    /**
     * Counts the tokens in the input using the tokenizer for the chosen model.
     *
     * @param {string} input - Text to tokenize.
     * @param {Object[]} args - Operation arguments; the first element is the model name.
     * @returns {Promise<string>} The token count as a decimal string, or an
     *     empty string when the input is empty.
     */
    async run(input, args) {
        // Empty input yields empty output without loading any tokenizer module.
        if (!input) return "";
        const [model] = args;
        // Own-property check: a plain truthy lookup would also match inherited
        // names such as "constructor" or "toString" and try to call them as
        // loaders instead of falling back.
        const isKnownModel = Object.prototype.hasOwnProperty.call(MODEL_TO_COUNT_TOKENS, model);
        let countTokensFn;
        if (isKnownModel) {
            countTokensFn = await MODEL_TO_COUNT_TOKENS[model]();
        } else {
            // fallback to default (gpt-3.5-turbo encoding)
            countTokensFn = (await import("gpt-tokenizer/model/gpt-3.5-turbo")).countTokens;
        }
        return countTokensFn(input).toString();
    }
 }
 export default CountAITokens;