feat(new tool): HAR file sanitizer

Fix #811
This commit is contained in:
sharevb 2025-03-09 21:09:30 +01:00 committed by ShareVB
parent 08d977b8cd
commit c5c4e5d026
14 changed files with 7042 additions and 8486 deletions

View file

@ -0,0 +1,150 @@
<script setup lang="ts">
import { downloadFile } from './lib/downloadFile';
import { defaultScrubItems, getHarInfo, sanitize } from './lib/har_sanitize';
/** Selection state: for every scrub category, maps an item name to whether it is selected for scrubbing. */
type ScrubState = Record<ScrubType, Record<string, boolean>>;
/** The categories of HAR items the user can choose to scrub. */
type ScrubType =
| 'cookies'
| 'headers'
| 'queryArgs'
| 'postParams'
| 'mimeTypes';
/** Human-readable title for each scrub category; the template iterates over this map to render one card per category. */
const typeMap: Record<ScrubType, string> = {
cookies: 'Cookies',
mimeTypes: 'Mime Types',
headers: 'Headers',
postParams: 'Post Body Params',
queryArgs: 'Query String Parameters',
};
/** Empty selection state (no items in any category). Used as the initial value and as the base of freshly computed states. */
const defaulScrubState: ScrubState = {
cookies: {},
headers: {},
queryArgs: {},
postParams: {},
mimeTypes: {},
};
// Selection state bound to the checkboxes in the template.
// NOTE(review): `ref` is not imported in this script — presumably Vue's `ref` provided by auto-import; confirm.
const scrubItemsToClean = ref<ScrubState>(defaulScrubState);
/**
 * Build the checkbox state for a HAR document.
 *
 * Every non-empty name reported by `getHarInfo` becomes a key of its
 * category; the value is `true` when the name is part of
 * `defaultScrubItems` (i.e. pre-selected for scrubbing).
 *
 * @param input Raw HAR file content (JSON text).
 * @returns The selection state for all categories found in the HAR.
 */
function getScrubableItems(input: string): ScrubState {
  const harInfo = getHarInfo(input);
  const state = { ...defaulScrubState };
  for (const [category, names] of Object.entries(harInfo) as [ScrubType, string[]][]) {
    const selection: Record<string, boolean> = {};
    for (const name of names) {
      // Empty names cannot be rendered as a checkbox label, so they are skipped.
      if (name) {
        selection[name] = defaultScrubItems.includes(name);
      }
    }
    state[category] = selection;
  }
  return state;
}
/**
 * Sanitize a HAR document according to the user's checkbox selection.
 *
 * Selected cookies, headers, query arguments and post parameters are merged
 * (de-duplicated, in that order) into the word list; selected MIME types are
 * passed separately.
 *
 * @param input Raw HAR file content (JSON text).
 * @param scrubItems Selection state, one name-to-boolean map per category.
 * @returns Whatever `sanitize` returns for the given input and selection.
 */
function sanitizeHar(input: string, scrubItems: ScrubState) {
  // Names whose checkbox is ticked, in the map's own key order.
  const selectedNames = (selection: Record<string, boolean>) =>
    Object.keys(selection).filter(name => selection[name]);

  const wordCategories = [
    scrubItems.cookies,
    scrubItems.headers,
    scrubItems.queryArgs,
    scrubItems.postParams,
  ];
  const scrubWords = [...new Set(wordCategories.flatMap(category => selectedNames(category)))];
  const scrubMimetypes = [...new Set(selectedNames(scrubItems.mimeTypes))];

  return sanitize(input, { scrubWords, scrubMimetypes });
}
// Most recently uploaded file (null until one is uploaded); its name is reused for the download file name.
const file = ref<File | null>(null);
// Error text shown in the alert; an empty string means there is no error.
const error = ref('');
/**
 * Read the content of a file as text.
 *
 * @param file The file to read.
 * @returns A promise resolving to the file's text (empty string when the
 * reader produced no result). It rejects with the reader's error, so that
 * stringifying the rejection yields a meaningful message instead of
 * "[object ProgressEvent]".
 */
function readAsTextAsync(file: File) {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    // Attach the handlers before starting the read so no event can be missed.
    reader.onload = () => resolve(reader.result?.toString() ?? '');
    reader.onerror = () => reject(reader.error ?? new Error('Failed to read file'));
    reader.readAsText(file);
  });
}
// Raw text of the uploaded HAR file; this is what gets sanitized on download.
const harContent = ref('');
/**
 * Handle a newly uploaded file: read it, then compute the list of items
 * that can be scrubbed.
 *
 * Any failure (unreadable file, invalid JSON, unexpected HAR structure) is
 * reported through `error` rather than escaping as an unhandled rejection.
 *
 * @param uploadedFile The file selected or dropped by the user.
 */
async function onFileUploaded(uploadedFile: File) {
  file.value = uploadedFile;
  error.value = '';
  try {
    // Reading happens inside the try block so read errors are shown to the user too.
    harContent.value = await readAsTextAsync(uploadedFile);
    scrubItemsToClean.value = getScrubableItems(harContent.value);
  }
  catch (e: unknown) {
    error.value = String(e);
  }
}
/**
 * Sanitize the uploaded HAR with the current selection and trigger the
 * download of the result as `sanitized-<original name>`.
 *
 * Does nothing when no file has been loaded yet (the button is visible
 * before any upload). Errors raised while sanitizing are reported through
 * `error` instead of being thrown from the click handler.
 */
function processHar() {
  const uploaded = file.value;
  if (!uploaded || !harContent.value) {
    return;
  }
  try {
    downloadFile(sanitizeHar(harContent.value, scrubItemsToClean.value), `sanitized-${uploaded.name}`);
  }
  catch (e: unknown) {
    error.value = String(e);
  }
}
</script>
<template>
<div>
<div style="flex: 0 0 100%" mb-3>
<div mx-auto max-w-600px>
<c-file-upload
title="Drag and drop a HAR file here, or click to select a file"
accept=".har" @file-upload="onFileUploaded"
/>
</div>
</div>
<c-alert v-if="error" title="Error">
{{ error }}
</c-alert>
<div v-for="(title, key) in typeMap" :key="key" mb-1>
<c-card v-if="Object.keys(scrubItemsToClean[key]).length" :title="title">
<n-checkbox font-size-5 @update:checked="(allChecked: boolean) => Object.keys(scrubItemsToClean[key]).forEach((name) => scrubItemsToClean[key][name] = allChecked)">
All {{ title }}
</n-checkbox>
<n-space size="large">
<n-checkbox v-for="(checked, name) in scrubItemsToClean[key]" :key="name" v-model:checked="scrubItemsToClean[key][name]" style="width: 150px">
{{ name }}
</n-checkbox>
</n-space>
</c-card>
</div>
<div v-if="!error" mt-3 flex justify-center>
<c-button @click="processHar()">
Sanitize and download
</c-button>
</div>
</div>
</template>

View file

@ -0,0 +1,12 @@
import { ClearFormatting } from '@vicons/tabler';
import { defineTool } from '../tool';
// Descriptor of the HAR sanitizer tool, built with `defineTool`.
// The component is referenced through a dynamic `import()` rather than a static import.
export const tool = defineTool({
name: 'HAR Sanitizer',
path: '/har-sanitizer',
description: 'HAR Files Sanitizer',
keywords: ['har', 'sanitizer'],
component: () => import('./har-sanitizer.vue'),
icon: ClearFormatting,
createdAt: new Date('2024-06-17'),
});

View file

@ -0,0 +1,19 @@
/**
 * Offer a piece of text to the user as a JSON file download.
 *
 * The text is wrapped in a Blob, exposed through a temporary object URL and
 * "clicked" via a hidden anchor element that is removed again right after.
 *
 * @param harOutput Text content of the file to download.
 * @param name File name suggested to the browser.
 */
export function downloadFile(harOutput: string, name: string) {
  const objectUrl = URL.createObjectURL(
    new Blob([harOutput], { type: 'application/json' }),
  );

  // Hidden anchor carrying the object URL and the suggested file name
  const link = document.createElement('a');
  Object.assign(link, { href: objectUrl, download: name });
  link.style.display = 'none';

  // The anchor is attached to the page only for the duration of the click
  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);

  // Release the object URL now that the download has been triggered
  URL.revokeObjectURL(objectUrl);
}

View file

@ -0,0 +1,237 @@
/* eslint-disable prefer-regex-literals */
import type { Cookie, Har, Header, Param, QueryString } from 'har-format';
/** Names found in a HAR file, grouped by category, that are candidates for scrubbing. */
export interface PossibleScrubItems {
headers: string[]
cookies: string[]
queryArgs: string[]
postParams: string[]
mimeTypes: string[]
}
// Response MIME types whose content is redacted when no explicit MIME type list is given
const defaultMimeTypesList = ['application/javascript', 'text/javascript'];
// Header / cookie / parameter names considered sensitive by default
const defaultWordList = [
'Authorization',
'SAMLRequest',
'SAMLResponse',
'access_token',
'appID',
'assertion',
'auth',
'authenticity_token',
'challenge',
'client_id',
'client_secret',
'code',
'code_challenge',
'code_verifier',
'email',
'facetID',
'fcParams',
'id_token',
'password',
'refresh_token',
'serverData',
'shdf',
'state',
'token',
'usg',
'vses2',
'x-client-data',
];
/** All default scrub items: the default MIME types followed by the default words. */
export const defaultScrubItems = [...defaultMimeTypesList, ...defaultWordList];
// The default list of regexes that aren't word dependent
// Uses double list so it matches format of word regex
// (each inner list has the same shape as the result of `buildRegex`,
// so both can be concatenated and applied in a single loop)
const defaultRegex = [
[
// Redact signature on JWTs
// The first two dot-separated segments (each starting with "ey") are kept,
// everything after the second dot is replaced by "redacted".
{
regex: new RegExp(
'\\b(ey[A-Za-z0-9-_=]+)\\.(ey[A-Za-z0-9-_=]+)\\.[A-Za-z0-9-_.+/=]+\\b',
'g',
),
replacement: '$1.$2.redacted',
},
],
];
/**
 * Build the regexes (with their replacement strings) that redact the value
 * associated with a given name in the serialized HAR text.
 *
 * The name comes from the HAR file itself (header, cookie or parameter
 * names), so it may contain regex metacharacters (`items[]`, `a(b`, ...) or
 * `$`. It is therefore escaped before being embedded in a pattern, and `$`
 * is doubled in the replacement string so it is not read as a replacement
 * pattern such as `$1` or `$&`.
 *
 * @param word Name whose value must be redacted; matched literally.
 * @returns Three `{ regex, replacement }` pairs: `word=value` occurrences,
 * `"name"` followed by `"value"`, and `"value"` followed by `"name"`.
 */
function buildRegex(word: string) {
  // Literal form of the word for use inside a regular expression
  const pattern = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Literal form of the word for use inside a replacement string
  const label = word.replace(/\$/g, '$$$$');
  return [
    {
      // [full word]=[capture]
      regex: new RegExp(
        `([\\s";,&?]+${pattern}=)([\\w+-_/=#|.%&:!*()\`~'"]+?)(&|\\\\",|",|"\\s|"}}|;){1}`,
        'g',
      ),
      replacement: `$1[${label} redacted]$3`,
    },
    // Set up this way in case "value" isn't directly after "name"
    // {
    //    "name": "[word]",
    //    "something": "not wanted",
    //    "value": "[capture]"
    // }
    {
      regex: new RegExp(
        `("name": "${pattern}",[\\s\\w+:"-\\%!*()\`~'.,#]*?"value": ")((?:\\\\"|[^"])*?)(")`,
        'g',
      ),
      replacement: `$1[${label} redacted]$3`,
    },
    // "name" comes after "value"
    // {
    //    "value": "[capture]",
    //    "something": "not wanted",
    //    "name": "[word]"
    // }
    {
      regex: new RegExp(
        `("value": ")([\\w+-_:&+=#$~/()\\\\.\\,*!|%"\\s;]+)("[,\\s}}]+)([\\s\\w+:"-\\\\%!*\`()~'#.]*"name": "${pattern}")`,
        'g',
      ),
      replacement: `$1[${label} redacted]$3$4`,
    },
  ];
}
/**
 * Replace the response body of every entry whose MIME type is in the scrub
 * list with a `[<mime type> redacted]` placeholder.
 *
 * Entries without a response or without response content are left as is.
 *
 * @param input Raw HAR file content (JSON text).
 * @param scrubList MIME types whose response content must be removed.
 * @returns The HAR re-serialized as JSON with a 2-space indent.
 * @throws SyntaxError when `input` is not valid JSON.
 * @throws Error when the document has no `log.entries` array.
 */
function removeContentForMimeTypes(input: string, scrubList: string[]) {
  const harJSON = JSON.parse(input);
  const entries = harJSON?.log?.entries;
  if (!Array.isArray(entries)) {
    throw new Error('failed to find entries in HAR file');
  }
  for (const entry of entries) {
    const content = entry?.response?.content;
    if (content && scrubList.includes(content.mimeType)) {
      content.text = `[${content.mimeType} redacted]`;
    }
  }
  // Pretty-printed output produces the `"name": "..."` / `"value": "..."` spacing
  return JSON.stringify(harJSON, null, 2);
}
/**
 * List every header, cookie, query argument, post parameter and response
 * MIME type that appears in a HAR document.
 *
 * Missing optional parts of an entry (no cookies array, no response
 * content, ...) are skipped instead of raising a TypeError.
 *
 * @param input Raw HAR file content (JSON text).
 * @returns The distinct names of each category, sorted with `localeCompare`.
 * @throws SyntaxError when `input` is not valid JSON.
 * @throws Error when the document has no `log.entries` array.
 */
export function getHarInfo(input: string): PossibleScrubItems {
  const harJSON: Har = JSON.parse(input);
  const entries = harJSON?.log?.entries;
  if (!Array.isArray(entries)) {
    throw new Error('failed to find entries in HAR file');
  }

  const headers = new Set<string>();
  const queryArgs = new Set<string>();
  const cookies = new Set<string>();
  const postParams = new Set<string>();
  const mimeTypes = new Set<string>();

  // Add the `name` of every item to the target set, tolerating absent lists
  const addNames = (target: Set<string>, items?: { name: string }[]) => {
    if (!Array.isArray(items)) {
      return;
    }
    for (const item of items) {
      if (typeof item?.name === 'string') {
        target.add(item.name);
      }
    }
  };

  for (const entry of entries) {
    const response = entry?.response;
    addNames(headers, response?.headers);
    addNames(cookies, response?.cookies);
    const mimeType = response?.content?.mimeType;
    // A non-string value here would break the sort below
    if (typeof mimeType === 'string') {
      mimeTypes.add(mimeType);
    }

    const request = entry?.request;
    addNames(headers, request?.headers);
    addNames(queryArgs, request?.queryString);
    addNames(cookies, request?.cookies);
    addNames(postParams, request?.postData?.params);
  }

  const sorted = (names: Set<string>) => [...names].sort((a, b) => a.localeCompare(b));
  return {
    headers: sorted(headers),
    queryArgs: sorted(queryArgs),
    cookies: sorted(cookies),
    postParams: sorted(postParams),
    mimeTypes: sorted(mimeTypes),
  };
}
/**
 * Decide which response MIME types must have their content removed.
 *
 * @param options Sanitize options given by the caller, if any.
 * @param possibleScrubItems Inventory of the HAR file, when it was computed.
 * @returns Every MIME type of the HAR when `allMimeTypes` is set and the
 * inventory is available; otherwise the caller's `scrubMimetypes`, or the
 * default list when none was given.
 */
function getScrubMimeTypes(
  options?: SanitizeOptions,
  possibleScrubItems?: PossibleScrubItems,
) {
  const wantsEveryMimeType = options?.allMimeTypes;
  if (!wantsEveryMimeType || !possibleScrubItems) {
    return options?.scrubMimetypes || defaultMimeTypesList;
  }
  return possibleScrubItems.mimeTypes;
}
/**
 * Decide which names must have their values redacted.
 *
 * The caller's `scrubWords` are extended with every cookie / header / query
 * argument / post parameter name of the HAR for each `all*` flag that is set
 * (only when the inventory is available).
 *
 * When the caller gives neither `scrubWords` nor any `all*` flag, the
 * default scrub items are used. The previous `scrubWords || defaultScrubItems`
 * fallback could never trigger because an array is always truthy. An
 * explicit empty `scrubWords` list is respected and means "scrub no word".
 *
 * @param options Sanitize options given by the caller, if any.
 * @param possibleScrubItems Inventory of the HAR file, when it was computed.
 * @returns The list of names to redact.
 */
function getScrubWords(
  options?: SanitizeOptions,
  possibleScrubItems?: PossibleScrubItems,
) {
  const requestsWholeCategory = Boolean(
    options?.allCookies
    || options?.allHeaders
    || options?.allQueryArgs
    || options?.allPostParams,
  );
  if (options?.scrubWords == null && !requestsWholeCategory) {
    // Nothing was asked for explicitly: fall back to the defaults (copied so callers cannot alter them)
    return [...defaultScrubItems];
  }

  let scrubWords = options?.scrubWords || [];
  if (possibleScrubItems) {
    if (options?.allCookies) {
      scrubWords = scrubWords.concat(possibleScrubItems.cookies);
    }
    if (options?.allHeaders) {
      scrubWords = scrubWords.concat(possibleScrubItems.headers);
    }
    if (options?.allQueryArgs) {
      scrubWords = scrubWords.concat(possibleScrubItems.queryArgs);
    }
    if (options?.allPostParams) {
      scrubWords = scrubWords.concat(possibleScrubItems.postParams);
    }
  }
  return scrubWords;
}
/** Options controlling what `sanitize` redacts. */
interface SanitizeOptions {
// Names (headers, cookies, parameters, ...) whose values must be redacted
scrubWords?: string[]
// MIME types whose response content must be removed; the default MIME type list is used when omitted
scrubMimetypes?: string[]
// Also redact every cookie found in the HAR file
allCookies?: boolean
// Also redact every header found in the HAR file
allHeaders?: boolean
// Also redact every query string argument found in the HAR file
allQueryArgs?: boolean
// Remove the response content of every MIME type found in the HAR file
allMimeTypes?: boolean
// Also redact every post parameter found in the HAR file
allPostParams?: boolean
}
/**
 * Redact sensitive data from a HAR document.
 *
 * Steps, in order: optionally inventory the HAR (only needed for the `all*`
 * options), blank out response bodies of the selected MIME types, then apply
 * the word-independent regexes followed by the regexes of every selected
 * word that actually occurs in the text.
 *
 * @param input Raw HAR file content (JSON text).
 * @param options What to redact.
 * @returns The sanitized HAR as JSON text.
 */
export function sanitize(input: string, options?: SanitizeOptions) {
  // The HAR only has to be parsed for its full inventory when an "all ..." option is set
  const needsInventory = [
    options?.allCookies,
    options?.allHeaders,
    options?.allMimeTypes,
    options?.allQueryArgs,
    options?.allPostParams,
  ].some(Boolean);
  const possibleScrubItems = needsInventory ? getHarInfo(input) : undefined;

  // Response bodies go first, before any word-based scrubbing
  const withoutBodies = removeContentForMimeTypes(
    input,
    getScrubMimeTypes(options, possibleScrubItems),
  );

  // Only build regexes for the words that are present in the file
  const presentWords = getScrubWords(options, possibleScrubItems).filter(word =>
    withoutBodies.includes(word),
  );
  const scrubs = [...defaultRegex, ...presentWords.map(word => buildRegex(word))].flat();

  let output = withoutBodies;
  for (const { regex, replacement } of scrubs) {
    output = output.replace(regex, replacement);
  }
  return output;
}

View file

@ -0,0 +1,18 @@
/** Parameters of a URL hash: each key maps to the list of its values. */
export type HashMap = Record<string, string[]>;
/**
 * Serialize a map of multi-valued parameters into a URL hash string.
 *
 * @param obj Parameters to serialize; a key with several values is repeated.
 * @returns The URL-encoded parameters prefixed with `#` (just `#` when empty).
 */
export function toHashString(obj: HashMap): string {
  // One [key, value] pair per value, keeping the order of keys and values
  const pairs = Object.entries(obj).flatMap(([key, values]) =>
    values.map(value => [key, value]),
  );
  const search = new URLSearchParams(pairs);
  return `#${search.toString()}`;
}
}
/**
 * Parse a URL hash string into a map of multi-valued parameters.
 *
 * Only a leading `#` is stripped; a `#` appearing later in the string is
 * part of the data and is kept.
 *
 * @param hash Hash string, with or without its leading `#`.
 * @returns Each distinct key mapped to all of its values, in order of appearance.
 */
export function getHashMap(hash: string): HashMap {
  const query = hash.startsWith('#') ? hash.slice(1) : hash;
  const existingParams = new URLSearchParams(query);
  // De-duplicate keys so repeated parameters are collected only once
  const distinctKeys = [...new Set(existingParams.keys())];
  return Object.fromEntries(
    distinctKeys.map(key => [key, existingParams.getAll(key)]),
  );
}

View file

@ -12,6 +12,7 @@ import { tool as jsonToXml } from './json-to-xml';
import { tool as regexTester } from './regex-tester';
import { tool as regexMemo } from './regex-memo';
import { tool as markdownToHtml } from './markdown-to-html';
import { tool as harSanitizer } from './har-sanitizer';
import { tool as pdfSignatureChecker } from './pdf-signature-checker';
import { tool as numeronymGenerator } from './numeronym-generator';
import { tool as macAddressGenerator } from './mac-address-generator';
@ -164,7 +165,15 @@ export const toolsByCategory: ToolCategory[] = [
},
{
name: 'Network',
components: [ipv4SubnetCalculator, ipv4AddressConverter, ipv4RangeExpander, macAddressLookup, macAddressGenerator, ipv6UlaGenerator],
components: [
ipv4SubnetCalculator,
ipv4AddressConverter,
ipv4RangeExpander,
macAddressLookup,
macAddressGenerator,
ipv6UlaGenerator,
harSanitizer,
],
},
{
name: 'Math',