refac: enhanced response content sanitisation

'<' and '>' can be correctly displayed now
This commit is contained in:
Timothy J. Baek 2024-08-15 00:08:15 +02:00
parent c8badfe21f
commit 5a6ece9513
7 changed files with 39 additions and 43 deletions

7
package-lock.json generated
View File

@ -18,6 +18,7 @@
"codemirror": "^6.0.1",
"crc-32": "^1.2.2",
"dayjs": "^1.11.10",
"dompurify": "^3.1.6",
"eventsource-parser": "^1.1.2",
"file-saver": "^2.0.5",
"fuse.js": "^7.0.0",
@ -3918,9 +3919,9 @@
}
},
"node_modules/dompurify": {
"version": "3.1.5",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.5.tgz",
"integrity": "sha512-lwG+n5h8QNpxtyrJW/gJWckL+1/DQiYMX8f7t8Z2AZTPw1esVrqjI63i7Zc2Gz0aKzLVMYC1V1PL/ky+aY/NgA=="
"version": "3.1.6",
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.1.6.tgz",
"integrity": "sha512-cTOAhc36AalkjtBpfG6O8JimdTMWNXjiePT2xQH/ppBGi/4uIpmj8eKyIkMJErXWARyINV/sB38yf8JCLF5pbQ=="
},
"node_modules/domutils": {
"version": "3.1.0",

View File

@ -59,6 +59,7 @@
"codemirror": "^6.0.1",
"crc-32": "^1.2.2",
"dayjs": "^1.11.10",
"dompurify": "^3.1.6",
"eventsource-parser": "^1.1.2",
"file-saver": "^2.0.5",
"fuse.js": "^7.0.0",

View File

@ -261,7 +261,7 @@ __builtins__.input = input`);
<div
class="flex justify-between bg-[#202123] text-white text-xs px-4 pt-1 pb-0.5 rounded-t-lg overflow-x-auto"
>
<div class="p-1">{@html lang}</div>
<div class="p-1">{lang}</div>
<div class="flex items-center">
{#if lang.toLowerCase() === 'python' || lang.toLowerCase() === 'py' || (lang === '' && checkPythonCode(code))}

View File

@ -1,4 +1,5 @@
<script lang="ts">
import DOMPurify from 'dompurify';
import type { Token } from 'marked';
import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
import { onMount } from 'svelte';
@ -14,7 +15,12 @@
{#if token.type === 'escape'}
{unescapeHtml(token.text)}
{:else if token.type === 'html'}
{@html token.text}
{@const html = DOMPurify.sanitize(token.text)}
{#if html}
{@html html}
{:else}
{token.text}
{/if}
{:else if token.type === 'link'}
<a href={token.href} target="_blank" rel="nofollow" title={token.title}>{token.text}</a>
{:else if token.type === 'image'}

View File

@ -1,4 +1,5 @@
<script lang="ts">
import DOMPurify from 'dompurify';
import { onMount } from 'svelte';
import type { Token } from 'marked';
import { revertSanitizedResponseContent, unescapeHtml } from '$lib/utils';
@ -91,7 +92,12 @@
</ul>
{/if}
{:else if token.type === 'html'}
{@html token.text}
{@const html = DOMPurify.sanitize(token.text)}
{#if html}
{@html html}
{:else}
{token.text}
{/if}
{:else if token.type === 'paragraph'}
<p>
<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} />

View File

@ -18,8 +18,7 @@
approximateToHumanReadable,
extractSentences,
replaceTokens,
revertSanitizedResponseContent,
sanitizeResponseContent
processResponseContent
} from '$lib/utils';
import { WEBUI_BASE_URL } from '$lib/constants';
@ -88,7 +87,7 @@
$: (async () => {
if (message?.content) {
tokens = marked.lexer(
replaceTokens(sanitizeResponseContent(message?.content), model?.name, $user?.name)
replaceTokens(processResponseContent(message?.content), model?.name, $user?.name)
);
}
})();

View File

@ -23,39 +23,6 @@ const convertLatexToSingleLine = (content) => {
return content;
};
/**
 * Escapes raw response text for display while keeping well-formed
 * `<video src="…" controls></video>` tags intact.
 *
 * Steps, in order:
 * 1. every pair of consecutive backslashes is doubled (two become four),
 * 2. the text is passed through `convertLatexToSingleLine`,
 * 3. matching `<video>` tags are swapped for `{{VIDEO_n}}` placeholders,
 * 4. `<|name|>` special tokens are removed (a complete one becomes a space,
 *    an incomplete one at the very end of the text is dropped) and every
 *    remaining `<` / `>` is turned into `&lt;` / `&gt;`,
 * 5. the placeholders are replaced by the original `<video>` tags.
 *
 * @param content - Raw response text.
 * @returns The escaped text, trimmed of surrounding whitespace.
 */
export const sanitizeResponseContent = (content: string): string => {
	// The pattern matches two literal backslashes and emits four
	content = content.replace(/\\\\/g, '\\\\\\\\');
	content = convertLatexToSingleLine(content);

	// Park the allowed <video> tags behind placeholders so the escaping below leaves them alone
	const videoTagRegex = /<video\s+src="([^"]+)"\s+controls><\/video>/gi;
	const videoTags: string[] = [];
	content = content.replace(videoTagRegex, (_, src: string) => {
		const placeholder = `{{VIDEO_${videoTags.length}}}`;
		videoTags.push(`<video src="${src}" controls></video>`);
		return placeholder;
	});

	content = content
		// An unfinished special token at the very end of the text, in its three possible stages
		.replace(/<\|[a-z]*$/, '')
		.replace(/<\|[a-z]+\|$/, '')
		.replace(/<$/, '')
		// Complete special tokens collapse to a single space
		.replace(/<\|[a-z]+\|>/g, ' ')
		.replace(/</g, '&lt;')
		.replace(/>/g, '&gt;')
		.trim();

	videoTags.forEach((videoTag, index) => {
		// A replacer function is required here: with a plain string replacement,
		// `$$`, `$&`, "$`" and `$'` inside the tag's src would be interpreted as
		// replacement patterns and corrupt the restored tag.
		content = content.replace(`{{VIDEO_${index}}}`, () => videoTag);
	});

	return content.trim();
};
export const replaceTokens = (content, char, user) => {
const charToken = /{{char}}/gi;
const userToken = /{{user}}/gi;
@ -87,8 +54,24 @@ export const replaceTokens = (content, char, user) => {
return content;
};
/**
 * Escapes response text so that it can be shown as literal characters.
 *
 * An incomplete `<|name|>` special token at the very end of the text is
 * dropped, complete special tokens are replaced by a space, and every
 * remaining `<` / `>` becomes `&lt;` / `&gt;`.
 *
 * @param content - Raw response text.
 * @returns The escaped text, trimmed of surrounding whitespace.
 */
export const sanitizeResponseContent = (content: string) => {
	// Three possible stages of an unfinished token at the end: "<|abc", "<|abc|" and a bare "<"
	const withoutDanglingToken = content
		.replace(/<\|[a-z]*$/, '')
		.replace(/<\|[a-z]+\|$/, '')
		.replace(/<$/, '');

	// Complete tokens collapse to a single space
	const withoutTokens = withoutDanglingToken.replace(/<\|[a-z]+\|>/g, ' ');

	const escaped = withoutTokens.replace(/</g, '&lt;').replace(/>/g, '&gt;');
	return escaped.trim();
};
/**
 * Prepares response text for the markdown lexer: runs it through
 * `convertLatexToSingleLine` and trims surrounding whitespace from the result.
 *
 * @param content - Raw response text.
 * @returns The converted, trimmed text.
 */
export const processResponseContent = (content: string) => {
	const converted = convertLatexToSingleLine(content);
	return converted.trim();
};
/**
 * Turns the `&lt;` / `&gt;` entities in `content` back into literal `<` / `>`.
 *
 * @param content - Text containing `&lt;` / `&gt;` entities.
 * @returns The text with those entities replaced.
 */
export const revertSanitizedResponseContent = (content: string) => {
// This statement additionally collapses every pair of backslashes into a single backslash.
return content.replaceAll('&lt;', '<').replaceAll('&gt;', '>').replaceAll('\\\\', '\\');
// NOTE(review): unreachable as written. This looks like the intended replacement for the
// statement above (it no longer touches backslashes) — confirm which of the two should remain.
return content.replaceAll('&lt;', '<').replaceAll('&gt;', '>');
};
export function unescapeHtml(html: string) {