diff --git a/.changeset/fix-email-htmltotext-sanitization.md b/.changeset/fix-email-htmltotext-sanitization.md new file mode 100644 index 000000000..2d5b85bed --- /dev/null +++ b/.changeset/fix-email-htmltotext-sanitization.md @@ -0,0 +1,19 @@ +--- +'@objectstack/plugin-email': patch +--- + +Harden `htmlToText` against double-escaping and incomplete tag stripping + +Fixes two CodeQL high-severity alerts in `template-engine.ts`: + +- `js/double-escaping`: the order-dependent chain of single-entity + `.replace()` calls could double-unescape (e.g. `&amp;lt;` → `&lt;` → `<`). + Entities are now decoded in a single left-to-right pass via one alternation + regex, so each entity decodes exactly once. +- `js/incomplete-multi-character-sanitization`: the single `<[^>]+>` strip + could leave a live tag behind on crafted/overlapping input + (e.g. `<scr<script>ipt>`). Tag stripping now loops until the string is + stable, and runs before entity decoding so decoding cannot re-introduce a + tag. + +Adds adversarial unit tests covering nested entities and overlapping tags. diff --git a/packages/plugins/plugin-email/src/template-engine.test.ts b/packages/plugins/plugin-email/src/template-engine.test.ts index 2475b5386..645fabc36 100644 --- a/packages/plugins/plugin-email/src/template-engine.test.ts +++ b/packages/plugins/plugin-email/src/template-engine.test.ts @@ -72,5 +72,38 @@ describe('template-engine', () => { it('collapses 3+ newlines to 2', () => { expect(htmlToText('

a

b

')).toBe('a\nb'); }); + + describe('adversarial sanitization', () => { + it('does not double-unescape entities', () => { + // &lt; must decode ONCE to the literal text "<", never to "<". + const out = htmlToText('&lt;script&gt;'); + expect(out).toBe('<script>'); + expect(out).not.toContain('<'); + expect(out).not.toContain('>'); + }); + + it('decodes single-escaped entities exactly once', () => { + // Sanity counterpart: single-escaped sequences still decode normally. + expect(htmlToText('a && b')).toBe('a && b'); + }); + + it('strips overlapping/nested tags so no tag survives', () => { + const out = htmlToText('ipt>alert(1)'); + expect(out).not.toContain('<'); + expect(out.toLowerCase()).not.toContain(' { + const out = htmlToText('<