diff --git a/packages/bindx-editor/src/plugins/behaviour/paste/HtmlDeserializer.ts b/packages/bindx-editor/src/plugins/behaviour/paste/HtmlDeserializer.ts
index 3a4a2172..f403cf67 100644
--- a/packages/bindx-editor/src/plugins/behaviour/paste/HtmlDeserializer.ts
+++ b/packages/bindx-editor/src/plugins/behaviour/paste/HtmlDeserializer.ts
@@ -103,7 +103,19 @@ export class HtmlDeserializer {
 
 	deserializeBlocks(list: Node[], cumulativeTextAttrs: TextAttrs): Descendant[] {
 		const result = this.processNodeListPaste(list, cumulativeTextAttrs)
-		return result?.texts ?? result?.elements ?? []
+		if (result === null) {
+			return []
+		}
+		// `deserializeBlocks` is the block-content boundary — it is called both at the top level
+		// (from `withPaste`) and for a block's children (from each block plugin's `next`). Interior
+		// whitespace has already been collapsed per text node, but a run assembled from
+		// pretty-printed / indented source HTML still carries a leading/trailing space at the block
+		// edge (e.g. `<p>\n\t\tSome text\n</p>` → `" Some text "`). Trim those edges so pasting does
+		// not prepend or append a stray space, matching how a browser renders `white-space: normal`.
+		if (result.texts !== undefined) {
+			return trimBlockEdgeWhitespace(result.texts)
+		}
+		return result.elements
 	}
 
 	private deserializeTextNode(node: Node, cumulativeTextAttrs: TextAttrs): Descendant[] | null {
@@ -166,3 +178,47 @@ export class HtmlDeserializer {
 		return null
 	}
 }
+
+// The characters HTML/CSS treat as collapsible under `white-space: normal`: space, tab, line feed,
+// carriage return and form feed. Deliberately narrower than `\s`, which also matches U+00A0 and
+// other Unicode spaces — a non-breaking space at a block edge is authored content, not indentation.
+const leadingCollapsibleWhitespace = /^[ \t\n\r\f]+/
+const trailingCollapsibleWhitespace = /[ \t\n\r\f]+$/
+
+/**
+ * Trims collapsible whitespace from the start of the first text leaf and from the end of the last
+ * text leaf of a block's inline content, descending through inline wrappers (e.g. anchors) to
+ * reach the edge leaf.
+ *
+ * @param nodes Inline content of a single block.
+ * @returns A copy of `nodes` with both edge leaves trimmed; `nodes` itself is not mutated.
+ */
+const trimBlockEdgeWhitespace = (nodes: Descendant[]): Descendant[] => {
+	const trimmedStart = mapEdgeTextLeaf(nodes, 'start', text => text.replace(leadingCollapsibleWhitespace, ''))
+	return mapEdgeTextLeaf(trimmedStart, 'end', text => text.replace(trailingCollapsibleWhitespace, ''))
+}
+
+/**
+ * Applies `map` to the text of the outermost leaf on the given edge of `nodes`.
+ *
+ * Only the first (`'start'`) or last (`'end'`) node is visited; when it is an element the search
+ * recurses into its children. Every node on that path is shallow-copied, so the input is untouched.
+ *
+ * @param nodes Nodes to search.
+ * @param edge Which end of `nodes` to follow.
+ * @param map Transformation applied to the edge leaf's text.
+ * @returns A copy of `nodes` with the edge node replaced, or `nodes` itself when it is empty.
+ */
+const mapEdgeTextLeaf = (nodes: Descendant[], edge: 'start' | 'end', map: (text: string) => string): Descendant[] => {
+	const index = edge === 'start' ? 0 : nodes.length - 1
+	const node = nodes[index]
+	if (node === undefined) {
+		return nodes
+	}
+	const mapped: Descendant = SlateText.isText(node)
+		? { ...node, text: map(node.text) }
+		: { ...node, children: mapEdgeTextLeaf(node.children, edge, map) }
+	const copy = nodes.slice()
+	copy[index] = mapped
+	return copy
+}
diff --git a/tests/unit/editor/repro-paste-leading-space.test.ts b/tests/unit/editor/repro-paste-leading-space.test.ts
new file mode 100644
index 00000000..32c550ab
--- /dev/null
+++ b/tests/unit/editor/repro-paste-leading-space.test.ts
@@ -0,0 +1,42 @@
+// Regression test for https://github.com/contember/bindx/issues/60
+import { describe, expect, test } from 'bun:test'
+import { HtmlDeserializer } from '@contember/bindx-editor'
+import type { Descendant } from 'slate'
+
+// The paste pipeline (withPaste) builds an HtmlDeserializer with a default-element
+// factory and no extra plugins, then feeds it the parsed clipboard HTML via
+// `deserializeBlocks`. We exercise that same path directly.
+const createDeserializer = () =>
+	new HtmlDeserializer((children: Descendant[]) => ({ type: 'paragraph', children }) as any, [])
+
+const deserializeHtml = (html: string): Descendant[] => {
+	const doc = new DOMParser().parseFromString(html, 'text/html')
+	return createDeserializer().deserializeBlocks(Array.from(doc.body.childNodes), {})
+}
+
+// Collect every text leaf's string, in document order.
+const gatherText = (nodes: Descendant[]): string =>
+	nodes
+		.map(node => ('text' in node ? (node.text as string) : gatherText((node as any).children ?? [])))
+		.join('')
+
+describe('HtmlDeserializer paste whitespace', () => {
+	test('should not prepend a leading space when pasting indented block HTML', () => {
+		// Copying from any pretty-printed / indented HTML source yields text nodes
+		// like "\n\t\tSome text\n". The deserializer collapses the leading newline +
+		// indentation, but must not leave it as a stray leading space at the block edge.
+		const result = deserializeHtml('<p>\n\t\tSome text\n</p>')
+		const text = gatherText(result)
+
+		expect(text.startsWith(' ')).toBe(false)
+		expect(text.endsWith(' ')).toBe(false)
+		expect(text).toBe('Some text')
+	})
+
+	test('should still collapse internal whitespace runs to a single space', () => {
+		// Trimming block edges must not break the CSS `white-space: normal` collapsing
+		// of interior whitespace between words.
+		const result = deserializeHtml('<p>\n\tfoo\n\t\tbar\n</p>')
+		expect(gatherText(result)).toBe('foo bar')
+	})
+})