From 901498750df2d85e0081190cae87a19a64ce0c13 Mon Sep 17 00:00:00 2001 From: sunyiteng Date: Thu, 21 May 2026 17:59:01 +0800 Subject: [PATCH] fix: separate block markdown children --- src/react/render-spacing.test.tsx | 188 ++++++++++++++++++++++++++++++ src/react/render.test.tsx | 15 --- src/react/render.ts | 186 +++++++++++++++++++++++------ 3 files changed, 341 insertions(+), 48 deletions(-) create mode 100644 src/react/render-spacing.test.tsx diff --git a/src/react/render-spacing.test.tsx b/src/react/render-spacing.test.tsx new file mode 100644 index 0000000..cb95cde --- /dev/null +++ b/src/react/render-spacing.test.tsx @@ -0,0 +1,188 @@ +import { describe, expect, it } from 'vitest'; +import { renderToMarkdownString } from './render'; + +describe('renderToMarkdownString - child boundary spacing', () => { + it('separates adjacent transparent block wrappers', async () => { + expect( + await renderToMarkdownString( + <> +
Row 1
+
Row 2
+ , + ), + ).toMatchInlineSnapshot(` + "Row 1 + + Row 2" + `); + }); + + it('separates nested transparent block wrappers', async () => { + expect( + await renderToMarkdownString( +
+
+
Intro
+
+
+
Details
+
+
, + ), + ).toMatchInlineSnapshot(` + "Intro + + Details" + `); + }); + + it('separates text from adjacent block wrappers in flow containers', async () => { + expect( + await renderToMarkdownString( + <> + Lead +
Section
+ + Tail + , + ), + ).toMatchInlineSnapshot(` + "Lead + + Section + + Aside + + Tail" + `); + }); + + it('separates blocks even when the previous text ends with a space', async () => { + expect( + await renderToMarkdownString( + <> + {'Lead '} +
Section
+ , + ), + ).toMatchInlineSnapshot(` + "Lead + + Section" + `); + }); + + it('completes block separation when the boundary already has one newline', async () => { + expect( + await renderToMarkdownString( + <> + {'Lead\n'} +
Section
+ , + ), + ).toMatchInlineSnapshot(` + "Lead + + Section" + `); + }); + + it('does not over-separate blocks that already start with one newline', async () => { + expect( + await renderToMarkdownString( + <> + {'Lead'} +
+            {'const value = 1;\n'}
+          
+ , + ), + ).toMatchInlineSnapshot(` + "Lead + + \`\`\`ts + const value = 1; + + \`\`\` + " + `); + }); + + it('does not add duplicate spacing around markdown blocks that already delimit themselves', async () => { + expect( + await renderToMarkdownString( +
+

Title

+

Paragraph

+
Footer
+
, + ), + ).toMatchInlineSnapshot(` + "## Title + + Paragraph + + Footer" + `); + }); + + it('keeps inline siblings and punctuation joined exactly', async () => { + expect( + await renderToMarkdownString( +

+ foo + bar,docs. +

, + ), + ).toMatchInlineSnapshot(` + "**foo**bar,[docs](/docs). + + " + `); + }); + + it('does not insert spaces into comma-separated inline React children', async () => { + expect( + await renderToMarkdownString(

ref-value,memo-value,provided

), + ).toMatchInlineSnapshot(` + "ref-value,memo-value,provided + + " + `); + }); + + it('keeps raw markdown text nodes untouched', async () => { + expect( + await renderToMarkdownString( + <> + {'# Code Example\\n'} + {'\n'} + {'```tsx\nconsole.log("Hello, world!");\n```\n'} + , + ), + ).toMatchInlineSnapshot(` + "# Code Example\\n + \`\`\`tsx + console.log("Hello, world!"); + \`\`\` + " + `); + }); + + it('keeps fenced code block content untouched', async () => { + expect( + await renderToMarkdownString( +
+          {'const values = ["a", "b"];\n'}
+        
, + ), + ).toMatchInlineSnapshot(` + " + \`\`\`ts + const values = ["a", "b"]; + + \`\`\` + " + `); + }); +}); diff --git a/src/react/render.test.tsx b/src/react/render.test.tsx index 21816e8..e0dc7ad 100644 --- a/src/react/render.test.tsx +++ b/src/react/render.test.tsx @@ -264,21 +264,6 @@ describe('renderToMarkdownString - styles', () => { " `); }); - - it('renders two row correctly', async () => { - const Comp1 = () => { - return ( - <> -
Row 1
-
Row 2
- - ); - }; - - expect(await renderToMarkdownString()).toMatchInlineSnapshot( - `"Row 1Row 2"`, - ); - }); }); describe('renderToMarkdownString - effects never execute (SSR behavior)', () => { diff --git a/src/react/render.ts b/src/react/render.ts index 8d50d05..8df9a05 100644 --- a/src/react/render.ts +++ b/src/react/render.ts @@ -31,6 +31,40 @@ let realCurrent: Record | null = null; let cachedTarget: unknown = null; let cachedProxy: unknown = null; +const transparentBlockTypes = new Set([ + 'address', + 'article', + 'aside', + 'details', + 'div', + 'figcaption', + 'figure', + 'footer', + 'form', + 'header', + 'main', + 'nav', + 'section', + 'summary', +]); + +const markdownBlockTypes = new Set([ + 'blockquote', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'hr', + 'li', + 'ol', + 'p', + 'pre', + 'table', + 'ul', +]); + function installEffectInterceptor(): () => void { if (!ReactSharedInternals) { return noop; @@ -108,52 +142,138 @@ function installEffectInterceptor(): () => void { }; } +function isFlowContainer(type: string): boolean { + return type === 'root' || transparentBlockTypes.has(type); +} + +function isBlockBoundary(child: MarkdownNode | TextNode): boolean { + return ( + child instanceof MarkdownNode && + (transparentBlockTypes.has(child.type) || + markdownBlockTypes.has(child.type)) + ); +} + +function startsWithNewline(value: string): boolean { + return value.charCodeAt(0) === 10; +} + +function endsWithNewline(value: string): boolean { + return value.charCodeAt(value.length - 1) === 10; +} + +function hasBlankLineBoundary( + previousMarkdown: string, + markdown: string, +): boolean { + return ( + previousMarkdown.endsWith('\n\n') || + markdown.startsWith('\n\n') || + (endsWithNewline(previousMarkdown) && startsWithNewline(markdown)) + ); +} + +function getBlockSeparator( + previousChild: MarkdownNode | TextNode, + child: MarkdownNode | TextNode, + previousMarkdown: string, + markdown: string, +): string { + if ( + previousMarkdown.length === 0 || + markdown.length === 0 || + (!isBlockBoundary(previousChild) && !isBlockBoundary(child)) || + hasBlankLineBoundary(previousMarkdown, markdown) + ) { + return ''; + } + + if (endsWithNewline(previousMarkdown) || startsWithNewline(markdown)) { + return '\n'; + } + + return '\n\n'; +} + +function childToMarkdown(child: MarkdownNode | TextNode): string { + if (child instanceof TextNode) { + return child.text; + } + return toMarkdown(child); +} + +function renderChildren(root: MarkdownNode): string { + const { children } = root; + + if (children.length === 0) { + return ''; + } + + const shouldSeparateBlocks = isFlowContainer(root.type); + const parts: string[] = []; + let previousChild: MarkdownNode | TextNode | undefined; + let previousMarkdown = ''; + + for (const child of children) { + const markdown = childToMarkdown(child); + const separator = + shouldSeparateBlocks && previousChild + ? getBlockSeparator(previousChild, child, previousMarkdown, markdown) + : ''; + + if (separator) { + parts.push(separator); + } + + parts.push(markdown); + + if (markdown.length > 0) { + previousChild = child; + previousMarkdown = markdown; + } + } + + return parts.join(''); +} + // Convert node tree to Markdown string function toMarkdown(root: MarkdownNode): string { const { type, props, children } = root; - // Get children's Markdown - const childrenMd = () => - children - .map((child) => { - if (child instanceof TextNode) { - return child.text; - } - return toMarkdown(child); - }) - .join(''); + // Get children's Markdown lazily so ignored nodes do not serialize children. + const getChildrenMarkdown = () => renderChildren(root); // Generate corresponding Markdown based on element type switch (type) { case 'root': - return childrenMd(); + return getChildrenMarkdown(); case 'h1': - return `# ${childrenMd()}\n\n`; + return `# ${getChildrenMarkdown()}\n\n`; case 'h2': - return `## ${childrenMd()}\n\n`; + return `## ${getChildrenMarkdown()}\n\n`; case 'h3': - return `### ${childrenMd()}\n\n`; + return `### ${getChildrenMarkdown()}\n\n`; case 'h4': - return `#### ${childrenMd()}\n\n`; + return `#### ${getChildrenMarkdown()}\n\n`; case 'h5': - return `##### ${childrenMd()}\n\n`; + return `##### ${getChildrenMarkdown()}\n\n`; case 'h6': - return `###### ${childrenMd()}\n\n`; + return `###### ${getChildrenMarkdown()}\n\n`; case 'p': - return `${childrenMd()}\n\n`; + return `${getChildrenMarkdown()}\n\n`; case 'strong': case 'b': - return `**${childrenMd()}**`; + return `**${getChildrenMarkdown()}**`; case 'em': case 'i': - return `*${childrenMd()}*`; + return `*${getChildrenMarkdown()}*`; case 'code': // When is nested inside
, it represents the code block body,
       // so we must not wrap it with inline backticks (would create nested fences).
       if (root.parent?.type === 'pre') {
-        return childrenMd();
+        return getChildrenMarkdown();
       }
-      return `\`${childrenMd()}\``;
+      return `\`${getChildrenMarkdown()}\``;
     case 'pre': {
       const _language =
         props['data-lang'] || props.language || props.lang || '';
@@ -164,23 +284,23 @@ function toMarkdown(root: MarkdownNode): string {
         ? '````'
         : '```';
 
-      return `\n${block}${language}${title ? ` title=${title}` : ''}\n${childrenMd()}\n${block}\n`;
+      return `\n${block}${language}${title ? ` title=${title}` : ''}\n${getChildrenMarkdown()}\n${block}\n`;
     }
     case 'a':
-      return `[${childrenMd()}](${props.href || '#'})`;
+      return `[${getChildrenMarkdown()}](${props.href || '#'})`;
     case 'img':
       return `![${props.alt || ''}](${props.src || ''})`;
     case 'ul':
-      return `${childrenMd()}\n`;
+      return `${getChildrenMarkdown()}\n`;
     case 'ol':
-      return `${childrenMd()}\n`;
+      return `${getChildrenMarkdown()}\n`;
     case 'li': {
       const isOrdered = root.parent && root.parent.type === 'ol';
       const prefix = isOrdered ? '1. ' : '- ';
-      return `${prefix}${childrenMd()}\n`;
+      return `${prefix}${getChildrenMarkdown()}\n`;
     }
     case 'blockquote':
-      return `> ${childrenMd().split('\n').join('\n> ')}\n\n`;
+      return `> ${getChildrenMarkdown().split('\n').join('\n> ')}\n\n`;
     case 'br':
       return '\n';
     case 'hr':
@@ -188,11 +308,11 @@ function toMarkdown(root: MarkdownNode): string {
     case 'style':
       return '';
     case 'table':
-      return `${childrenMd()}\n`;
+      return `${getChildrenMarkdown()}\n`;
     case 'thead':
-      return childrenMd();
+      return getChildrenMarkdown();
     case 'tbody':
-      return childrenMd();
+      return getChildrenMarkdown();
     case 'tr': {
       const cells = children
         .filter((child): child is MarkdownNode => child instanceof MarkdownNode)
@@ -208,9 +328,9 @@ function toMarkdown(root: MarkdownNode): string {
     }
     case 'th':
     case 'td':
-      return childrenMd();
+      return getChildrenMarkdown();
     default:
-      return childrenMd();
+      return getChildrenMarkdown();
   }
 }