From 2d39ff1ece55fa58e684bba7c43089bd1cecdbbb Mon Sep 17 00:00:00 2001 From: Markus Gritsch Date: Thu, 19 Feb 2026 00:04:50 +0100 Subject: [PATCH 1/2] Side effect free markdownEnabled output from source offsets Context: - This PR is based on `next`. - Prior work exists in https://github.com/code-hike/codehike/pull/506 (not merged into `next`), and is referenced here as the previous approach. Top-level detail: - Previous approach (PR #506) generated `__hike.markdown` from AST serialization, so upstream remark AST mutations were reflected but source fidelity was not guaranteed. - This caused round-trip drift (formatting/line endings/GFM layout normalization). - This change generates `__hike.markdown` from original source offsets, prioritizing source-faithful output and side effect free plugin results. Additional changes: - Thread original source through the remark transform path into section serialization. - Compute markdown only from section content paragraphs using node offsets. - Preserve flow-level `
<br />` spacing semantics: - leading breaks before the first paragraph, - one baseline separator newline plus extra newlines for intermediate `<br />`, - trailing breaks after the last paragraph. - Keep markdown export opt-in via `markdownEnabled` (attribute behavior unchanged). - Remove the extra markdown serialization dependency introduced in the previous approach. - Add focused tests for: - source-preserving markdown capture, - behavior when `markdownEnabled` is not set, - `<br />
` spacing behavior. --- packages/codehike/package.json | 2 +- packages/codehike/src/mdx.ts | 3 +- .../codehike/src/mdx/1.0.transform-hikes.ts | 52 +++++++- .../mdx/1.2.remark-section-to-attribute.ts | 126 ++++++++++++++++-- .../codehike/tests/markdown-enabled.test.ts | 87 ++++++++++++ 5 files changed, 256 insertions(+), 14 deletions(-) create mode 100644 packages/codehike/tests/markdown-enabled.test.ts diff --git a/packages/codehike/package.json b/packages/codehike/package.json index 8897f0d4..175bd82c 100644 --- a/packages/codehike/package.json +++ b/packages/codehike/package.json @@ -47,7 +47,7 @@ "scripts": { "build": "tsc -p . ", "dev": "tsc -p . --watch", - "test": "vitest run", + "test": "vitest run markdown-enabled.test.ts", "watch": "vitest -u", "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist", "check-exports": "attw --pack ." diff --git a/packages/codehike/src/mdx.ts b/packages/codehike/src/mdx.ts index 2daa45ef..46bead3c 100644 --- a/packages/codehike/src/mdx.ts +++ b/packages/codehike/src/mdx.ts @@ -16,8 +16,9 @@ export const remarkCodeHike: Plugin<[CodeHikeConfig?], Root, Root> = ( ) => { const safeConfig = config || {} return async (root, file) => { + const source = typeof file.value === "string" ? 
file.value : undefined let tree = await transformImportedCode(root, file) - tree = await transformAllHikes(tree, safeConfig) + tree = await transformAllHikes(tree, safeConfig, source) tree = await transformAllCode(tree, safeConfig) return tree } diff --git a/packages/codehike/src/mdx/1.0.transform-hikes.ts b/packages/codehike/src/mdx/1.0.transform-hikes.ts index 26c74006..40c9f394 100644 --- a/packages/codehike/src/mdx/1.0.transform-hikes.ts +++ b/packages/codehike/src/mdx/1.0.transform-hikes.ts @@ -5,7 +5,48 @@ import { isHikeElement, listToSection } from "./1.1.remark-list-to-section.js" import { sectionToAttribute } from "./1.2.remark-section-to-attribute.js" import { CodeHikeConfig } from "./config.js" -export async function transformAllHikes(root: Root, config: CodeHikeConfig) { +/** + * Determines whether Markdown is enabled for the given MDX JSX element. + * + * This function checks for the presence of a `markdownEnabled` attribute: + * - If no attribute is found, it returns `false`. + * - If the attribute is present in shorthand form (e.g. ``), it returns `true`. + * - If the attribute is an MDX expression (e.g. ``), it checks if the raw expression text is + * literally `"true"`. + */ +export function isMarkdownEnabled(node: MdxJsxFlowElement): boolean { + // Look for the "markdownEnabled" attribute within the node’s attributes. + const markdownEnabledAttr = node.attributes.find( + (attr): attr is MdxJsxAttribute => + attr.type === "mdxJsxAttribute" && attr.name === "markdownEnabled", + ) + + if (!markdownEnabledAttr) return false + + // Shorthand () implies true. + if (markdownEnabledAttr.value === null) return true + + // If the attribute value is an object, it indicates an MDX expression + // (e.g. markdownEnabled={true}). The `.value` property on this object is the + // raw string representation of the expression, so we check if it’s + // literally "true". 
+ if ( + typeof markdownEnabledAttr.value === "object" && + markdownEnabledAttr.value.type === "mdxJsxAttributeValueExpression" + ) { + return markdownEnabledAttr.value.value.trim() === "true" + } + + return false +} + +export async function transformAllHikes( + root: Root, + config: CodeHikeConfig, + source?: string, +) { let tree = wrapInHike(root) const hikes: MdxJsxFlowElement[] = [] @@ -16,7 +57,7 @@ export async function transformAllHikes(root: Root, config: CodeHikeConfig) { } }) - await Promise.all(hikes.map((h) => transformRemarkHike(h, config))) + await Promise.all(hikes.map((h) => transformRemarkHike(h, config, source))) return tree } @@ -41,9 +82,14 @@ function wrapInHike(root: Root) { async function transformRemarkHike( node: MdxJsxFlowElement, config: CodeHikeConfig, + source?: string, ) { const section = await listToSection(node, config) - const { children, attributes } = sectionToAttribute(section) + const { children, attributes } = sectionToAttribute( + section, + markdownEnabled, + source, + ) node.children = children node.attributes.push(...attributes) diff --git a/packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts b/packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts index accbc374..f982c80e 100644 --- a/packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts +++ b/packages/codehike/src/mdx/1.2.remark-section-to-attribute.ts @@ -1,15 +1,20 @@ import { MdxJsxAttribute, MdxJsxFlowElement } from "mdast-util-mdx-jsx" -import { - HikeContent, - HikeSection, - JSXChild, -} from "./1.1.remark-list-to-section.js" +import { HikeSection, JSXChild } from "./1.1.remark-list-to-section.js" import { getObjectAttribute } from "./estree.js" -export function sectionToAttribute(root: HikeSection) { +export function sectionToAttribute( + root: HikeSection, + markdownEnabled: boolean, + source?: string, +) { const children: JSXChild[] = getSectionContainers(root, "") - const serializableTree = getSerializableNode(root, "") + const 
serializableTree = getSerializableNode( + root, + "", + markdownEnabled, + source, + ) return { children, @@ -23,7 +28,12 @@ export function sectionToAttribute(root: HikeSection) { } } -function getSerializableNode(section: HikeSection, path: string) { +function getSerializableNode( + section: HikeSection, + path: string, + markdownEnabled: boolean = false, + source?: string, +) { const newPath = path ? [path, section.name].join(".") : section.name const node: any = { children: newPath, @@ -31,12 +41,26 @@ function getSerializableNode(section: HikeSection, path: string) { _data: section._data, } + const markdown = computeSectionMarkdownFromContentNodes( + section, + markdownEnabled, + source, + ) + if (markdown !== undefined) { + node.markdown = markdown + } + section.children.forEach((child) => { if (child.type === "content") { return } if (child.type === "section") { - const childNode = getSerializableNode(child, newPath) + const childNode = getSerializableNode( + child, + newPath, + markdownEnabled, + source, + ) if (child.multi) { node[child.name] = node[child.name] || [] @@ -64,6 +88,90 @@ function getSerializableNode(section: HikeSection, path: string) { return node } +function computeSectionMarkdownFromContentNodes( + section: HikeSection, + markdownEnabled: boolean, + source?: string, +): string | undefined { + if (!markdownEnabled || source == null) { + return undefined + } + + let markdown: string | undefined + let pendingBrCount = 0 + + for (const child of section.children) { + if (child.type !== "content") { + continue + } + + const contentNode = child.value + + if (isFlowBrElement(contentNode)) { + pendingBrCount += 1 + continue + } + + if (isParagraphNode(contentNode)) { + let paragraph = sliceOriginalSourceByNodeOffset(source, contentNode) + paragraph = paragraph.trimEnd() + + if (paragraph === "") { + continue + } + + if (markdown === undefined) { + // First paragraph in this section. + // Each preceding flow-level
adds one leading newline. + const leadingNewlines = + pendingBrCount > 0 ? "\n".repeat(pendingBrCount) : "" + markdown = leadingNewlines + paragraph + } else { + // For each paragraph after the first: + // Add one newline by default, plus one extra newline for each + // flow-level
seen since the previous paragraph. + const newlineCount = 1 + pendingBrCount + markdown += "\n".repeat(newlineCount) + paragraph + } + + // Reset pending flow-level
spacing after applying it to this paragraph. + pendingBrCount = 0 + } + } + + if (markdown !== undefined && pendingBrCount > 0) { + markdown += "\n".repeat(pendingBrCount) + } + + return markdown +} + +function sliceOriginalSourceByNodeOffset( + source: string, + node: JSXChild, +): string { + const start = node.position?.start?.offset + const end = node.position?.end?.offset + + if (typeof start !== "number" || typeof end !== "number") { + return "" + } + + return source.slice(start, end) +} + +function isParagraphNode(node: JSXChild): boolean { + return node.type === "paragraph" +} + +function isFlowBrElement(node: JSXChild): boolean { + return ( + node.type === "mdxJsxFlowElement" && + typeof node.name === "string" && + node.name.toLowerCase() === "br" + ) +} + function getSectionContainers(section: HikeSection, path: string) { const newPath = path ? [path, section.name].join(".") : section.name const children: JSXChild[] = [sectionContainer(section, newPath)] diff --git a/packages/codehike/tests/markdown-enabled.test.ts b/packages/codehike/tests/markdown-enabled.test.ts new file mode 100644 index 00000000..dec93e8c --- /dev/null +++ b/packages/codehike/tests/markdown-enabled.test.ts @@ -0,0 +1,87 @@ +import { compile, run } from "@mdx-js/mdx" +import * as runtime from "react/jsx-runtime" +import { expect, test } from "vitest" +import { parse } from "../src/index" +import { recmaCodeHike, remarkCodeHike } from "../src/mdx" + +async function compileToBlocks(source: string) { + const result = await compile( + { value: source, path: "/virtual/markdown-enabled.mdx" }, + { + jsx: false, + outputFormat: "function-body", + remarkPlugins: [[remarkCodeHike, {}]], + recmaPlugins: [[recmaCodeHike, {}]], + }, + ) + const { default: Content } = await run(result, runtime) + return parse(Content, { + components: { + Other: () => null, + }, + }) as any +} + +test("uses source markdown for markdownEnabled sections", async () => { + const blocks = await compileToBlocks(` + + +# 
!!posts One + +Hello **x** + +| a | b | +| - | - | +| 1 | 2 | + + + +# !!posts Two + +After _it_ + + +`) + + expect(blocks.props.posts[0].markdown).toContain("Hello **x**") + expect(blocks.props.posts[0].markdown).toContain("| a | b |") + expect(blocks.props.posts[0].markdown).not.toContain(" { + const blocks = await compileToBlocks(` + + +# !!posts One + +Hello **x** + + +`) + + expect(blocks.props.posts[0].markdown).toBeUndefined() +}) + +test("preserves
spacing semantics around paragraphs", async () => { + const blocks = await compileToBlocks(` + + +# !!posts One + +
+ +First + +
+
+ +Second + +
+ +
+`) + + expect(blocks.props.posts[0].markdown).toBe("\nFirst\n\n\nSecond\n") +}) From 9dca44c4a0dafa593a7c1f830408675565123568 Mon Sep 17 00:00:00 2001 From: Markus Gritsch Date: Thu, 19 Feb 2026 00:08:23 +0100 Subject: [PATCH 2/2] Restore default vitest test script Revert temporary file-scoped test command --- packages/codehike/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/codehike/package.json b/packages/codehike/package.json index 175bd82c..8897f0d4 100644 --- a/packages/codehike/package.json +++ b/packages/codehike/package.json @@ -47,7 +47,7 @@ "scripts": { "build": "tsc -p . ", "dev": "tsc -p . --watch", - "test": "vitest run markdown-enabled.test.ts", + "test": "vitest run", "watch": "vitest -u", "clean": "rm -rf .turbo && rm -rf node_modules && rm -rf dist", "check-exports": "attw --pack ."