diff --git a/go.mod b/go.mod index 97ef1cf2..957cd115 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( github.com/spf13/cobra v1.10.2 github.com/spf13/pflag v1.0.10 github.com/stretchr/testify v1.11.1 + github.com/yuin/goldmark v1.7.13 github.com/zalando/go-keyring v0.2.8 golang.org/x/mod v0.34.0 golang.org/x/sys v0.42.0 @@ -70,7 +71,6 @@ require ( github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect - github.com/yuin/goldmark v1.7.13 // indirect github.com/yuin/goldmark-emoji v1.0.6 // indirect golang.org/x/net v0.38.0 // indirect golang.org/x/sync v0.20.0 // indirect diff --git a/internal/richtext/richtext.go b/internal/richtext/richtext.go index 04f3f210..db59b711 100644 --- a/internal/richtext/richtext.go +++ b/internal/richtext/richtext.go @@ -3,6 +3,7 @@ package richtext import ( + "bytes" "errors" "fmt" "html" @@ -15,40 +16,18 @@ import ( "unicode/utf8" "github.com/charmbracelet/glamour" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + gmhtml "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" ) -// Pre-compiled regexes for MarkdownToHTML list detection -var ( - ulPattern = regexp.MustCompile(`^(\s*)[-*+]\s+(.*)$`) - olPattern = regexp.MustCompile(`^(\s*)\d+\.\s+(.*)$`) -) - -// CommonMark §2.4: any ASCII punctuation may be backslash-escaped. -// Exact set: !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ -// -// We intentionally omit @ from the set: in Basecamp context \@ is the -// idiomatic way to suppress a mention ping, so it must pass through -// literally and not be unescaped into a bare @ that ResolveMentions -// would convert into a mention. 
-const commonMarkEscapablePunctuation = "!\"#$%&'()*+,-./:;<=>?[\\]^_`{|}~" - -// Pre-compiled regexes for convertInline (Markdown → HTML inline elements) -var ( - reCodeSpan = regexp.MustCompile("`([^`]+)`") - reBoldStar = regexp.MustCompile(`\*\*([^*]+)\*\*`) - reBoldUnder = regexp.MustCompile(`__([^_]+)__`) - reItalicStar = regexp.MustCompile(`\*([^*]+)\*`) - reItalicUnder = regexp.MustCompile(`(?:^|[^a-zA-Z0-9])_([^_]+)_(?:[^a-zA-Z0-9]|$)`) - reItalicInner = regexp.MustCompile(`_([^_]+)_`) - reImage = regexp.MustCompile(`!\[([^\]]*)\]\(([^)]+)\)`) - reLink = regexp.MustCompile(`\[([^\]]+)\]\(([^)]+)\)`) - reStrikethrough = regexp.MustCompile(`~~([^~]+)~~`) - - // Protect escaped backticks before code-span detection so \` does not start a code span. - reEscapedBacktick = regexp.MustCompile("\\\\`") - // Matches a backslash followed by any CommonMark-escapable ASCII punctuation character. - reBackslashEscape = regexp.MustCompile(`\\([` + regexp.QuoteMeta(commonMarkEscapablePunctuation) + `])`) -) +// Pre-compiled regexes for IsHTML detection (code span stripping) +var reCodeSpan = regexp.MustCompile("`([^`]+)`") // Pre-compiled regexes for HTMLToMarkdown (HTML → Markdown block elements) var ( @@ -58,14 +37,11 @@ var ( reH4 = regexp.MustCompile(`(?i)]*>(.*?)`) reH5 = regexp.MustCompile(`(?i)]*>(.*?)`) reH6 = regexp.MustCompile(`(?i)]*>(.*?)`) - reBlockquote = regexp.MustCompile(`(?i)]*>(.*?)`) + reBlockquote = regexp.MustCompile(`(?is)]*>(.*?)`) reCodeBlock = regexp.MustCompile(`(?is)]*>]*(?:class="language-([^"]*)")?[^>]*>(.*?)`) reCodeLang = regexp.MustCompile(`class="language-([^"]*)"`) reCodeInner = regexp.MustCompile(`(?is)]*>([\s\S]*?)`) - reUL = regexp.MustCompile(`(?is)]*>(.*?)`) - reOL = regexp.MustCompile(`(?is)]*>(.*?)`) - reLI = regexp.MustCompile(`(?is)]*>(.*?)`) - reP = regexp.MustCompile(`(?i)]*>(.*?)

`) + reP = regexp.MustCompile(`(?is)]*>(.*?)

`) reBR = regexp.MustCompile(`(?i)`) reHR = regexp.MustCompile(`(?i)`) ) @@ -131,355 +107,229 @@ var reMarkdownPatterns = []*regexp.Regexp{ regexp.MustCompile(`^>\s`), } -// MarkdownToHTML converts Markdown text to HTML suitable for Basecamp's rich text fields. -// It handles common Markdown syntax: headings, bold, italic, links, lists, code blocks, and blockquotes. -// If the input already appears to be HTML, it is returned unchanged to preserve existing formatting. -func MarkdownToHTML(md string) string { - if md == "" { - return "" - } - - // If input is already HTML, return unchanged to preserve existing content - if IsHTML(md) { - return md - } - - // Normalize line endings - md = strings.ReplaceAll(md, "\r\n", "\n") - md = strings.ReplaceAll(md, "\r", "\n") - - var result strings.Builder - lines := strings.Split(md, "\n") - - var inCodeBlock bool - var codeBlockLang string - var codeLines []string - var inList bool - var listItems []string - var listType string // "ul" or "ol" - var pendingBreak bool - var paraLines []string - - flushPendingBreak := func() { - if pendingBreak { - result.WriteString("
\n") - pendingBreak = false - } - } +// mdConverter is the goldmark Markdown-to-HTML converter configured for Trix compatibility. +var mdConverter = goldmark.New( + goldmark.WithExtensions(extension.Strikethrough), + goldmark.WithRendererOptions(gmhtml.WithUnsafe()), + goldmark.WithParserOptions( + parser.WithInlineParsers( + util.Prioritized(&escapedAtParser{}, 900), + ), + parser.WithASTTransformers( + util.Prioritized(&trixTransformer{}, 100), + ), + ), + goldmark.WithRendererOptions( + renderer.WithNodeRenderers( + util.Prioritized(&trixRenderer{}, 500), + ), + ), +) - flushParagraph := func() { - if len(paraLines) > 0 { - flushPendingBreak() - text := strings.Join(paraLines, " ") - result.WriteString("

" + convertInline(text) + "

\n") - paraLines = nil - } - } +// TrixBreak is a custom block node that renders as
\n for Trix paragraph spacing. +type TrixBreak struct{ ast.BaseBlock } - flushList := func() { - if len(listItems) > 0 { - result.WriteString("<" + listType + ">\n") - for _, item := range listItems { - result.WriteString("
  • " + item + "
  • \n") - } - result.WriteString("\n") - listItems = nil - inList = false - } - } +// KindTrixBreak is the node kind for TrixBreak. +var KindTrixBreak = ast.NewNodeKind("TrixBreak") - for i := range lines { - line := lines[i] - - // Handle code blocks - if after, ok := strings.CutPrefix(line, "```"); ok { - if inCodeBlock { - // End code block - code := strings.Join(codeLines, "\n") - code = escapeHTML(code) - if codeBlockLang != "" { - // Sanitize language to prevent attribute injection - safeLang := sanitizeLanguage(codeBlockLang) - result.WriteString("
    " + code + "
    \n") - } else { - result.WriteString("
    " + code + "
    \n") - } - inCodeBlock = false - codeLines = nil - codeBlockLang = "" - } else { - // Start code block - flushParagraph() - flushList() - flushPendingBreak() - inCodeBlock = true - codeBlockLang = after - } - continue - } +func (n *TrixBreak) Kind() ast.NodeKind { return KindTrixBreak } +func (n *TrixBreak) Dump(source []byte, level int) { ast.DumpHelper(n, source, level, nil, nil) } - if inCodeBlock { - codeLines = append(codeLines, line) - continue - } +// EscapedAt is a custom inline node that renders as literal \@. +type EscapedAt struct{ ast.BaseInline } - // Check for list items (using precompiled regexes) - ulMatch := ulPattern.FindStringSubmatch(line) - olMatch := olPattern.FindStringSubmatch(line) - - if ulMatch != nil { - flushParagraph() - if !inList || listType != "ul" { - flushList() - flushPendingBreak() - inList = true - listType = "ul" - } - pendingBreak = false // blank was between items, not after the list - listItems = append(listItems, convertInline(ulMatch[2])) - continue - } +// KindEscapedAt is the node kind for EscapedAt. +var KindEscapedAt = ast.NewNodeKind("EscapedAt") - if olMatch != nil { - flushParagraph() - if !inList || listType != "ol" { - flushList() - flushPendingBreak() - inList = true - listType = "ol" - } - pendingBreak = false // blank was between items, not after the list - listItems = append(listItems, convertInline(olMatch[2])) - continue - } +func (n *EscapedAt) Kind() ast.NodeKind { return KindEscapedAt } +func (n *EscapedAt) Dump(source []byte, level int) { ast.DumpHelper(n, source, level, nil, nil) } - // Empty line - handle differently based on context - if strings.TrimSpace(line) == "" { - if inList { - // In a list: empty lines between items create spacing but don't break the list. - // Record pending break so content after the list gets proper separation. 
- pendingBreak = true - continue - } - // Not in a list: flush paragraph and record break - flushParagraph() - if result.Len() > 0 { - pendingBreak = true - } - continue - } +// escapedAtParser intercepts \@ before goldmark's standard backslash escape handling. +type escapedAtParser struct{} - // Check for list continuation lines (indented text that continues previous list item) - if inList && len(listItems) > 0 { - // Check if line is indented (starts with spaces or tabs) - if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") { - // This is a continuation of the last list item - trimmedLine := strings.TrimSpace(line) - // Append to last list item with
    separator - lastItemIndex := len(listItems) - 1 - listItems[lastItemIndex] = listItems[lastItemIndex] + "
    \n" + convertInline(trimmedLine) - pendingBreak = false // blank was before continuation, not after the list - continue - } - } +func (p *escapedAtParser) Trigger() []byte { return []byte{'\\'} } - // Not a list item or continuation, flush any pending list - flushList() +func (p *escapedAtParser) Parse(_ ast.Node, block text.Reader, _ parser.Context) ast.Node { + line, _ := block.PeekLine() + if len(line) < 2 || line[0] != '\\' || line[1] != '@' { + return nil + } + block.Advance(2) + return &EscapedAt{} +} - // Headings - if strings.HasPrefix(line, "#") { - flushParagraph() - flushPendingBreak() - } - if after, ok := strings.CutPrefix(line, "######"); ok { - result.WriteString("
    " + convertInline(strings.TrimSpace(after)) + "
    \n") - continue - } - if after, ok := strings.CutPrefix(line, "#####"); ok { - result.WriteString("
    " + convertInline(strings.TrimSpace(after)) + "
    \n") - continue - } - if after, ok := strings.CutPrefix(line, "####"); ok { - result.WriteString("

    " + convertInline(strings.TrimSpace(after)) + "

    \n") - continue - } - if after, ok := strings.CutPrefix(line, "###"); ok { - result.WriteString("

    " + convertInline(strings.TrimSpace(after)) + "

    \n") - continue - } - if after, ok := strings.CutPrefix(line, "##"); ok { - result.WriteString("

    " + convertInline(strings.TrimSpace(after)) + "

    \n") - continue - } - if after, ok := strings.CutPrefix(line, "#"); ok { - result.WriteString("

    " + convertInline(strings.TrimSpace(after)) + "

    \n") - continue - } +// trixTransformer modifies the AST for Trix-compatible HTML output. +type trixTransformer struct{} - // Blockquote - if strings.HasPrefix(line, ">") { - flushParagraph() - flushPendingBreak() +func (t *trixTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + // Phase 1: Force tight lists, convert soft breaks to hard in list items, + // and unwrap blockquote paragraphs + _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil } - if after, ok := strings.CutPrefix(line, ">"); ok { - quote := strings.TrimSpace(after) - result.WriteString("
    " + convertInline(quote) + "
    \n") - continue + switch v := n.(type) { + case *ast.List: + v.IsTight = true + for li := v.FirstChild(); li != nil; li = li.NextSibling() { + replaceParagraphsWithTextBlocks(li) + convertSoftBreaksToHard(li) + } + case *ast.Blockquote: + replaceParagraphsWithTextBlocks(v) + convertSoftBreaksToHard(v) + insertBreaksBetweenTextBlocks(v) } + return ast.WalkContinue, nil + }) - // Horizontal rule - trimmed := strings.TrimSpace(line) - if len(trimmed) >= 3 && (allChars(trimmed, '-') || allChars(trimmed, '*') || allChars(trimmed, '_')) { - flushParagraph() - flushPendingBreak() - result.WriteString("
    \n") - continue + // Phase 2: Insert TrixBreak nodes before blank-line-separated top-level blocks + for child := node.FirstChild(); child != nil; child = child.NextSibling() { + if child.HasBlankPreviousLines() && child.PreviousSibling() != nil { + br := &TrixBreak{} + node.InsertBefore(node, child, br) } - - // Accumulate paragraph lines - paraLines = append(paraLines, line) } +} - // Flush any remaining paragraph or list - flushParagraph() - flushList() - - // Handle unclosed code block - if inCodeBlock && len(codeLines) > 0 { - code := strings.Join(codeLines, "\n") - code = escapeHTML(code) - result.WriteString("
    " + code + "
    \n") +func replaceParagraphsWithTextBlocks(parent ast.Node) { + for child := parent.FirstChild(); child != nil; { + next := child.NextSibling() + if p, ok := child.(*ast.Paragraph); ok { + tb := ast.NewTextBlock() + for gc := p.FirstChild(); gc != nil; { + gnext := gc.NextSibling() + tb.AppendChild(tb, gc) + gc = gnext + } + tb.SetLines(p.Lines()) + parent.ReplaceChild(parent, p, tb) + } + child = next } - - return strings.TrimSpace(result.String()) } -// convertInline converts inline Markdown elements (bold, italic, links, code) to HTML. -// Code spans and backslash escapes are protected from further processing to preserve -// their literal content. -func convertInline(text string) string { - // Protect escaped backticks before code-span detection so \` remains literal - // and cannot be interpreted as a code-span delimiter. - var escapedBackticks []string - text = reEscapedBacktick.ReplaceAllStringFunc(text, func(_ string) string { - idx := len(escapedBackticks) - escapedBackticks = append(escapedBackticks, "`") - return "\x00ESCBT" + strconv.Itoa(idx) + "\x00" - }) - - // Extract code spans — their content must be completely literal. - var codeSpans []string - text = reCodeSpan.ReplaceAllStringFunc(text, func(match string) string { - inner := reCodeSpan.FindStringSubmatch(match) - if len(inner) >= 2 { - idx := len(codeSpans) - codeSpans = append(codeSpans, inner[1]) - return "\x00CODE" + strconv.Itoa(idx) + "\x00" +func convertSoftBreaksToHard(parent ast.Node) { + _ = ast.Walk(parent, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil } - return match - }) - - // Process backslash escapes (CommonMark §2.4): a backslash before an ASCII - // punctuation character produces the literal character. We extract these into - // placeholders so they are not treated as Markdown delimiters and restore - // them afterward. 
We use attribute-safe escaping on restore because escaped - // punctuation can be captured inside href/src values before link/image HTML is built. - var escaped []string - text = reBackslashEscape.ReplaceAllStringFunc(text, func(match string) string { - idx := len(escaped) - escaped = append(escaped, match[1:]) // the punctuation character after the backslash - return "\x00ESC" + strconv.Itoa(idx) + "\x00" + if t, ok := n.(*ast.Text); ok && t.SoftLineBreak() { + t.SetSoftLineBreak(false) + t.SetHardLineBreak(true) + } + return ast.WalkContinue, nil }) +} - // Escape HTML entities - text = escapeHTML(text) - - // Bold with ** or __ - text = reBoldStar.ReplaceAllString(text, "$1") - text = reBoldUnder.ReplaceAllString(text, "$1") - - // Italic with * or _ (but not inside words for _) - text = reItalicStar.ReplaceAllString(text, "$1") - text = reItalicUnder.ReplaceAllStringFunc(text, func(s string) string { - inner := reItalicInner.FindStringSubmatch(s) - if len(inner) >= 2 { - prefix := "" - suffix := "" - if len(s) > 0 && s[0] != '_' { - prefix = string(s[0]) - } - if len(s) > 0 && s[len(s)-1] != '_' { - suffix = string(s[len(s)-1]) +func insertBreaksBetweenTextBlocks(parent ast.Node) { + for child := parent.FirstChild(); child != nil; child = child.NextSibling() { + if _, ok := child.(*ast.TextBlock); ok { + if next := child.NextSibling(); next != nil { + if _, ok := next.(*ast.TextBlock); ok { + br := &TrixBreak{} + parent.InsertAfter(parent, child, br) + } } - return prefix + "" + inner[1] + "" + suffix } - return s - }) + } +} - // Images ![alt](url) - MUST come before links since image syntax contains link syntax - text = reImage.ReplaceAllStringFunc(text, func(match string) string { - parts := reImage.FindStringSubmatch(match) - if len(parts) >= 3 { - alt := escapeAttr(parts[1]) - src := resolveDestinationEscapes(parts[2], escaped, escapedBackticks) - src = escapeAttr(src) - return `` + alt + `` - } - return match - }) +// trixRenderer provides custom 
rendering for Trix-compatible HTML output. +type trixRenderer struct{} - // Links [text](url) - text = reLink.ReplaceAllStringFunc(text, func(match string) string { - parts := reLink.FindStringSubmatch(match) - if len(parts) >= 3 { - linkText := parts[1] - href := resolveDestinationEscapes(parts[2], escaped, escapedBackticks) - href = escapeAttr(href) - return `` + linkText + `` - } - return match - }) +func (r *trixRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindRawHTML, r.renderRawHTML) + reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock) + reg.Register(ast.KindBlockquote, r.renderBlockquote) + reg.Register(KindTrixBreak, r.renderTrixBreak) + reg.Register(KindEscapedAt, r.renderEscapedAt) +} - // Strikethrough ~~text~~ - text = reStrikethrough.ReplaceAllString(text, "$1") +func (r *trixRenderer) renderBlockquote(w util.BufWriter, _ []byte, _ ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + _, _ = w.WriteString("
    ") + } else { + _, _ = w.WriteString("
    \n") + } + return ast.WalkContinue, nil +} - // Restore backslash-escaped characters in body text. Placeholders inside - // link/image destinations were already resolved with percent-encoding above. - escapedRendered := make([]string, len(escaped)) - for i, ch := range escaped { - escapedRendered[i] = escapeAttr(ch) +func (r *trixRenderer) renderRawHTML(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + n, ok := node.(*ast.RawHTML) + if !ok { + return ast.WalkContinue, nil } - text = restorePlaceholders(text, "ESC", escapedRendered) - text = restorePlaceholders(text, "ESCBT", escapedRenderedBackticks(escapedBackticks)) - - // Restore code spans (HTML-escape their content since extraction now - // happens before escapeHTML to allow backslash-escape processing). - codeRendered := make([]string, len(codeSpans)) - for i, code := range codeSpans { - codeRendered[i] = "" + escapeHTML(code) + "" + for i := 0; i < n.Segments.Len(); i++ { + seg := n.Segments.At(i) + _, _ = w.Write(util.EscapeHTML(seg.Value(source))) } - text = restorePlaceholders(text, "CODE", codeRendered) + return ast.WalkContinue, nil +} - return text +func (r *trixRenderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + n, ok := node.(*ast.HTMLBlock) + if !ok { + return ast.WalkContinue, nil + } + lines := n.Lines() + parts := make([]string, 0, lines.Len()+1) + for i := 0; i < lines.Len(); i++ { + seg := lines.At(i) + escaped := strings.TrimRight(string(util.EscapeHTML(seg.Value(source))), "\n") + parts = append(parts, escaped) + } + if n.HasClosure() { + escaped := strings.TrimRight(string(util.EscapeHTML(n.ClosureLine.Value(source))), "\n") + parts = append(parts, escaped) + } + _, _ = w.WriteString("

    " + strings.Join(parts, " ") + "

    \n") + return ast.WalkContinue, nil } -func escapedRenderedBackticks(backticks []string) []string { - rendered := make([]string, len(backticks)) - for i, bt := range backticks { - rendered[i] = escapeAttr(bt) +func (r *trixRenderer) renderTrixBreak(w util.BufWriter, _ []byte, _ ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil } - return rendered + _, _ = w.WriteString("
    \n") + return ast.WalkContinue, nil } -func restorePlaceholders(text, prefix string, replacements []string) string { - if len(replacements) == 0 { - return text +func (r *trixRenderer) renderEscapedAt(w util.BufWriter, _ []byte, _ ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil } - pairs := make([]string, 0, len(replacements)*2) - for i, repl := range replacements { - pairs = append(pairs, "\x00"+prefix+strconv.Itoa(i)+"\x00", repl) + _, _ = w.WriteString(`\@`) + return ast.WalkContinue, nil +} + +// MarkdownToHTML converts Markdown text to HTML suitable for Basecamp's rich text fields. +// It uses goldmark with custom AST transformations for Trix editor compatibility. +// If the input already appears to be HTML, it is returned unchanged to preserve existing formatting. +func MarkdownToHTML(md string) string { + if md == "" { + return "" + } + + if IsHTML(md) { + return md + } + + md = strings.ReplaceAll(md, "\r\n", "\n") + md = strings.ReplaceAll(md, "\r", "\n") + + var buf bytes.Buffer + if err := mdConverter.Convert([]byte(md), &buf); err != nil { + return "

    " + html.EscapeString(md) + "

    " } - return strings.NewReplacer(pairs...).Replace(text) + + return strings.TrimSpace(buf.String()) } // escapeHTML escapes special HTML characters. @@ -498,73 +348,6 @@ func escapeAttr(s string) string { return s } -// percentEncodeChar percent-encodes a single byte for use in URL destinations. -// Characters left literal match the destination-safe set derived from markdown-it: -// !$&'()*+,-./:;=?@_~# -// Everything else gets %XX hex encoding. -func percentEncodeChar(ch byte) string { - switch { - case ch >= 'A' && ch <= 'Z', ch >= 'a' && ch <= 'z', ch >= '0' && ch <= '9': - return string(ch) - case ch == '!' || ch == '$' || ch == '&' || ch == '\'' || - ch == '(' || ch == ')' || ch == '*' || ch == '+' || - ch == ',' || ch == '-' || ch == '.' || ch == '/' || - ch == ':' || ch == ';' || ch == '=' || ch == '?' || - ch == '@' || ch == '_' || ch == '~' || ch == '#': - return string(ch) - default: - return fmt.Sprintf("%%%02X", ch) - } -} - -// resolveDestinationEscapes restores ESC and ESCBT placeholders within a link/image -// destination using percent-encoding instead of HTML entity escaping. -func resolveDestinationEscapes(dest string, escaped []string, escapedBackticks []string) string { - for i, ch := range escaped { - placeholder := "\x00ESC" + strconv.Itoa(i) + "\x00" - if strings.Contains(dest, placeholder) { - var encoded strings.Builder - for j := range len(ch) { - encoded.WriteString(percentEncodeChar(ch[j])) - } - dest = strings.ReplaceAll(dest, placeholder, encoded.String()) - } - } - for i, bt := range escapedBackticks { - placeholder := "\x00ESCBT" + strconv.Itoa(i) + "\x00" - if strings.Contains(dest, placeholder) { - var encoded strings.Builder - for j := range len(bt) { - encoded.WriteString(percentEncodeChar(bt[j])) - } - dest = strings.ReplaceAll(dest, placeholder, encoded.String()) - } - } - return dest -} - -// sanitizeLanguage sanitizes a code block language identifier to prevent attribute injection. 
-// Only allows alphanumeric characters, hyphens, and underscores. -func sanitizeLanguage(lang string) string { - var result strings.Builder - for _, r := range lang { - if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '_' { - result.WriteRune(r) - } - } - return result.String() -} - -// allChars returns true if the string consists entirely of the given character. -func allChars(s string, c byte) bool { - for i := range len(s) { - if s[i] != c && s[i] != ' ' { - return false - } - } - return true -} - // glamourCache caches glamour renderers by width to avoid repeated construction. var ( glamourMu sync.Mutex @@ -647,66 +430,38 @@ func HTMLToMarkdown(html string) string { html = reH5.ReplaceAllString(html, "##### $1\n\n") html = reH6.ReplaceAllString(html, "###### $1\n\n") - // Blockquotes - html = reBlockquote.ReplaceAllStringFunc(html, func(s string) string { + // Blockquotes — convert inner block elements (lists, code, paragraphs) to + // Markdown first, then prefix each line with >. Loop handles nesting: + // the lazy regex matches outermost open → innermost close, so each pass + // converts one level and the next pass handles the enclosing level. 
+ convertBlockquote := func(s string) string { inner := reBlockquote.FindStringSubmatch(s) if len(inner) >= 2 { - lines := strings.Split(strings.TrimSpace(inner[1]), "\n") + content := blockquoteInnerToMarkdown(inner[1]) + lines := strings.Split(content, "\n") result := make([]string, 0, len(lines)) for _, line := range lines { - result = append(result, "> "+strings.TrimSpace(line)) + if line == "" { + result = append(result, ">") + } else { + result = append(result, "> "+line) + } } return strings.Join(result, "\n") + "\n\n" } return s - }) + } + for reBlockquote.MatchString(html) { + html = reBlockquote.ReplaceAllStringFunc(html, convertBlockquote) + } - // Code blocks (use (?is) for case-insensitive and dotall mode to match multi-line content) + // Code blocks html = reCodeBlock.ReplaceAllStringFunc(html, func(s string) string { - langMatch := reCodeLang.FindStringSubmatch(s) - lang := "" - if len(langMatch) >= 2 { - lang = langMatch[1] - } - codeMatch := reCodeInner.FindStringSubmatch(s) - if len(codeMatch) >= 2 { - code := unescapeHTML(codeMatch[1]) - return "```" + lang + "\n" + code + "\n```\n\n" - } - return s + return convertCodeBlockHTML(s) + "\n\n" }) - // Unordered lists - html = reUL.ReplaceAllStringFunc(html, func(s string) string { - inner := reUL.FindStringSubmatch(s) - if len(inner) >= 2 { - items := reLI.FindAllStringSubmatch(inner[1], -1) - var result []string - for _, item := range items { - if len(item) >= 2 { - result = append(result, "- "+strings.TrimSpace(item[1])) - } - } - return strings.Join(result, "\n") + "\n\n" - } - return s - }) - - // Ordered lists - html = reOL.ReplaceAllStringFunc(html, func(s string) string { - inner := reOL.FindStringSubmatch(s) - if len(inner) >= 2 { - items := reLI.FindAllStringSubmatch(inner[1], -1) - var result []string - for i, item := range items { - if len(item) >= 2 { - result = append(result, strconv.Itoa(i+1)+". 
"+strings.TrimSpace(item[1])) - } - } - return strings.Join(result, "\n") + "\n\n" - } - return s - }) + // Lists — use balanced-tag replacement to handle nesting correctly. + html = replaceBalancedListBlocks(html) // Paragraphs html = reP.ReplaceAllString(html, "$1\n\n") @@ -789,6 +544,202 @@ func HTMLToMarkdown(html string) string { return strings.TrimSpace(html) } +// reBRLine matches a
    tag followed by an optional newline, collapsing +// the pair to a single \n. goldmark's hard-break output is
    \n; Trix API +// content may have standalone
    . +var reBRLine = regexp.MustCompile(`(?i)\n?`) + +// formatListItem converts a list item's HTML content to Markdown, handling +//
    tags as indented continuation lines. +func formatListItem(prefix, indent, content string) string { + content = strings.TrimSpace(content) + content = reBRLine.ReplaceAllString(content, "\n") + lines := strings.Split(content, "\n") + var parts []string + for i, line := range lines { + if i == 0 { + parts = append(parts, prefix+strings.TrimSpace(line)) + } else { + // Preserve existing indentation from nested list conversion + parts = append(parts, indent+line) + } + } + return strings.Join(parts, "\n") +} + +// convertCodeBlockHTML converts a
    ...
    match to Markdown. +// Entities are left escaped so that later regex passes (reP, reStripTags) don't +// corrupt code content like <p>. The global unescapeHTML at the end of +// HTMLToMarkdown converts them. +func convertCodeBlockHTML(s string) string { + langMatch := reCodeLang.FindStringSubmatch(s) + lang := "" + if len(langMatch) >= 2 { + lang = langMatch[1] + } + codeMatch := reCodeInner.FindStringSubmatch(s) + if len(codeMatch) >= 2 { + code := strings.TrimSuffix(codeMatch[1], "\n") + return "```" + lang + "\n" + code + "\n```" + } + return s +} + +// reLIOpen matches an opening
  • tag (with optional attributes). +var reLIOpen = regexp.MustCompile(`(?i)]*>`) + +// hasPrefixFold checks if s starts with prefix using ASCII case-insensitive +// comparison. Safe for HTML tag matching without ToLower index desync. +func hasPrefixFold(s, prefix string) bool { + return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix) +} + +// extractListItems extracts top-level
  • content by tracking nesting depth, +// correctly handling nested
  • tags that trip up regex-based extraction. +// Nested
      /
        inside items are recursively converted to Markdown. +func extractListItems(html string) []string { + var items []string + i := 0 + for { + // Find next top-level
      1. opening tag (regex is case-insensitive) + loc := reLIOpen.FindStringIndex(html[i:]) + if loc == nil { + break + } + contentStart := i + loc[1] + + // Walk forward tracking
      2. depth to find the matching
      3. + depth := 1 + j := contentStart + for j < len(html) && depth > 0 { + if hasPrefixFold(html[j:], "") { + depth-- + if depth == 0 { + content := html[contentStart:j] + content = replaceBalancedListBlocks(content) + items = append(items, content) + j += 5 + break + } + j += 5 + } else if loc := reLIOpen.FindStringIndex(html[j:]); loc != nil && loc[0] == 0 { + depth++ + j += loc[1] + } else { + j++ + } + } + i = j + } + return items +} + +var reListOpen = regexp.MustCompile(`(?i)<(ul|ol)[^>]*>`) + +// replaceBalancedListBlocks finds top-level
          /
            blocks by tracking tag +// depth and converts each to Markdown. Handles nesting correctly where regex +// lazy/greedy matching cannot. +func replaceBalancedListBlocks(html string) string { + var result strings.Builder + i := 0 + for { + loc := reListOpen.FindStringSubmatchIndex(html[i:]) + if loc == nil { + result.WriteString(html[i:]) + break + } + matchStart := i + loc[0] + tag := strings.ToLower(html[i+loc[2] : i+loc[3]]) // "ul" or "ol" + contentStart := i + loc[1] + + result.WriteString(html[i:matchStart]) + + depth := 1 + j := contentStart + for j < len(html) && depth > 0 { + // Decrement for any list close tag (handles mixed
              /
                nesting) + if hasPrefixFold(html[j:], "
            ") || hasPrefixFold(html[j:], "
          ") { + closeLen := 5 // len("
        ") == len("
      ") + depth-- + if depth == 0 { + inner := html[contentStart:j] + var md string + if tag == "ul" { + md = convertULInner(inner) + } else { + md = convertOLInner(inner) + } + s := result.String() + if len(s) > 0 && s[len(s)-1] != '\n' { + result.WriteByte('\n') + } + result.WriteString(md + "\n\n") + j += closeLen + break + } + j += closeLen + } else if loc := reListOpen.FindStringSubmatchIndex(html[j:]); loc != nil && loc[0] == 0 { + depth++ + j += loc[1] + } else { + j++ + } + } + if depth > 0 { + // Unclosed tag — write original text + result.WriteString(html[matchStart:]) + break + } + i = j + } + return result.String() +} + +// convertULInner converts inner
        content (between
          and
        ) to Markdown. +func convertULInner(inner string) string { + items := extractListItems(inner) + result := make([]string, 0, len(items)) + for _, content := range items { + result = append(result, formatListItem("- ", " ", content)) + } + return strings.Join(result, "\n") +} + +// convertOLInner converts inner
          content (between
            and
          ) to Markdown. +func convertOLInner(inner string) string { + items := extractListItems(inner) + result := make([]string, 0, len(items)) + for i, content := range items { + prefix := strconv.Itoa(i+1) + ". " + indent := strings.Repeat(" ", len(prefix)) + result = append(result, formatListItem(prefix, indent, content)) + } + return strings.Join(result, "\n") +} + +// blockquoteInnerToMarkdown converts the inner HTML of a blockquote to Markdown, +// handling nested block elements (lists, code blocks) before line-level operations. +func blockquoteInnerToMarkdown(inner string) string { + content := strings.TrimSpace(inner) + content = reCodeBlock.ReplaceAllStringFunc(content, func(s string) string { + return convertCodeBlockHTML(s) + "\n\n" + }) + content = replaceBalancedListBlocks(content) + // Replace

</p> with double newline (paragraph break) to separate adjacent blocks, + // then strip <p> openers. Two passes so <p>para1</p><p>para2</p>
          produces + // "para1\n\npara2" (blank line = > separator) rather than "para1para2". + content = reClosingP.ReplaceAllString(content, "\n\n") + content = reOpeningP.ReplaceAllString(content, "") + content = reBRLine.ReplaceAllString(content, "\n") + content = reMultiNewline.ReplaceAllString(content, "\n\n") + return strings.TrimSpace(content) +} + +var ( + reOpeningP = regexp.MustCompile(`(?i)]*)?>`) + reClosingP = regexp.MustCompile(`(?i)

          `) +) + // unescapeHTML converts HTML entities back to their characters. func unescapeHTML(s string) string { s = strings.ReplaceAll(s, "&", "&") @@ -934,7 +885,7 @@ func resolveMentionAnchors(html string, lookupByID PersonByIDFunc) (string, erro switch scheme { case "mention": // Zero API calls — use value as SGID, link text as display name (caller-trusted). - // Unescape HTML because convertInline already escaped the link text (e.g. & → &) + // Unescape HTML because goldmark already escaped the link text (e.g. & → &) // and MentionToHTML will re-escape — without this we'd double-encode. name := unescapeHTML(strings.TrimPrefix(displayText, "@")) tag = MentionToHTML(value, name) diff --git a/internal/richtext/richtext_test.go b/internal/richtext/richtext_test.go index 42796be7..fa9393b0 100644 --- a/internal/richtext/richtext_test.go +++ b/internal/richtext/richtext_test.go @@ -80,7 +80,7 @@ func TestMarkdownToHTML(t *testing.T) { { name: "ordered list with trailing spaces and descriptions", input: "1. **Item** - [Link](url) (time) \n Description here\n\n2. **Next** - [Link](url)", - expected: "
            \n
          1. Item - Link (time)
            \nDescription here
          2. \n
          3. Next - Link
          4. \n
          ", + expected: "
            \n
          1. Item - Link (time)
            \nDescription here
          2. \n
          3. Next - Link
          4. \n
          ", }, { name: "list followed by blank line then paragraph", @@ -88,9 +88,12 @@ func TestMarkdownToHTML(t *testing.T) { expected: "
            \n
          • Item 1
          • \n
          • Item 2
          • \n
          \n
          \n

          Following paragraph.

          ", }, { - name: "blank between list items does not leak break after list", + // CommonMark §5.4: "After" is a lazy continuation of the second list item. + // goldmark treats non-indented continuation lines as part of the list item, + // unlike our previous hand-rolled parser which ended the list. + name: "lazy continuation stays in list item", input: "- One\n\n- Two\nAfter", - expected: "
            \n
          • One
          • \n
          • Two
          • \n
          \n

          After

          ", + expected: "
            \n
          • One
          • \n
          • Two
            \nAfter
          • \n
          ", }, { name: "blockquote", @@ -100,12 +103,12 @@ func TestMarkdownToHTML(t *testing.T) { { name: "code block", input: "```go\nfunc main() {}\n```", - expected: `
          func main() {}
          `, + expected: "
          func main() {}\n
          ", }, { name: "code block without language", input: "```\nsome code\n```", - expected: "
          some code
          ", + expected: "
          some code\n
          ", }, { name: "horizontal rule with dashes", @@ -150,7 +153,7 @@ func TestMarkdownToHTML(t *testing.T) { { name: "consecutive lines join into one paragraph", input: "Line one\nLine two", - expected: "

          Line one Line two

          ", + expected: "

          Line one\nLine two

          ", }, { name: "blank line before list", @@ -160,7 +163,7 @@ func TestMarkdownToHTML(t *testing.T) { { name: "blank line before code block", input: "Intro\n\n```\ncode\n```", - expected: "

          Intro

          \n
          \n
          code
          ", + expected: "

          Intro

          \n
          \n
          code\n
          ", }, { name: "leading blank lines ignored", @@ -195,12 +198,13 @@ func TestMarkdownToHTML(t *testing.T) { { name: "code fence flushes accumulated paragraph", input: "Text\n```go\nx\n```", - expected: "

          Text

          \n
          x
          ", + expected: "

          Text

          \n
          x\n
          ", }, { - name: "horizontal rule flushes accumulated paragraph", + // CommonMark: "Text\n---" is a setext heading (h2), not paragraph + hr + name: "setext heading level 2", input: "Text\n---", - expected: "

          Text

          \n
          ", + expected: "

          Text

          ", }, { name: "code span containing HTML tag is converted not passthrough", @@ -210,7 +214,7 @@ func TestMarkdownToHTML(t *testing.T) { { name: "fenced code block containing HTML tags is converted", input: "intro\n\n```\n
          hello
          \n```", - expected: "

          intro

          \n
          \n
          <div>hello</div>
          ", + expected: "

          intro

          \n
          \n
          <div>hello</div>\n
          ", }, } @@ -301,9 +305,10 @@ func TestMarkdownToHTMLBackslashEscapes(t *testing.T) { expected: `

          x

          `, }, { + // goldmark treats \% as literal % in URLs (CommonMark spec) name: "escaped percent in link destination", input: `[x](https://example.com/\%20)`, - expected: `

          x

          `, + expected: `

          x

          `, }, { name: "escaped backslash in link destination", @@ -323,7 +328,7 @@ func TestMarkdownToHTMLBackslashEscapes(t *testing.T) { { name: "escaped percent in image src", input: `![alt](https://example.com/\%20.png)`, - expected: `

          alt

          `, + expected: `

          alt

          `, }, { name: "literal-safe chars stay literal in link destination", @@ -393,6 +398,362 @@ func TestMarkdownToHTMLBackslashEscapes(t *testing.T) { } } +func TestMarkdownToHTMLBackslashAtCounts(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "single backslash at", + input: `\@John`, + expected: `

          \@John

          `, + }, + { + name: "double backslash at", + input: `\\@John`, + expected: `

          \@John

          `, + }, + { + name: "triple backslash at", + input: `\\\@John`, + expected: `

          \\@John

          `, + }, + { + name: "quadruple backslash at", + input: `\\\\@John`, + expected: `

          \\@John

          `, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := MarkdownToHTML(tt.input) + if result != tt.expected { + t.Errorf("MarkdownToHTML(%q)\ngot: %q\nwant: %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestMarkdownToHTMLMultiParagraphBlockquote(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "single line", + input: "> text", + expected: "
          text
          ", + }, + { + name: "multiline", + input: "> line1\n> line2", + expected: "
          line1
          \nline2
          ", + }, + { + name: "multi-paragraph", + input: "> para1\n>\n> para2", + expected: "
          para1\n
          \npara2
          ", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := MarkdownToHTML(tt.input) + if result != tt.expected { + t.Errorf("MarkdownToHTML(%q)\ngot: %q\nwant: %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestMarkdownToHTMLRawHTMLBlock(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "single-line script tag", + input: "", + expected: "

          <script>alert(1)</script>

          ", + }, + { + name: "multiline script tag", + input: "", + expected: "

          <script> alert(1) </script>

          ", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := MarkdownToHTML(tt.input) + if result != tt.expected { + t.Errorf("MarkdownToHTML(%q)\ngot: %q\nwant: %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestHTMLToMarkdownMultilineBlockquote(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "single paragraph", + input: "
          \n

          text

          \n
          ", + expected: "> text", + }, + { + name: "adjacent paragraphs", + input: "

          para1

          para2

          ", + expected: "> para1\n>\n> para2", + }, + { + name: "paragraph then list", + input: "

          intro

          • one
          • two
          ", + expected: "> intro\n>\n> - one\n> - two", + }, + { + name: "paragraph then code block", + input: "

          intro

          code
          ", + expected: "> intro\n>\n> ```\n> code\n> ```", + }, + { + name: "code block then paragraph", + input: "
          code

          tail

          ", + expected: "> ```\n> code\n> ```\n>\n> tail", + }, + { + name: "code block then nested blockquote", + input: "
          code
          nested
          ", + expected: "> ```\n> code\n> ```\n>\n> > nested", + }, + { + name: "whitespace-separated paragraphs", + input: "
          \n

          para1

          \n

          para2

          \n
          ", + expected: "> para1\n>\n> para2", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HTMLToMarkdown(tt.input) + if result != tt.expected { + t.Errorf("HTMLToMarkdown(%q)\ngot: %q\nwant: %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestHTMLToMarkdownMultilineParagraph(t *testing.T) { + input := "

          line1\nline2

          " + result := HTMLToMarkdown(input) + if !strings.Contains(result, "line1") || !strings.Contains(result, "line2") { + t.Errorf("HTMLToMarkdown(%q)\ngot: %q\nmissing content", input, result) + } +} + +func TestHTMLToMarkdownCodeFenceNewline(t *testing.T) { + input := "
          func main() {}\n
          " + result := HTMLToMarkdown(input) + if strings.Contains(result, "\n\n```") { + t.Errorf("HTMLToMarkdown(%q) has extra blank line before closing fence\ngot: %q", input, result) + } + if !strings.Contains(result, "func main() {}") { + t.Errorf("HTMLToMarkdown(%q) missing code content\ngot: %q", input, result) + } +} + +func TestHTMLToMarkdownCodePreservesHTMLEntities(t *testing.T) { + tests := []struct { + name string + input string + contains string + }{ + { + name: "p tags in code block survive reP and reStripTags", + input: "
          <p>\nhi\n</p>\n
          ", + contains: "

          \nhi\n

          ", + }, + { + name: "div tags in code block survive reStripTags", + input: "
          <div>hello</div>
          ", + contains: "
          hello
          ", + }, + { + name: "p tags in blockquoted code block", + input: "
          <p>\nhi\n</p>\n
          ", + contains: "

          \n> hi\n>

          ", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HTMLToMarkdown(tt.input) + if !strings.Contains(result, tt.contains) { + t.Errorf("HTMLToMarkdown(%q)\ngot: %q\nmissing: %q", tt.input, result, tt.contains) + } + }) + } +} + +func TestHTMLToMarkdownNestedLists(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "nested ul compact", + input: "
          • parent
            • child
          ", + expected: "- parent\n - child", + }, + { + name: "nested ul with whitespace", + input: "
            \n
          • parent\n
              \n
            • child
            • \n
            \n
          • \n
          ", + expected: "- parent\n - child", + }, + { + name: "nested ol", + input: "
          1. parent
            1. child
          ", + expected: "1. parent\n 1. child", + }, + { + name: "mixed nesting ul then ol", + input: "
          • parent
            1. child
          ", + expected: "- parent\n 1. child", + }, + { + name: "mixed nesting ol then ul", + input: "
          1. parent
            • child
          ", + expected: "1. parent\n - child", + }, + { + name: "3-level nesting", + input: "
          • a
            • b
              • c
          ", + expected: "- a\n - b\n - c", + }, + { + name: "uppercase tags", + input: "
          • one
          • two
          ", + expected: "- one\n- two", + }, + { + name: "nested blockquote", + input: "
          nested
          ", + expected: "> > nested", + }, + { + name: "sibling lists preserved", + input: "
          • a

          text

          • b
          ", + expected: "- a\n\ntext\n\n- b", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HTMLToMarkdown(tt.input) + if result != tt.expected { + t.Errorf("HTMLToMarkdown(%q)\ngot: %q\nwant: %q", tt.input, result, tt.expected) + } + }) + } +} + +func TestEditLoopRoundTrip(t *testing.T) { + tests := []struct { + name string + markdown string + expected string // exact expected round-trip output + }{ + { + name: "blockquote", + markdown: "> A quote", + expected: "> A quote", + }, + { + name: "multiline blockquote", + markdown: "> line1\n> line2", + expected: "> line1\n> line2", + }, + { + name: "multi-paragraph blockquote", + markdown: "> para1\n>\n> para2", + expected: "> para1\n>\n> para2", + }, + { + name: "unordered list", + markdown: "- One\n- Two\n- Three", + expected: "- One\n- Two\n- Three", + }, + { + name: "list with continuation", + markdown: "1. First\n Desc\n\n2. Second\n More", + expected: "1. First\n Desc\n2. Second\n More", + }, + { + name: "code fence", + markdown: "```go\nfunc main() {}\n```", + expected: "```go\nfunc main() {}\n```", + }, + { + name: "heading", + markdown: "# Title", + expected: "# Title", + }, + { + name: "quoted list", + markdown: "> - One\n> Two", + expected: "> - One\n> Two", + }, + { + name: "quoted code fence", + markdown: "> ```\n> code\n> ```", + expected: "> ```\n> code\n> ```", + }, + { + name: "quoted ordered list", + markdown: "> 1. First\n> 2. Second", + expected: "> 1. First\n> 2. Second", + }, + { + name: "nested unordered list", + markdown: "- parent\n - child", + expected: "- parent\n - child", + }, + { + name: "nested ordered list", + markdown: "1. parent\n 1. child", + expected: "1. parent\n 1. 
child", + }, + { + name: "nested blockquote", + markdown: "> > nested", + expected: "> > nested", + }, + { + name: "mixed content", + markdown: "# Title\n\nSome **bold** text.\n\n- Item 1\n- Item 2\n\n> A quote\n\n```\ncode\n```", + expected: "# Title\n\nSome **bold** text.\n\n- Item 1\n- Item 2\n\n> A quote\n\n```\ncode\n```", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + html := MarkdownToHTML(tt.markdown) + back := HTMLToMarkdown(html) + if back != tt.expected { + t.Errorf("round-trip mismatch\nmarkdown: %q\nhtml: %q\ngot: %q\nwant: %q", tt.markdown, html, back, tt.expected) + } + }) + } +} + func TestHTMLToMarkdown(t *testing.T) { tests := []struct { name string