From 86d33960f579ee7889969f2800e81dd05ae04447 Mon Sep 17 00:00:00 2001
From: Dahyun Lee <dahyun7972@email.com>
Date: Sun, 31 May 2026 18:02:01 +0900
Subject: [PATCH] feat: rich Markdown export + HwpxOxmlNote body helpers

- Add hwpx.tools.markdown_export module and HwpxDocument.export_rich_markdown()
  preserving inline styles (**bold**, *italic*, <u>underline</u>, text color,
  highlight), nested tables with colspan/rowspan, shape text, BinData image
  extraction, footnotes/endnotes with precise marker positions, hyperlinks, and
  pattern-based #/## heading detection.
- Add HwpxOxmlNote.body_paragraph / add_run / add_hyperlink helpers so note
  bodies can be populated with mixed inline formatting and links without
  manual paragraph manipulation.
- Fix add_footnote / add_endnote: the char_pr_id_ref argument was not applied to
  the note body run, which always got charPrIDRef="0".
- Add 27 regression tests covering inline styles, tables, footnotes, note
  helpers, hyperlinks, headings, images, roundtrip, and source-input types.

Validated against real KASA government HWPX fixtures (announcements, application
forms, research reports) and synthetic edge cases.
---
 CHANGELOG.md                      |   6 +
 README.md                         |  41 +++
 src/hwpx/document.py              |   8 +
 src/hwpx/oxml/document.py         |  57 +++-
 src/hwpx/tools/markdown_export.py | 450 ++++++++++++++++++++++++++++++
 tests/test_markdown_export.py     | 343 +++++++++++++++++++++++
 6 files changed, 904 insertions(+), 1 deletion(-)
 create mode 100644 src/hwpx/tools/markdown_export.py
 create mode 100644 tests/test_markdown_export.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d28d545..a618089 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,9 @@
 
 ## [Unreleased]
 ### 추가
+- `hwpx.tools.markdown_export.export_markdown()`와 `HwpxDocument.export_rich_markdown()`을 추가해 풍부한 Markdown 변환을 지원합니다. 인라인 서식(굵게/기울임/색상/하이라이트), 표 병합 셀(colspan/rowspan HTML), 중첩 표 재귀, `rect`/`ellipse`/`polygon` 도형 내부 paragraph, BinData 이미지 추출, `Ⅰ.`/`1.` 패턴 기반 헤딩 감지(`# `/`## `), 각주·미주(정확 위치 마커 + `fn1`/`en1` 일련번호 + 본문 인라인 서식), 하이퍼링크(`[text](url)`) 보존을 한 번에 처리합니다. 기존 `HwpxDocument.export_markdown()`은 그대로 유지됩니다.
+- `HwpxOxmlNote`에 본문 paragraph 접근/편집 helper를 추가했습니다: `body_paragraph` property, `add_run(text, *, char_pr_id_ref=..., bold=..., italic=..., underline=..., attributes=...)`, `add_hyperlink(url, display_text, *, char_pr_id_ref=...)`. XML 직접 조작 없이 각주 본문에 혼합 서식 run과 하이퍼링크를 추가할 수 있습니다.
+- 새 컨버터와 helper에 대한 회귀 테스트 27개를 `tests/test_markdown_export.py`에 추가했습니다 (인라인 서식, 표 병합/중첩, 각주 정확 위치 + 일련번호 + 본문 인라인 서식 + 하이퍼링크, heading 감지, BinData 추출, 라운드트립 charPrIDRef 보존, 입력 형식 4가지).
 - `src/hwpx/tools/_schemas/owpml/`에 2011 Hancom 네임스페이스용 subset XSD 번들을 추가했습니다 (`header.xsd`, `body.xsd`, `paralist.xsd`, `core.xsd`, `xml.xsd`, `NOTICE`).
 - `hwpx.oxml.load_compound_schema()`와 `SchemaImportError`를 추가해 offline compound XSD 로딩을 지원합니다.
 - fixture matrix 기반 Phase 1 validation 리포트(`shared/hwpx/HWPX_STACK_VALIDATION_2026-04-20_pre-phase1.md`, `..._post-phase1.md`)와 회귀 테스트를 추가했습니다.
@@ -13,6 +16,9 @@
 - `HwpxDocument.validate()`는 기본 `strict=False`로 동작하며, `validate_on_save_strict` 옵션으로 저장 시 strict 검증을 제어할 수 있습니다.
 - 패키지 배포물(sdist/wheel)에 OWPML subset schema bundle이 포함되도록 package-data를 확장했습니다.
 
+### 수정
+- `HwpxOxmlParagraph.add_footnote()`/`add_endnote()`의 `char_pr_id_ref` 인자가 외부 호스팅 run에만 적용되고 각주 **본문 run은 항상 `charPrIDRef="0"`** 으로 하드코딩되던 문제를 수정했습니다. 인자가 사용자 의도대로 본문 run에도 적용됩니다. 회귀 테스트: `TestNoteHelpers::test_add_footnote_cpr_applies_to_body`.
+
 ## [2.9.1] - 2026-04-27
 
 상호운용성(interop) 버그 묶음 릴리즈입니다. 외부 기여자들이 보고하고 수정한 세 가지 문제를 정리합니다.
diff --git a/README.md b/README.md
index aeaab17..250a5a7 100644
--- a/README.md
+++ b/README.md
@@ -79,6 +79,47 @@ hwpx-validate-package 보고서.hwpx
 hwpx-analyze-template 보고서.hwpx
 ```
 
+### 4. 풍부한 Markdown 변환 (서식·표·각주·이미지 보존)
+
+`export_markdown()`는 단순 평문 추출이고, `export_rich_markdown()`는 인라인 서식(`**굵게**`, `*기울임*`, `<u>밑줄</u>`, 글자색, 하이라이트),
+표(중첩 포함, colspan/rowspan 안전), 도형 텍스트, 이미지, 각주/미주, 하이퍼링크, 제목(`#`/`##`) 자동 감지까지 보존한다.
+
+```python
+from hwpx import HwpxDocument
+
+doc = HwpxDocument.open("보고서.hwpx")
+
+md = doc.export_rich_markdown(
+    image_dir="out/images",          # BinData 이미지를 디스크에 추출
+    image_ref_prefix="images",       # 마크다운 내 ![](images/...) 경로 접두 ("/"는 자동 삽입)
+    detect_headings=True,            # Ⅰ./1. 번호 패턴 기반 #/## 자동 감지
+)
+print(md)
+```
+
+문자열·경로·바이트도 그대로 받는다:
+
+```python
+from hwpx.tools.markdown_export import export_markdown
+
+md = export_markdown("보고서.hwpx")          # 경로
+md = export_markdown(open("a.hwpx", "rb").read())  # bytes
+```
+
+### 5. 각주 본문에 혼합 서식 / 하이퍼링크 추가
+
+`HwpxOxmlNote`에 `body_paragraph`, `add_run`, `add_hyperlink` helper가 있어 각주 본문을
+직접 paragraph로 다루지 않고도 인라인 서식·링크를 손쉽게 채울 수 있다.
+
+```python
+para = section.paragraphs[0]
+note = para.add_footnote("")  # 빈 각주 생성 후 본문 구성
+note.add_run("자세한 내용은 ")
+note.add_run("정부 공식 사이트", bold=True)
+note.add_run("를 참고하라: ")
+note.add_hyperlink("https://www.kasa.go.kr", "우주항공청")
+```
+
 처음에는 `open/new -> edit/extract -> save_to_path` 흐름만 잡으면 된다. 패키지 구조, XML 파트, 템플릿 회귀 점검은 필요할 때만 확장하면 된다.
 
 ## 어디부터 읽으면 되나
diff --git a/src/hwpx/document.py b/src/hwpx/document.py
index 61396c1..d44bd6e 100644
--- a/src/hwpx/document.py
+++ b/src/hwpx/document.py
@@ -1299,6 +1299,14 @@ def export_markdown(self, **kwargs: object) -> str:
         from .tools.exporter import export_markdown
         return export_markdown(self, **kwargs)  # type: ignore[arg-type]
 
+    def export_rich_markdown(self, **kwargs: object) -> str:
+        """Export rich Markdown preserving inline styles, tables, footnotes, hyperlinks, images, and shape text.
+
+        Keyword args (``image_dir``, ``image_ref_prefix``, ``detect_headings``, ``notes_section_separator``) are forwarded to :func:`~hwpx.tools.markdown_export.export_markdown`.
+        """
+        from .tools.markdown_export import export_markdown as _rich  # local import: markdown_export imports HwpxDocument from this module
+        return _rich(self, **kwargs)  # type: ignore[arg-type]
+
     # ------------------------------------------------------------------
     # Validation
     # ------------------------------------------------------------------
diff --git a/src/hwpx/oxml/document.py b/src/hwpx/oxml/document.py
index e6faf56..5043df8 100644
--- a/src/hwpx/oxml/document.py
+++ b/src/hwpx/oxml/document.py
@@ -1607,6 +1607,58 @@ def text(self, value: str) -> None:
         t.text = _sanitize_text(value)
         self.paragraph.section.mark_dirty()
 
+    @property
+    def body_paragraph(self) -> "HwpxOxmlParagraph":
+        """Return the note's body ``<hp:p>`` wrapped as :class:`HwpxOxmlParagraph`.
+
+        The body is the first ``<hp:p>`` descendant of the note element and is
+        distinct from :attr:`paragraph`, the *hosting* paragraph where the note
+        marker is inserted. Raises :class:`ValueError` if no ``<hp:p>`` exists.
+        Use this to add runs with mixed formatting directly into the note body:
+
+        >>> note = para.add_footnote("기본 ")
+        >>> note.add_run("청색", char_pr_id_ref=5)
+        """
+        p = self.element.find(f".//{_HP}p")
+        if p is None:
+            raise ValueError("note has no body paragraph element")
+        return HwpxOxmlParagraph(p, self.paragraph.section)
+
+    def add_run(
+        self,
+        text: str = "",
+        *,
+        char_pr_id_ref: str | int | None = None,
+        bold: bool = False,
+        italic: bool = False,
+        underline: bool = False,
+        attributes: dict[str, str] | None = None,
+    ) -> "HwpxOxmlRun":
+        """Append a run to the note body paragraph; every argument is passed unchanged to ``body_paragraph.add_run``."""
+        return self.body_paragraph.add_run(
+            text,
+            char_pr_id_ref=char_pr_id_ref,
+            bold=bold,
+            italic=italic,
+            underline=underline,
+            attributes=attributes,
+        )
+
+    def add_hyperlink(
+        self,
+        url: str,
+        display_text: str,
+        *,
+        char_pr_id_ref: str | int | None = None,
+    ) -> "HwpxOxmlInlineObject":
+        """Append a hyperlink to the note body paragraph.
+
+        Forwards *url*, *display_text* and *char_pr_id_ref* to ``body_paragraph.add_hyperlink``.
+        """
+        return self.body_paragraph.add_hyperlink(
+            url, display_text, char_pr_id_ref=char_pr_id_ref
+        )
+
 
 def _default_sublist_attributes() -> dict[str, str]:
     """Return standard attributes for a ``<hp:subList>`` element.
@@ -3364,7 +3416,10 @@ def _add_note(
         sublist = _append_child(note_element, f"{_HP}subList", _default_sublist_attributes())
         p_attrs = {"id": _paragraph_id(), **_DEFAULT_PARAGRAPH_ATTRS}
         paragraph = _append_child(sublist, f"{_HP}p", p_attrs)
-        note_run = _append_child(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
+        # 본문 run의 charPrIDRef도 인자를 따라가도록 적용 (host run과 동일 스타일).
+        # None이면 "0"(default).
+        body_cpr = "0" if char_pr_id_ref is None else str(char_pr_id_ref)
+        note_run = _append_child(paragraph, f"{_HP}run", {"charPrIDRef": body_cpr})
         t = _append_child(note_run, f"{_HP}t", {})
         t.text = _sanitize_text(text)
         self.section.mark_dirty()
diff --git a/src/hwpx/tools/markdown_export.py b/src/hwpx/tools/markdown_export.py
new file mode 100644
index 0000000..66858b8
--- /dev/null
+++ b/src/hwpx/tools/markdown_export.py
@@ -0,0 +1,450 @@
+"""Rich HWPX → Markdown converter.
+
+Preserves:
+- 인라인 서식 (bold/italic/color/shade) via run charPrIDRef diff
+- 표 병합 셀 (colspan/rowspan) via HTML
+- 중첩 표 재귀 HTML
+- 도형(rect/ellipse/polygon) 내부 paragraph
+- 이미지 (BinData → ![image](path))
+- 헤딩 (Ⅰ. / 1. 패턴)
+- 각주/미주 정확 위치 + fn1/en1 일련번호 + 본문 인라인 서식
+- 하이퍼링크 [text](url) (fieldBegin/End 추적)
+"""
+
+from __future__ import annotations
+
+import re
+from pathlib import Path
+from typing import Union
+
+from ..document import HwpxDocument
+
+HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
+HC_NS = "http://www.hancom.co.kr/hwpml/2011/core"
+NS = {"hp": HP_NS, "hc": HC_NS}
+
+# 도형은 rect/ellipse/polygon만 순회. drawText/container는 이들의 자식이라
+# 별도 순회하면 같은 paragraph가 중복 처리됨.
+SHAPE_TAGS = ("rect", "ellipse", "polygon")
+
+ROMAN_HEAD = re.compile(r"^\s*[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]\.\s*.+")
+ARABIC_HEAD = re.compile(r"^\s*\d+\.\s+[가-힣A-Za-z].+")
+
+
+# ──────────────────────────────────────────────────────────────────
+# 인라인 서식
+# ──────────────────────────────────────────────────────────────────
+def _diff_style(cp, base_cp) -> dict[str, object]:  # style of cp relative to base_cp
+    if cp is None:
+        return {}  # no charPr to inspect → no style
+    ca, a = cp.child_attributes, cp.attributes
+    base_ca = base_cp.child_attributes if base_cp is not None else {}
+    base_a = base_cp.attributes if base_cp is not None else {}
+
+    bold = "bold" in ca and "bold" not in base_ca  # present on cp, absent on the base
+    italic = "italic" in ca and "italic" not in base_ca
+    underline = (
+        ca.get("underline", {}).get("type", "NONE") != "NONE"
+        and base_ca.get("underline", {}).get("type", "NONE") == "NONE"
+    )
+    color = a.get("textColor", "#000000")
+    base_color = base_a.get("textColor", "#000000")
+    # White is assumed to be a design effect on a dark background → no visual meaning.
+    color_changed = (
+        color != base_color and color.upper() not in ("#000000", "#FFFFFF")
+    )
+    shade = a.get("shadeColor", "none")
+    base_shade = base_a.get("shadeColor", "none")
+    shade_changed = shade.lower() not in ("none", "", base_shade.lower())
+
+    return {
+        "bold": bold,
+        "italic": italic,
+        "underline": underline,
+        "color": color if color_changed else None,
+        "shade": shade if shade_changed else None,
+    }
+
+
+def _wrap(text: str, style: dict) -> str:
+    """Wrap *text* in the markers for *style*, keeping edge whitespace outside them."""
+    core = (text or "").strip()
+    if not core:
+        return text or ""  # empty / whitespace-only runs carry no visible style
+    if style.get("shade"):
+        core = f'<mark style="background-color:{style["shade"]}">{core}</mark>'
+    if style.get("color"):
+        core = f'<span style="color:{style["color"]}">{core}</span>'
+    if style.get("underline"):
+        core = f"<u>{core}</u>"
+    for flag, marker in (("italic", "*"), ("bold", "**")):
+        if style.get(flag):  # CommonMark ignores "** x **": delimiters must hug the text
+            core = f"{marker}{core}{marker}"
+    return text[: len(text) - len(text.lstrip())] + core + text[len(text.rstrip()) :]
+
+
+def _style_key(style: dict) -> tuple[tuple[str, object], ...]:  # hashable key of the truthy style entries
+    return tuple(sorted((k, v) for k, v in style.items() if v))
+
+
+def _render_runs(items, base_cp, chars) -> str:
+    """Render a ``[(charPrIDRef, text)]`` sequence as Markdown, merging adjacent runs of equal style."""
+    groups: list[tuple[tuple, str]] = []
+    for cpr, text in items:
+        if not text:
+            continue
+        cp = chars.get(str(cpr), base_cp)  # unknown charPr id → fall back to the base style
+        style = _diff_style(cp, base_cp)
+        key = _style_key(style)
+        if groups and groups[-1][0] == key:
+            groups[-1] = (key, groups[-1][1] + text)  # same style as the previous run: extend it
+        else:
+            groups.append((key, text))
+    return "".join(_wrap(text, dict(key)) for key, text in groups)
+
+
+# ──────────────────────────────────────────────────────────────────
+# 이미지 매핑
+# ──────────────────────────────────────────────────────────────────
+def _build_image_map(
+    doc: HwpxDocument,
+    image_dir: Path | None,
+    image_ref_prefix: str | None,
+) -> dict[str, str]:
+    """Extract every ``BinData/*`` part of *doc* into *image_dir*; return {ref stem: Markdown path}.
+    With ``image_dir=None`` nothing is written and the map is empty (no image lines are emitted).
+    """
+    if image_dir is None:
+        return {}
+    image_dir = Path(image_dir)
+    image_dir.mkdir(parents=True, exist_ok=True)
+    prefix = image_dir.name if image_ref_prefix is None else image_ref_prefix
+    # Exactly one separator: a caller-supplied "images/" must not become "images//x.png".
+    if prefix and not prefix.endswith("/"):
+        prefix += "/"
+    mapping: dict[str, str] = {}
+    for name in doc._package.files():
+        if name.startswith("BinData/"):
+            fname = Path(name).name
+            (image_dir / fname).write_bytes(doc._package.read(name))
+            mapping[Path(name).stem] = prefix + fname
+    return mapping
+
+
+def _paragraph_images(p_el, mapping: dict[str, str]) -> list[str]:
+    """Markdown image lines for the ``<hp:pic>`` elements owned by *p_el* itself."""
+    # Pics inside a nested <hp:tbl> are emitted by _cell_to_md for their own
+    # cell paragraph; listing them here as well would print each image twice.
+    nested = [n for t in p_el.findall(".//hp:tbl", NS) for n in t.findall(".//hp:pic", NS)]
+    out = []
+    for pic in p_el.findall(".//hp:pic", NS):
+        img = pic.find(".//hc:img", NS)
+        ref = img.get("binaryItemIDRef") if img is not None else None
+        if not ref or not mapping or any(pic is n for n in nested):
+            continue
+        out.append(f"![image]({mapping.get(ref, f'BinData/{ref}')})")
+    return out
+
+
+# ──────────────────────────────────────────────────────────────────
+# Paragraph element → markdown (재귀 진입점)
+# ──────────────────────────────────────────────────────────────────
+def _p_element_to_md(p_el, doc, notes_out: list | None = None) -> str:
+    """Render one ``<hp:p>`` as inline Markdown; note bodies are appended to *notes_out* if given."""
+    chars = doc._root.char_properties
+    base_cp = chars.get("0")
+    HP = f"{{{HP_NS}}}"
+
+    output: list[str] = []
+    items: list[tuple] = []
+    link_url: str | None = None
+    link_items: list[tuple] = []
+
+    def flush_items():
+        nonlocal items
+        if items:
+            output.append(_render_runs(items, base_cp, chars))
+            items = []
+
+    def flush_link():
+        nonlocal link_url, link_items
+        if link_url is None:
+            return
+        text = _render_runs(link_items, base_cp, chars)
+        if text:
+            output.append(f"[{text}]({link_url})" if link_url else text)
+        link_url, link_items = None, []
+
+    def push_text(cpr, text):
+        (items if link_url is None else link_items).append((cpr, text))
+
+    for run in p_el.findall(f"{HP}run"):
+        cpr = run.get("charPrIDRef", "0")
+        for child in run:
+            tag = child.tag.replace(HP, "")
+            if tag == "t":
+                # Mixed content: text after an inline child (e.g. tab, lineBreak) is in its .tail.
+                text = child.text or ""
+                for inline in child:
+                    text += (" " if inline.tag in (HP + "tab", HP + "lineBreak") else "") + (inline.tail or "")
+                push_text(cpr, text)
+            elif tag == "ctrl":
+                for gc in child:
+                    gctag = gc.tag.replace(HP, "")
+                    if gctag == "fieldBegin" and gc.get("type") == "HYPERLINK":
+                        flush_items()
+                        link_url = gc.get("name", "")
+                    elif gctag == "fieldEnd":
+                        flush_link()
+            elif tag in ("footNote", "endNote"):
+                inst_id = child.get("instId", "")
+                kind = "fn" if tag == "footNote" else "en"
+                marker = f"[^{kind}{inst_id}]"
+                if link_url is not None:
+                    flush_link()
+                else:
+                    flush_items()
+                output.append(marker)
+                if notes_out is not None:
+                    body_parts = []
+                    for fp in child.findall(f".//{HP}p"):
+                        sub_md = _p_element_to_md(fp, doc, None).strip()
+                        if sub_md:
+                            body_parts.append(sub_md)
+                    notes_out.append((kind, inst_id, " ".join(body_parts)))
+
+    flush_items()
+    flush_link()
+    return "".join(output)
+
+
+# ──────────────────────────────────────────────────────────────────
+# 도형 / 셀 / 표
+# ──────────────────────────────────────────────────────────────────
+def _shape_text_lines(scope_el, doc, notes_out: list | None = None) -> list[str]:
+    """Render each paragraph found inside the shapes under *scope_el* exactly once."""
+    lines: list[str] = []
+    seen_p = set()  # the elements themselves: id() of a released lxml proxy can be reused
+    for tag in SHAPE_TAGS:
+        for shape in scope_el.findall(f".//hp:{tag}", NS):
+            for sub_p in shape.findall(".//hp:p", NS):
+                if sub_p in seen_p:
+                    continue
+                seen_p.add(sub_p)
+                md = _p_element_to_md(sub_p, doc, notes_out).strip()
+                if md:
+                    lines.append(md)
+    return lines
+
+
+def _cell_to_md(cell, doc, mapping, depth: int = 0, notes_out: list | None = None) -> str:
+    chunks: list[str] = []  # per paragraph: text, shape text, images, then nested tables
+    for cp in cell.paragraphs:
+        md = _p_element_to_md(cp.element, doc, notes_out).strip()
+        imgs = _paragraph_images(cp.element, mapping)
+        shape_lines = _shape_text_lines(cp.element, doc, notes_out)
+        if md:
+            chunks.append(md)
+        chunks.extend(shape_lines)
+        chunks.extend(imgs)
+        for sub in cp.tables:
+            chunks.append(_table_to_md(sub, doc, mapping, depth + 1, notes_out))  # nested tables render one level deeper
+    return "<br>".join(c for c in chunks if c).strip()  # empty chunks dropped; <br> separates the rest
+
+
+def _table_to_md(tbl, doc, mapping, depth: int = 0, notes_out: list | None = None) -> str:
+    """Render *tbl* as a GFM pipe table, or as HTML when it has merged cells or is nested."""
+    grid = tbl.get_cell_map()
+    rows, cols = tbl.row_count, tbl.column_count
+    has_merge = any(not pos.is_anchor for row in grid for pos in row)
+
+    if has_merge or depth > 0:
+        # Merged cells or a nested table: a pipe table cannot express either, so emit HTML.
+        out = ["<table>"]
+        for r in range(rows):
+            out.append("<tr>")
+            for c in range(cols):
+                pos = grid[r][c]
+                if not pos.is_anchor:
+                    continue
+                col_end = c
+                while (
+                    col_end + 1 < cols
+                    and not grid[r][col_end + 1].is_anchor
+                    and grid[r][col_end + 1].cell is pos.cell
+                ):
+                    col_end += 1
+                row_end = r
+                while (
+                    row_end + 1 < rows
+                    and not grid[row_end + 1][c].is_anchor
+                    and grid[row_end + 1][c].cell is pos.cell
+                ):
+                    row_end += 1
+                colspan = col_end - c + 1
+                rowspan = row_end - r + 1
+                attr_s = f' colspan="{colspan}"' if colspan > 1 else ""
+                if rowspan > 1:
+                    attr_s += f' rowspan="{rowspan}"'
+                content = _cell_to_md(pos.cell, doc, mapping, depth + 1, notes_out)
+                tag = "th" if r == 0 else "td"
+                out.append(f"<{tag}{attr_s}>{content}</{tag}>")
+            out.append("</tr>")
+        out.append("</table>")
+        return "\n".join(out)
+
+    # Simple grid: GFM pipe table. A row must stay on one line and a literal "|"
+    # would start a new column, so flatten newlines (nested HTML tables) and escape pipes.
+    lines = []
+    for r in range(rows):
+        cells = [
+            _cell_to_md(grid[r][c].cell, doc, mapping, depth + 1, notes_out)
+            .replace("\n", " ").replace("|", "\\|")
+            for c in range(cols)
+        ]
+        lines.append("| " + " | ".join(cells) + " |")
+        if r == 0:
+            lines.append("| " + " | ".join(["---"] * cols) + " |")
+    return "\n".join(lines)
+
+
+# ──────────────────────────────────────────────────────────────────
+# 헤딩 감지
+# ──────────────────────────────────────────────────────────────────
+def _detect_heading(text: str) -> str | None:
+    plain = re.sub(r"\*\*|<[^>]+>|\*", "", text.strip())  # strip emphasis markers and HTML tags before matching
+    if ROMAN_HEAD.match(plain):
+        return f"# {plain}"  # "Ⅰ. ..." pattern → level-1 heading
+    if ARABIC_HEAD.match(plain) and len(plain) < 40:  # "1. ..." pattern counts only when shorter than 40 chars
+        return f"## {plain}"
+    return None  # not a heading
+
+
+# ──────────────────────────────────────────────────────────────────
+# Public API
+# ──────────────────────────────────────────────────────────────────
+def export_markdown(
+    source: Union[HwpxDocument, str, Path, bytes],
+    *,
+    image_dir: Union[str, Path, None] = None,
+    image_ref_prefix: str | None = None,
+    detect_headings: bool = True,
+    notes_section_separator: str = "\n\n---\n",
+) -> str:
+    """Convert an HWPX document to rich Markdown.
+
+    Parameters
+    ----------
+    source : HwpxDocument | path | bytes
+        An HwpxDocument instance, or a file path / raw bytes to open.
+    image_dir : path | None
+        Directory that receives the extracted BinData/* parts. None: no image lines are emitted.
+    image_ref_prefix : str | None
+        Prefix of the image paths written into the Markdown. None: basename of image_dir.
+    detect_headings : bool
+        Promote lines matching the "Ⅰ." / "1." patterns to `#` / `##` headings.
+    notes_section_separator : str
+        Separator inserted before the footnote/endnote definition appendix.
+    """
+    if isinstance(source, HwpxDocument):
+        doc = source
+    elif isinstance(source, (bytes, bytearray)):
+        import io
+        doc = HwpxDocument.open(io.BytesIO(source))
+    else:
+        doc = HwpxDocument.open(str(source))
+
+    mapping = _build_image_map(doc, Path(image_dir) if image_dir else None, image_ref_prefix)
+    notes: list[tuple] = []
+    lines: list[str] = []
+
+    for section in doc.sections:
+        for p in section.paragraphs:
+            md = _p_element_to_md(p.element, doc, notes).strip()
+            imgs = _paragraph_images(p.element, mapping)
+            tables = [_table_to_md(t, doc, mapping, 0, notes) for t in p.tables]
+
+            # Duplicate guard 1: paragraph text that also appears inside its table cells → the table wins
+            if md and p.tables:
+                plain = (p.text or "").strip()
+                all_cell_text = "".join(
+                    (cell.text or "")
+                    for tbl in p.tables
+                    for row in tbl.rows
+                    for cell in row.cells
+                )
+                if plain and plain in all_cell_text:
+                    md = ""
+
+            # Duplicate guard 2: with a shape present, the paragraph text is treated as spill-over of the shape text
+            if md and any(
+                p.element.find(f".//hp:{tag}", NS) is not None
+                for tag in SHAPE_TAGS
+            ):
+                md = ""
+
+            # Paragraphs inside shapes (shapes inside tables are handled by _cell_to_md)
+            shape_lines: list[str] = []
+            # Track the elements themselves: lxml recreates element proxies on access,
+            # so id() values are not stable once a proxy has been released.
+            seen_p = {
+                nested_p for sub in p.tables for nested_p in sub.element.findall(".//hp:p", NS)
+            }
+            for tag in SHAPE_TAGS:
+                for shape in p.element.findall(f".//hp:{tag}", NS):
+                    for sub_p in shape.findall(".//hp:p", NS):
+                        if sub_p in seen_p:
+                            continue
+                        seen_p.add(sub_p)
+                        sub_md = _p_element_to_md(sub_p, doc, notes).strip()
+                        if sub_md:
+                            shape_lines.append(sub_md)
+
+            # Heading detection (including text that sits in a 1x1 table cell)
+            promoted = None
+            if detect_headings:
+                if md:
+                    promoted = _detect_heading(md)
+                elif p.tables and len(p.tables) == 1:
+                    t = p.tables[0]
+                    if t.row_count == 1 and t.column_count == 1:
+                        cell_text = _cell_to_md(
+                            t.rows[0].cells[0], doc, mapping, 0, notes
+                        )
+                        promoted = _detect_heading(cell_text)
+                        if promoted:
+                            lines.append(promoted)
+                            continue
+
+            if promoted:
+                lines.append(promoted)
+            elif md:
+                lines.append(md)
+            lines.extend(shape_lines)
+            lines.extend(imgs)
+            lines.extend(tables)
+
+    body = "\n\n".join(lines)
+
+    # Map each note instId to a sequential fn1/en1 number, then append the definitions
+    if notes:
+        seq_map: dict[str, dict[str, int]] = {"fn": {}, "en": {}}
+        for kind, inst_id, _ in notes:
+            if inst_id not in seq_map[kind]:
+                seq_map[kind][inst_id] = len(seq_map[kind]) + 1
+
+        body = re.sub(  # one pass: chained str.replace could re-map an already renumbered marker
+            r"\[\^(fn|en)([^\]]*)\]", lambda m: f"[^{m[1]}{seq_map[m[1]].get(m[2], m[2])}]", body
+        )
+
+        body += notes_section_separator
+        seen = set()
+        for kind, inst_id, text in notes:
+            key = (kind, inst_id)
+            if key in seen:
+                continue
+            seen.add(key)
+            seq = seq_map[kind][inst_id]
+            body += f"\n[^{kind}{seq}]: {text}\n"
+
+    return body
diff --git a/tests/test_markdown_export.py b/tests/test_markdown_export.py
new file mode 100644
index 0000000..e354c9c
--- /dev/null
+++ b/tests/test_markdown_export.py
@@ -0,0 +1,343 @@
+"""Tests for hwpx.tools.markdown_export — rich Markdown converter."""
+
+from __future__ import annotations
+
+import zipfile
+from pathlib import Path
+
+import lxml.etree as ET
+import pytest
+
+from hwpx import HwpxDocument
+from hwpx.tools.markdown_export import export_markdown
+
+HP = "{http://www.hancom.co.kr/hwpml/2011/paragraph}"
+
+REPO_ROOT = Path(__file__).resolve().parent.parent
+EXAMPLES = REPO_ROOT / "examples"
+FIXTURES = REPO_ROOT / "shared" / "hwpx" / "fixtures"
+
+
+# ──────────────────────────────────────────────────────────────────
+# Static fixtures
+# ──────────────────────────────────────────────────────────────────
+@pytest.fixture
+def showcase_doc():
+    return HwpxDocument.open(str(EXAMPLES / "FormattingShowcase.hwpx"))
+
+
+@pytest.fixture
+def table_merge_doc():
+    return HwpxDocument.open(str(FIXTURES / "tables" / "30_table_merge_min.hwpx"))
+
+
+@pytest.fixture
+def stress_doc():
+    return HwpxDocument.open(str(FIXTURES / "stress" / "99_all_in_one_stress.hwpx"))
+
+
+# ──────────────────────────────────────────────────────────────────
+# Dynamic fixtures (notes/hyperlinks/styled notes)
+# ──────────────────────────────────────────────────────────────────
+@pytest.fixture
+def notes_doc():
+    """각주·미주·하이퍼링크 + 인접 위치 각주."""
+    doc = HwpxDocument.new()
+    sec = doc.sections[0]
+
+    p1 = sec.add_paragraph()
+    p1.add_run("우리나라는 누리호")
+    p1.add_footnote("누리호: 한국형 발사체")
+    p1.add_run("와 다누리호")
+    p1.add_footnote("다누리호: 달 궤도선")
+    p1.add_run("를 보유한 우주강국이다.")
+
+    p2 = sec.add_paragraph("국가 R&D 투자는 ")
+    p2.add_run("19년간", bold=True)
+    p2.add_footnote("'06~'24 누적")
+    p2.add_run(" 8조 7,931억원에 달한다.")
+
+    p3 = sec.add_paragraph("자세한 정책은 ")
+    p3.add_hyperlink("https://www.kasa.go.kr", "우주항공청 홈페이지")
+    p3.add_run("에 공개되어 있다")
+    p3.add_endnote("KASA, 2024.5.30 발표")
+    p3.add_run(".")
+    return doc
+
+
+@pytest.fixture
+def styled_note_doc():
+    """각주 본문에 서식이 섞인 fixture — HwpxOxmlNote.add_run helper 사용."""
+    doc = HwpxDocument.new()
+    sec = doc.sections[0]
+    p = sec.add_paragraph("혼합 서식 각주")
+    n = p.add_footnote("기본 ")
+    n.add_run("청색", char_pr_id_ref=5)
+    n.add_run(" + 일반")
+    return doc
+
+
+# ──────────────────────────────────────────────────────────────────
+# 인라인 서식
+# ──────────────────────────────────────────────────────────────────
+class TestInlineStyles:
+    def test_bold_and_color_preserved(self, showcase_doc):
+        md = export_markdown(showcase_doc)
+        # "굵은 텍스트" is bold + blue #1F4E79
+        assert "**<span style=\"color:#1F4E79\">굵은 텍스트</span>**" in md or \
+               "<span style=\"color:#1F4E79\">굵은 텍스트</span>" in md
+        # "기울임" is italic + purple
+        assert "*<span style=\"color:#7030A0\">기울임</span>*" in md
+        # "강조 표시" has a yellow highlight
+        assert '<mark style="background-color:#FFF2CC">강조 표시</mark>' in md
+        # red text
+        assert "<span style=\"color:#C00000\">색상이 다른 코드 샘플</span>" in md
+
+    def test_adjacent_same_style_merged(self, notes_doc):
+        # "19년간" must come out as one bold span (markers not split per character)
+        md = export_markdown(notes_doc)
+        assert "**19년간**" in md
+        assert "**1****9****년간**" not in md
+
+    def test_white_color_ignored(self):
+        # #FFFFFF is treated as a design effect: no color marker, unlike other colors.
+        from types import SimpleNamespace
+        from hwpx.tools.markdown_export import _diff_style
+        def char_pr(color):
+            return SimpleNamespace(attributes={"textColor": color}, child_attributes={})
+        assert _diff_style(char_pr("#FFFFFF"), char_pr("#000000"))["color"] is None
+        assert _diff_style(char_pr("#C00000"), char_pr("#000000"))["color"] == "#C00000"
+
+
+# ──────────────────────────────────────────────────────────────────
+# 표
+# ──────────────────────────────────────────────────────────────────
+class TestTables:
+    def test_merge_uses_html_with_colspan(self, table_merge_doc):
+        md = export_markdown(table_merge_doc)
+        assert "<table>" in md
+        assert 'colspan="2"' in md
+        assert 'rowspan="2"' in md
+
+    def test_nested_table_recursion(self, stress_doc):
+        md = export_markdown(stress_doc)
+        # 외부 표 안에 또 다른 <table> 들어있어야 (재귀 처리)
+        count = md.count("<table>")
+        assert count >= 2, f"expected >=2 <table>, got {count}"
+
+
+# ──────────────────────────────────────────────────────────────────
+# 각주 / 미주
+# ──────────────────────────────────────────────────────────────────
+class TestFootnotes:
+    def test_marker_precise_position(self, notes_doc):
+        md = export_markdown(notes_doc)
+        # "누리호" 직후 + "다누리호" 직후 각각 마커
+        assert "누리호[^fn1]" in md
+        assert "다누리호[^fn2]" in md
+
+    def test_sequence_ids(self, notes_doc):
+        md = export_markdown(notes_doc)
+        # fn1, fn2, fn3 + en1
+        assert "[^fn1]" in md
+        assert "[^fn2]" in md
+        assert "[^fn3]" in md
+        assert "[^en1]" in md
+        # 정의 부록
+        assert "[^fn1]:" in md
+        assert "[^en1]:" in md
+
+    def test_endnote_separate_counter(self, notes_doc):
+        md = export_markdown(notes_doc)
+        # 각주 3개 + 미주 1개 — en1이 fn4가 되지 않고 별도 카운터
+        assert "[^fn1]" in md
+        assert "[^en1]" in md
+        # fn4 정의는 없어야 함
+        assert "[^fn4]:" not in md
+
+    def test_footnote_body_inline_style(self, styled_note_doc):
+        md = export_markdown(styled_note_doc)
+        # 각주 본문에 cpr=5(청색) 마커가 살아있어야 함
+        # cpr=5의 textColor = #2E74B5 (HwpxDocument.new() default)
+        assert "#2E74B5" in md, f"각주 본문 색상 마커 누락:\n{md}"
+
+
+# ──────────────────────────────────────────────────────────────────
+# HwpxOxmlNote helper API (body_paragraph / add_run / cpr propagation)
+# ──────────────────────────────────────────────────────────────────
+class TestNoteHelpers:
+    def test_body_paragraph_distinct_from_host(self):
+        doc = HwpxDocument.new()
+        p = doc.sections[0].add_paragraph("본문")
+        n = p.add_footnote("각주 본문")
+        # host: the paragraph directly under the section (same object as p)
+        assert n.paragraph is p
+        # body: the paragraph inside the footNote (a different element)
+        body = n.body_paragraph
+        assert body.element is not p.element
+        assert body.element.getparent().tag.endswith("subList")
+        assert body.text == "각주 본문"
+
+    def test_add_run_appends_to_body(self):
+        doc = HwpxDocument.new()
+        p = doc.sections[0].add_paragraph("본문")
+        n = p.add_footnote("기본 ")
+        n.add_run("추가", char_pr_id_ref=5)
+        # the body now holds 2 runs — "기본 " and "추가"
+        body_runs = n.body_paragraph.runs
+        assert len(body_runs) == 2
+        assert body_runs[1].text == "추가"
+        assert body_runs[1].char_pr_id_ref == "5"
+
+    def test_add_footnote_cpr_applies_to_body(self):
+        doc = HwpxDocument.new()
+        p = doc.sections[0].add_paragraph("본문")
+        n = p.add_footnote("청색 각주", char_pr_id_ref=5)
+        # the body run carries charPrIDRef 5
+        body_runs = n.body_paragraph.runs
+        assert body_runs[0].char_pr_id_ref == "5"
+
+    def test_styled_note_markdown_preserves_color(self, styled_note_doc):
+        md = export_markdown(styled_note_doc)
+        # blue body run (cpr=5) → #2E74B5 marker
+        assert "#2E74B5" in md
+        assert "기본 <span style=\"color:#2E74B5\">청색</span> + 일반" in md
+
+    def test_add_hyperlink_helper(self):
+        doc = HwpxDocument.new()
+        p = doc.sections[0].add_paragraph("본문")
+        n = p.add_footnote("기본 ")
+        n.add_hyperlink("https://example.com", "여기")
+        # the hyperlink XML (a fieldBegin element) must be present in the note body
+        body_el = n.body_paragraph.element
+        assert body_el.find(f".//{HP}fieldBegin") is not None
+
+    def test_hyperlink_inside_footnote_body(self):
+        """Hyperlink in a footnote body → marker at its exact position, body rendered as [text](url)."""
+        doc = HwpxDocument.new()
+        sec = doc.sections[0]
+        p = sec.add_paragraph("본문 시작 ")
+        n = p.add_footnote("자세한 정보는 ")
+        n.add_hyperlink("https://example.com", "여기")
+        n.add_run("를 참고")
+        p.add_run("본문 끝")
+
+        md = export_markdown(doc)
+        # the marker sits at its exact position in the paragraph (after "본문 시작 ", before "본문 끝")
+        assert "본문 시작 [^fn1]본문 끝" in md
+        # the note definition contains the Markdown link
+        assert "[^fn1]: 자세한 정보는 [여기](https://example.com)를 참고" in md
+
+
+# ──────────────────────────────────────────────────────────────────
+# 하이퍼링크
+# ──────────────────────────────────────────────────────────────────
+class TestHyperlinks:
+    def test_markdown_link(self, notes_doc):
+        md = export_markdown(notes_doc)
+        # check the [text](url) conversion
+        assert "[우주항공청 홈페이지](https://www.kasa.go.kr)" in md
+
+
+# ──────────────────────────────────────────────────────────────────
+# 헤딩 감지
+# ──────────────────────────────────────────────────────────────────
+class TestHeadings:  # heading detection in export_markdown, on and off
+    def test_roman_promoted_to_h1(self):
+        doc = HwpxDocument.new()
+        doc.sections[0].add_paragraph("Ⅰ. 개요")
+        md = export_markdown(doc)
+        assert md.lstrip().startswith("# Ⅰ. 개요")  # Roman-numeral line is emitted with an H1 prefix
+
+    def test_arabic_promoted_to_h2(self):
+        doc = HwpxDocument.new()
+        doc.sections[0].add_paragraph("1. 수립 배경 및 대상사업")
+        md = export_markdown(doc)
+        assert "## 1. 수립 배경 및 대상사업" in md  # Arabic-numeral line is emitted with an H2 prefix
+
+    def test_detect_headings_disabled(self):
+        doc = HwpxDocument.new()
+        doc.sections[0].add_paragraph("Ⅰ. 개요")
+        md = export_markdown(doc, detect_headings=False)  # heading detection switched off
+        assert "# Ⅰ" not in md  # no heading prefix is added
+        assert "Ⅰ. 개요" in md  # the paragraph text itself is still exported
+
+
+# ──────────────────────────────────────────────────────────────────
+# 이미지
+# ──────────────────────────────────────────────────────────────────
+class TestImages:
+    def test_no_image_dir_means_no_markers(self, showcase_doc, tmp_path):
+        md = export_markdown(showcase_doc, image_dir=None)  # NOTE(review): the tmp_path fixture is requested but unused here
+        assert "![image]" not in md  # without an image_dir no image markers are written
+
+    def test_image_dir_extracts_bindata(self, table_merge_doc, tmp_path):
+        img_dir = tmp_path / "imgs"
+        export_markdown(table_merge_doc, image_dir=img_dir, image_ref_prefix="imgs")
+        # the call itself must succeed even for a fixture without BinData
+        assert img_dir.exists()
+
+
+# ──────────────────────────────────────────────────────────────────
+# 라운드트립 (charPrIDRef 보존)
+# ──────────────────────────────────────────────────────────────────
+class TestRoundtrip:
+    def test_charpridref_preserved_after_replace(self, tmp_path, notes_doc):
+        src = tmp_path / "src.hwpx"
+        notes_doc.save_to_path(str(src))
+
+        def snap(d):  # flatten a document into (char_pr_id_ref, text) pairs, one per run
+            return [
+                (r.char_pr_id_ref, r.text or "")
+                for s in d.sections
+                for p in s.paragraphs
+                for r in p.runs
+            ]
+
+        before = snap(HwpxDocument.open(str(src)))
+        doc = HwpxDocument.open(str(src))
+        doc.replace_text_in_runs("우주", "★우주★")
+        dst = tmp_path / "dst.hwpx"
+        doc.save_to_path(str(dst))
+        after = snap(HwpxDocument.open(str(dst)))
+
+        assert len(before) == len(after)
+        # every charPrIDRef must be preserved
+        loss = sum(1 for b, a in zip(before, after) if b[0] != a[0])
+        assert loss == 0, f"charPrIDRef lost in {loss} runs"
+
+    def test_modified_document_still_exports(self, tmp_path, notes_doc):
+        src = tmp_path / "src.hwpx"
+        notes_doc.save_to_path(str(src))
+        doc = HwpxDocument.open(str(src))
+        doc.replace_text_in_runs("우주", "★우주★")
+        dst = tmp_path / "dst.hwpx"
+        doc.save_to_path(str(dst))
+        # after the round trip the Markdown export still works and keeps footnotes/links
+        md = export_markdown(HwpxDocument.open(str(dst)))
+        assert "[^fn1]" in md
+        # the replacement also applies inside link display text (intended behavior)
+        assert "](https://www.kasa.go.kr)" in md
+        assert "★우주★항공청 홈페이지" in md
+
+
+# ──────────────────────────────────────────────────────────────────
+# 통합 — 다양한 입력 형태
+# ──────────────────────────────────────────────────────────────────
+class TestSourceInputs:  # export_markdown called with different kinds of source argument
+    def test_accepts_document_instance(self, showcase_doc):
+        md = export_markdown(showcase_doc)  # source is the fixture's document object
+        assert len(md) > 0
+
+    def test_accepts_path_string(self):
+        md = export_markdown(str(EXAMPLES / "FormattingShowcase.hwpx"))  # source is a str path
+        assert len(md) > 0
+
+    def test_accepts_pathlib(self):
+        md = export_markdown(EXAMPLES / "FormattingShowcase.hwpx")  # source is the un-stringified path object
+        assert len(md) > 0
+
+    def test_via_document_method(self, showcase_doc):
+        md = showcase_doc.export_rich_markdown()
+        direct = export_markdown(showcase_doc)
+        assert md == direct  # the document method and the module-level function must agree