Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ Format: [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Versioning:
## [Unreleased]

### Added
- **PPTX/HWPX/XLSX 조건·표현식·필터** — 단순 `{{key}}` 치환에 더해
`{% if %}` 조건, `{{ price * qty }}` 표현식, `{{ x|length }}` 필터를 지원
(셀/문단 단위 jinja2, docx 의 Jinja 와 통일). 단순 `{{key}}` 는 기존 빠른
경로 보존. 구조적 표 행 루프(`{%tr%}`)는 여전히 DOCX 전용.
- **`get_form_fields()` + inspect 중복 라벨 힌트** — 라벨 후보와 *중복(dot-path 필요)*
여부를 미리 보여준다. `inspect_document` 응답에 `duplicate_labels` + 힌트를 추가해
LLM 이 fill_form 전에 dot-path 필요를 인지(ambiguous 재시도 라운드 절감).
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,12 @@ LLM은 이 preview를 보고 **"빈 셀이 어디 있는지 / 어떤 값을 넣
└─────────────────────┴─────┴─────┘
```

### PPTX / HWPX — 단순 `{{key}}` 치환만
### PPTX / HWPX / XLSX — `{{key}}` + 조건/표현식 (v0.11+)

loop / if / filter는 지원하지 않습니다. PPTX는 placeholder가 여러 `run`으로 쪼개질 수 있어, 어댑터가 paragraph 전체 텍스트를 재조립한 뒤 첫 `run`에 다시 담는 방식으로 처리합니다 (서식 일부 손실 가능).
단순 `{{key}}` 치환에 더해 **조건(`{% if %}`)·표현식(`{{ price * qty }}`)·필터(`{{ x|length }}`)** 를 지원합니다 (셀/문단 단위 jinja2 렌더 — docx 의 Jinja 와 통일). 단순 `{{key}}` 만 있을 때는 기존 빠른 치환 경로를 그대로 씁니다.

- **제약**: 표 행 반복(`{%tr for%}` 같은 구조적 row 루프)은 DOCX(docxtpl)만 지원합니다. PPTX/HWPX/XLSX 의 조건/표현식은 *한 셀/문단 블록 내부* 범위입니다.
- PPTX는 placeholder가 여러 `run`으로 쪼개질 수 있어, 어댑터가 paragraph 전체 텍스트를 재조립한 뒤 첫 `run`에 다시 담는 방식으로 처리합니다 (서식 일부 손실 가능).

### 누락 키 처리 — 3포맷 동일 (`on_missing`)

Expand Down
44 changes: 44 additions & 0 deletions document_adapter/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,20 @@

# NOTE(review): presumably the run of whitespace/separator characters stripped
# when normalizing labels — the usage is outside this view; confirm.
_LABEL_NORMALIZE_RE = re.compile(r"[\s·・//\-::()\[\]<>*#]+")

# Pattern for plain {{key}} substitution (the fast path without loops/conditions).
_TEMPLATE_TAG = re.compile(r"\{\{\s*(\w+)\s*\}\}")
# Detects a {{ ... }} that is *not* a bare identifier {{ name }} (expressions, filters, etc.).
_EXPR_TAG = re.compile(r"\{\{(?!\s*\w+\s*\}\})")


def _has_template(text: str) -> bool:
return "{{" in text or "{%" in text


def _needs_jinja(text: str) -> bool:
"""조건/루프({% %}) 또는 표현식({{ a*b }}) 이 있으면 jinja 가 필요."""
return "{%" in text or _EXPR_TAG.search(text) is not None


def _estimate_text_width_cm(text: str) -> float:
"""10pt 기준 대략적인 글자폭 합(cm). 한글/CJK ~0.35cm, 그 외 ~0.20cm.
Expand Down Expand Up @@ -439,6 +453,36 @@ def _render_report(self, placeholders: list[str], context: dict[str, Any],
raise ValueError(f"missing placeholders: {missing}")
return {"used": used, "missing": missing}

def _render_text_block(self, text: str, context: dict[str, Any],
                       on_missing: str) -> str:
    """Render one text block (a cell or a paragraph); shared by pptx/hwpx/xlsx.

    - When the block holds only bare ``{{key}}`` placeholders (no ``{% %}``
      statement and no ``{{ expression }}``), plain regex substitution is
      used, which preserves the pre-Jinja behavior.
    - Otherwise the block is rendered with Jinja2 (conditions, expressions,
      filters). ``on_missing`` is mapped to a Jinja undefined type:
      ``blank`` -> ``Undefined``, ``error`` -> ``StrictUndefined``, anything
      else -> ``DebugUndefined`` (leave the placeholder), mirroring the
      fall-through of the plain substitution path.

    Args:
        text: Raw text of the block, possibly containing template tags.
        context: Mapping of placeholder names to values.
        on_missing: Missing-key policy (``"blank"``, ``"leave"`` or ``"error"``).

    Returns:
        The rendered text. If the block is not a valid Jinja template, only
        its bare ``{{key}}`` placeholders are substituted.

    Raises:
        ValueError: If Jinja reports an undefined-variable error or the
            sandbox rejects an unsafe operation in the template.
    """
    def substitute_simple(source: str) -> str:
        # Replace bare {{key}} tags; a missing key becomes "" only under "blank".
        def repl(m: "re.Match[str]") -> str:
            key = m.group(1)
            if key in context:
                return str(context[key])
            return "" if on_missing == "blank" else m.group(0)
        return _TEMPLATE_TAG.sub(repl, source)

    if not _needs_jinja(text):
        return substitute_simple(text)

    import jinja2
    from jinja2.exceptions import SecurityError
    from jinja2.sandbox import SandboxedEnvironment

    undefined_cls = {
        "blank": jinja2.Undefined,
        "error": jinja2.StrictUndefined,
    }.get(on_missing, jinja2.DebugUndefined)
    # Template text comes from document content, so it is rendered in the
    # sandbox: expressions cannot reach unsafe attributes or callables.
    # keep_trailing_newline stops Jinja from dropping a final "\n" of the block.
    env = SandboxedEnvironment(undefined=undefined_cls, autoescape=False,
                               keep_trailing_newline=True)
    try:
        return env.from_string(text).render(context)
    except jinja2.TemplateSyntaxError:
        # Not a valid template (e.g. a stray "{{"): still substitute the bare
        # {{key}} placeholders instead of leaving the whole block untouched.
        return substitute_simple(text)
    except (jinja2.UndefinedError, SecurityError) as e:
        raise ValueError(str(e)) from e

@abstractmethod
def set_cell(self, table_index: int, row: int, col: int, value: str,
*, allow_merge_redirect: bool = False) -> str:
Expand Down
11 changes: 3 additions & 8 deletions document_adapter/hwpx_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
NotImplementedForFormat,
TableIndexError,
TableSchema,
_has_template,
)

TAG_PATTERN = re.compile(r"\{\{\s*(\w+)\s*\}\}")
Expand Down Expand Up @@ -291,17 +292,11 @@ def render_template(self, context: dict[str, Any], *,
"""
report = self._render_report(self.get_placeholders(), context, on_missing)

def repl(m: "re.Match[str]") -> str:
key = m.group(1)
if key in context:
return str(context[key])
return "" if on_missing == "blank" else m.group(0)

def substitute(p: etree._Element) -> bool:
text = paragraph_text(p)
if not TAG_PATTERN.search(text):
if not _has_template(text):
return False
set_paragraph_text(p, TAG_PATTERN.sub(repl, text))
set_paragraph_text(p, self._render_text_block(text, context, on_missing))
return True

for section_name, root in self._pkg.iter_section_roots():
Expand Down
12 changes: 3 additions & 9 deletions document_adapter/pptx_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
ShapeInfo,
TableIndexError,
TableSchema,
_has_template,
)

TAG_PATTERN = re.compile(r"\{\{\s*(\w+)\s*\}\}")
Expand Down Expand Up @@ -351,19 +352,12 @@ def render_template(self, context: dict[str, Any], *,
paragraph 전체 텍스트를 재조립 후 첫 run에 담는다 (서식 일부 손실 가능).
누락 키 처리는 on_missing 정책을 따른다 (base 참조)."""
report = self._render_report(self.get_placeholders(), context, on_missing)

def repl(m: "re.Match[str]") -> str:
key = m.group(1)
if key in context:
return str(context[key])
return "" if on_missing == "blank" else m.group(0)

for tf in self._iter_text_frames():
for para in tf.paragraphs:
full_text = "".join(run.text for run in para.runs)
if not TAG_PATTERN.search(full_text):
if not _has_template(full_text):
continue
rendered = TAG_PATTERN.sub(repl, full_text)
rendered = self._render_text_block(full_text, context, on_missing)
if para.runs:
para.runs[0].text = rendered
for run in para.runs[1:]:
Expand Down
13 changes: 4 additions & 9 deletions document_adapter/xlsx_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
MergedCellWriteError,
TableIndexError,
TableSchema,
_has_template,
)

TAG_PATTERN = re.compile(r"\{\{\s*(\w+)\s*\}\}")
Expand Down Expand Up @@ -220,18 +221,12 @@ def get_cell(self, table_index: int, row: int, col: int) -> CellContent:
def render_template(self, context: dict[str, Any], *,
                    on_missing: str = "blank") -> dict[str, list[str]]:
    """Render every templated string cell of the workbook in place.

    The placeholder report is built first via ``_render_report``; then each
    string cell that contains a template opener is rendered exactly once
    through ``_render_text_block``. Rendering once matters: a second pass
    would re-interpret substituted values as template text.

    Args:
        context: Mapping of placeholder names to values.
        on_missing: Missing-key policy forwarded to ``_render_report`` and
            ``_render_text_block``.

    Returns:
        The report produced by ``_render_report``.
    """
    report = self._render_report(self.get_placeholders(), context, on_missing)
    for ws in self._wb.worksheets:
        for row in ws.iter_rows():
            for cell in row:
                value = cell.value
                # Non-string cells (numbers, dates, None) are never templates.
                if isinstance(value, str) and _has_template(value):
                    cell.value = self._render_text_block(
                        value, context, on_missing)
    return report

def _resolve_writable(self, ws, row: int, col: int,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies = [
"docxtpl>=0.20",
"python-pptx>=1.0",
"openpyxl>=3.1",
"jinja2>=3.0",
"lxml>=5.0",
"mcp>=1.0",
]
Expand Down
63 changes: 63 additions & 0 deletions tests/test_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,69 @@ def test_render_template_missing_key_blanks_consistently(tmp_path: Path,
assert "{{" not in text # 미완성 플레이스홀더 노출 없음


def _render_one(path: Path, fmt: str, text: str, context: dict) -> str:
    """Write a one-line *fmt* document containing *text*, render it, and
    return the rendered text that carries the ``R:`` marker.

    The document is created at *path* (``hwpx``, ``pptx``, or otherwise
    xlsx), rendered with *context* through the adapter returned by
    ``load``, and saved back to the same path. Every ``.xml`` member of the
    resulting zip package is then scanned, and the text of each element
    containing ``"R:"`` is collected and joined with single spaces.
    """
    if fmt == "hwpx":
        from hwpx.document import HwpxDocument
        d = HwpxDocument.new()
        d.add_paragraph(text)
        d.save_to_path(path)
    elif fmt == "pptx":
        from pptx import Presentation
        from pptx.util import Inches
        pr = Presentation()
        pr.slide_width = Inches(10)
        pr.slide_height = Inches(7.5)
        s = pr.slides.add_slide(pr.slide_layouts[6])
        s.shapes.add_textbox(Inches(1), Inches(1), Inches(8),
                             Inches(1)).text_frame.text = text
        pr.save(str(path))
    else:  # xlsx
        from openpyxl import Workbook
        wb = Workbook()
        wb.active["A1"] = text
        wb.save(str(path))

    ad = load(path)
    try:
        ad.render_template(context)
        ad.save(path)
    finally:
        # Always release the adapter, even when rendering or saving fails,
        # so a failing test does not leave the document open.
        ad.close()

    out = []
    with zipfile.ZipFile(path) as z:
        for n in z.namelist():
            if not n.endswith(".xml"):
                continue
            try:
                root = etree.fromstring(z.read(n))
            except etree.XMLSyntaxError:
                # Skip package members that are not well-formed XML.
                continue
            for t in root.iter():
                if t.text and "R:" in t.text:
                    out.append(t.text)
    return " ".join(out)


@pytest.mark.parametrize("fmt", ["pptx", "hwpx", "xlsx"])
def test_render_conditions_and_expressions(tmp_path: Path, fmt: str) -> None:
    """pptx/hwpx/xlsx support conditions ({% if %}), expressions ({{a*b}}) and
    filters ({{x|length}}), matching docx's Jinja; plain {{key}} still works."""
    # Condition: the true branch keeps the body.
    cond_true = _render_one(tmp_path / f"a.{fmt}", fmt,
                            "R:{% if vip %}VIP{% endif %}", {"vip": True})
    assert "R:VIP" in cond_true
    # Condition: the false branch drops the body. Require the "R:" marker
    # itself so that an empty extraction cannot pass the check vacuously.
    cond_false = _render_one(tmp_path / f"b.{fmt}", fmt,
                             "R:{% if vip %}VIP{% endif %}", {"vip": False})
    assert "R:" in cond_false
    assert "R:VIP" not in cond_false
    # Expression + filter.
    expr = _render_one(tmp_path / f"c.{fmt}", fmt,
                       "R:{{ price * qty }}/{{ items|length }}",
                       {"price": 1000, "qty": 3, "items": [1, 2, 3, 4]})
    assert "R:3000/4" in expr
    # Plain {{key}} substitution is preserved.
    simple = _render_one(tmp_path / f"d.{fmt}", fmt, "R:{{name}}",
                         {"name": "홍길동"})
    assert "R:홍길동" in simple


@pytest.mark.parametrize("fmt", ["docx", "pptx", "hwpx"])
def test_render_template_on_missing_modes(tmp_path: Path, fmt: str) -> None:
"""on_missing leave 는 {{key}} 유지, error 는 ValueError."""
Expand Down
Loading