Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions backend/app/services/document_conversion/html_to_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,25 @@ async def try_chrome_pdf() -> bool:
port = sock.getsockname()[1]

profile_dir = tempfile.TemporaryDirectory(prefix="clawith-html-pdf-")
chrome_args = [
chrome,
"--headless=new",
"--disable-gpu",
"--disable-dev-shm-usage",
"--no-first-run",
"--no-default-browser-check",
"--allow-file-access-from-files",
f"--remote-debugging-port={port}",
f"--user-data-dir={profile_dir.name}",
"about:blank",
]
import sys
if sys.platform.startswith("linux"):
# On Linux (e.g. inside Docker containers), restricted container contexts usually require Chrome to run without its sandbox.
chrome_args.extend(["--no-sandbox", "--disable-setuid-sandbox"])

proc = subprocess.Popen(
[
chrome,
"--headless=new",
"--disable-gpu",
"--disable-dev-shm-usage",
"--no-first-run",
"--no-default-browser-check",
"--allow-file-access-from-files",
f"--remote-debugging-port={port}",
f"--user-data-dir={profile_dir.name}",
"about:blank",
],
chrome_args,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
Expand Down Expand Up @@ -152,8 +158,12 @@ async def send(method: str, params: dict[str, Any] | None = None) -> dict[str, A
profile_dir.cleanup()

try:
if await try_chrome_pdf():
chrome_success = await try_chrome_pdf()
if chrome_success:
return f"✅ Successfully converted HTML to PDF with Chrome: {target_path}"
else:
chrome_pdf_error = Exception("Chrome process timed out or failed to connect to debugging port")
logger.warning("Chrome HTML to PDF failed (timed out), falling back to WeasyPrint")
except Exception as exc:
chrome_pdf_error = exc
logger.warning(f"Chrome HTML to PDF failed, falling back to WeasyPrint: {exc}")
Expand Down
64 changes: 64 additions & 0 deletions backend/tests/test_html_to_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import sys
import pytest
from unittest.mock import MagicMock, patch
from pathlib import Path
from app.services.document_conversion.html_to_pdf import convert_html_to_pdf

@pytest.mark.asyncio
@patch("app.services.document_conversion.html_to_pdf.chrome_executable")
@patch("subprocess.Popen")
@patch("time.time")
@patch("weasyprint.HTML")
async def test_convert_html_to_pdf_linux(mock_weasy_html, mock_time, mock_popen, mock_chrome_exec):
    """Check that Chrome is launched with the sandbox-disabling flags on Linux.

    Chrome discovery, process spawning, the clock and WeasyPrint are all
    mocked, so no real browser or PDF renderer is touched. The result string
    is expected to mention WeasyPrint.
    """
    mock_chrome_exec.return_value = "/usr/bin/google-chrome"
    # Two clock readings ten seconds apart; intended to make the deadline
    # fail immediately.
    mock_time.side_effect = [1000.0, 1010.0]

    # Stand-ins for the spawned Chrome process and the WeasyPrint document.
    mock_popen.return_value = MagicMock()
    mock_weasy_html.return_value = MagicMock()

    source_path = Path("/tmp/src.html")
    target_path = Path("/tmp/tgt.pdf")

    with patch("sys.platform", "linux"):
        result = await convert_html_to_pdf(source_path, target_path, "tgt.pdf", {})

    assert mock_popen.called
    launch_args = mock_popen.call_args[0][0]
    for sandbox_flag in ("--no-sandbox", "--disable-setuid-sandbox"):
        assert sandbox_flag in launch_args
    assert "WeasyPrint" in result


@pytest.mark.asyncio
@patch("app.services.document_conversion.html_to_pdf.chrome_executable")
@patch("subprocess.Popen")
@patch("time.time")
@patch("weasyprint.HTML")
async def test_convert_html_to_pdf_darwin(mock_weasy_html, mock_time, mock_popen, mock_chrome_exec):
    """Check that Chrome is launched without sandbox-disabling flags on macOS.

    Uses the same mocked setup as the Linux variant but reports the platform
    as "darwin". The result string is expected to mention WeasyPrint.
    """
    # Pretend a Chrome binary was found and that spawning it succeeds.
    mock_chrome_exec.return_value = "/usr/bin/google-chrome"
    mock_popen.return_value = MagicMock()

    # Two clock readings ten seconds apart; intended to make the deadline
    # fail immediately.
    mock_time.side_effect = [1000.0, 1010.0]

    # WeasyPrint's HTML(...) call yields a mock document.
    mock_weasy_html.return_value = MagicMock()

    html_file = Path("/tmp/src.html")
    pdf_file = Path("/tmp/tgt.pdf")

    with patch("sys.platform", "darwin"):
        outcome = await convert_html_to_pdf(html_file, pdf_file, "tgt.pdf", {})

    assert mock_popen.called
    chrome_cmd = mock_popen.call_args[0][0]
    forbidden_flags = {"--no-sandbox", "--disable-setuid-sandbox"}
    assert forbidden_flags.isdisjoint(chrome_cmd)
    assert "WeasyPrint" in outcome