diff --git a/backend/app/services/document_conversion/html_to_pdf.py b/backend/app/services/document_conversion/html_to_pdf.py index c1b9b8d78..ca8c5edac 100644 --- a/backend/app/services/document_conversion/html_to_pdf.py +++ b/backend/app/services/document_conversion/html_to_pdf.py @@ -35,19 +35,25 @@ async def try_chrome_pdf() -> bool: port = sock.getsockname()[1] profile_dir = tempfile.TemporaryDirectory(prefix="clawith-html-pdf-") + chrome_args = [ + chrome, + "--headless=new", + "--disable-gpu", + "--disable-dev-shm-usage", + "--no-first-run", + "--no-default-browser-check", + "--allow-file-access-from-files", + f"--remote-debugging-port={port}", + f"--user-data-dir={profile_dir.name}", + "about:blank", + ] + import sys + if sys.platform.startswith("linux"): + # Linux environments (like Docker containers) require no-sandbox in standard restricted container contexts + chrome_args.extend(["--no-sandbox", "--disable-setuid-sandbox"]) + proc = subprocess.Popen( - [ - chrome, - "--headless=new", - "--disable-gpu", - "--disable-dev-shm-usage", - "--no-first-run", - "--no-default-browser-check", - "--allow-file-access-from-files", - f"--remote-debugging-port={port}", - f"--user-data-dir={profile_dir.name}", - "about:blank", - ], + chrome_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) @@ -152,8 +158,12 @@ async def send(method: str, params: dict[str, Any] | None = None) -> dict[str, A profile_dir.cleanup() try: - if await try_chrome_pdf(): + chrome_success = await try_chrome_pdf() + if chrome_success: return f"✅ Successfully converted HTML to PDF with Chrome: {target_path}" + else: + chrome_pdf_error = Exception("Chrome process timed out or failed to connect to debugging port") + logger.warning("Chrome HTML to PDF failed (timed out), falling back to WeasyPrint") except Exception as exc: chrome_pdf_error = exc logger.warning(f"Chrome HTML to PDF failed, falling back to WeasyPrint: {exc}") diff --git a/backend/tests/test_html_to_pdf.py 
b/backend/tests/test_html_to_pdf.py
new file mode 100644
index 000000000..a791466cb
--- /dev/null
+++ b/backend/tests/test_html_to_pdf.py
@@ -0,0 +1,66 @@
+"""Tests for the Chrome sandbox flags and the WeasyPrint fallback of ``convert_html_to_pdf``."""
+
+import itertools
+import sys
+from contextlib import ExitStack
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from app.services.document_conversion.html_to_pdf import convert_html_to_pdf
+
+CONVERTER_MODULE = "app.services.document_conversion.html_to_pdf"
+SANDBOX_FLAGS = ("--no-sandbox", "--disable-setuid-sandbox")
+
+
+async def _convert_on_platform(platform: str, tmp_path: Path) -> tuple[list[str], str]:
+    """Run ``convert_html_to_pdf`` with Chrome and WeasyPrint mocked out.
+
+    ``sys.platform`` is replaced by *platform* while the converter runs.
+
+    Returns:
+        The argument list passed to ``subprocess.Popen`` and the message
+        returned by the converter.
+    """
+    src = tmp_path / "src.html"
+    src.write_text("<html><body>test</body></html>", encoding="utf-8")
+    tgt = tmp_path / "tgt.pdf"
+
+    # The fake clock starts at 1000.0 and jumps 10 s on every read. Unlike a
+    # fixed list it never raises StopIteration after a set number of calls,
+    # and any finite deadline is eventually exceeded.
+    fake_clock = itertools.count(1000.0, 10.0)
+
+    with ExitStack() as stack:
+        stack.enter_context(
+            patch(f"{CONVERTER_MODULE}.chrome_executable", return_value="/usr/bin/google-chrome")
+        )
+        mock_popen = stack.enter_context(patch("subprocess.Popen", return_value=MagicMock()))
+        stack.enter_context(patch("time.time", side_effect=fake_clock))
+        stack.enter_context(patch("weasyprint.HTML", return_value=MagicMock()))
+        stack.enter_context(patch.object(sys, "platform", platform))
+        res = await convert_html_to_pdf(src, tgt, "tgt.pdf", {})
+
+    assert mock_popen.called
+    return mock_popen.call_args[0][0], res
+
+
+@pytest.mark.asyncio
+async def test_convert_html_to_pdf_linux(tmp_path):
+    """On Linux, Chrome is launched with the sandbox-disabling flags."""
+    args, res = await _convert_on_platform("linux", tmp_path)
+
+    for flag in SANDBOX_FLAGS:
+        assert flag in args
+    assert "WeasyPrint" in res
+
+
+@pytest.mark.asyncio
+async def test_convert_html_to_pdf_darwin(tmp_path):
+    """On macOS, the sandbox-disabling flags are not added."""
+    args, res = await _convert_on_platform("darwin", tmp_path)
+
+    for flag in SANDBOX_FLAGS:
+        assert flag not in args
+    assert "WeasyPrint" in res