Skip to content

Commit c79509b

Browse files
committed
fix: make PDF reports optional and robust in limited environments
1 parent b953ae6 commit c79509b

4 files changed

Lines changed: 67 additions & 15 deletions

File tree

hashprep/reports/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
11
from .generators import generate_report as generate_report
2+
from .generators import get_generators as get_generators

hashprep/reports/generators.py

Lines changed: 30 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,34 @@ def generate(self, summary, full=False, output_file=None):
77
pass
88

99

10-
# Lazy loading report classes
1110
def _load_generators():
11+
"""
12+
Lazily load report generators.
13+
14+
PDF generation relies on WeasyPrint and system libraries that may not be
15+
available in all environments. We treat the PDF generator as optional and
16+
only enable it when its dependencies import cleanly.
17+
"""
1218
from .html import HtmlReport
1319
from .json import JsonReport
1420
from .markdown import MarkdownReport
15-
from .pdf import PdfReport
1621

17-
return {
22+
generators = {
1823
"md": MarkdownReport(),
1924
"json": JsonReport(),
2025
"html": HtmlReport(),
21-
"pdf": PdfReport(),
2226
}
2327

28+
try:
29+
from .pdf import PdfReport # type: ignore
30+
except Exception:
31+
# PDF generation is unavailable (missing WeasyPrint or system deps)
32+
generators["pdf"] = None
33+
else:
34+
generators["pdf"] = PdfReport()
35+
36+
return generators
37+
2438

2539
# get generators dictionary
2640
def get_generators():
@@ -34,6 +48,16 @@ def generate_report(summary, format="md", full=False, output_file=None, theme="m
3448
if format not in generators:
3549
raise ValueError(f"Unsupported format: {format}")
3650

37-
if format in ["html", "pdf"]:
38-
return generators[format].generate(summary, full, output_file, theme=theme)
51+
if format == "pdf":
52+
pdf_generator = generators.get("pdf")
53+
if pdf_generator is None:
54+
raise RuntimeError(
55+
"PDF report generation is unavailable. "
56+
"Install WeasyPrint and its system dependencies to enable PDF output."
57+
)
58+
return pdf_generator.generate(summary, full, output_file, theme=theme)
59+
60+
if format == "html":
61+
return generators["html"].generate(summary, full, output_file, theme=theme)
62+
3963
return generators[format].generate(summary, full, output_file)

tests/test_cli.py

Lines changed: 25 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
import pytest
1212

13+
from hashprep.reports import get_generators
14+
1315

1416
@pytest.fixture
1517
def titanic_csv():
@@ -33,6 +35,10 @@ def run_cli(args, cwd=None):
3335
return result
3436

3537

38+
_GENERATORS = get_generators()
39+
_PDF_AVAILABLE = _GENERATORS.get("pdf") is not None
40+
41+
3642
class TestCLIScan:
3743
"""Test 'hashprep scan' command."""
3844

@@ -176,14 +182,20 @@ def test_report_pdf(self, titanic_csv, temp_output_dir):
176182
"""Test PDF report generation."""
177183
result = run_cli(["report", titanic_csv, "--format", "pdf", "--full"], cwd=temp_output_dir)
178184

179-
assert result.returncode == 0
180-
assert "train_hashprep_report.pdf" in result.stdout
185+
if not _PDF_AVAILABLE:
186+
# When PDF generation is unavailable, the CLI should exit non-zero
187+
# with a clear error message propagated from generate_report.
188+
assert result.returncode != 0
189+
assert "PDF report generation is unavailable" in (result.stderr + result.stdout)
190+
else:
191+
assert result.returncode == 0
192+
assert "train_hashprep_report.pdf" in result.stdout
181193

182-
report_file = os.path.join(temp_output_dir, "train_hashprep_report.pdf")
183-
assert os.path.exists(report_file)
184-
# Check PDF magic number
185-
with open(report_file, "rb") as f:
186-
assert f.read(4) == b"%PDF"
194+
report_file = os.path.join(temp_output_dir, "train_hashprep_report.pdf")
195+
assert os.path.exists(report_file)
196+
# Check PDF magic number
197+
with open(report_file, "rb") as f:
198+
assert f.read(4) == b"%PDF"
187199

188200
def test_report_with_code_generation(self, titanic_csv, temp_output_dir):
189201
"""Test report with code generation."""
@@ -280,7 +292,12 @@ def test_full_workflow(self, titanic_csv, temp_output_dir):
280292
# Step 3: Generate all report formats
281293
for fmt in ["md", "json", "html", "pdf"]:
282294
result = run_cli(["report", titanic_csv, "--format", fmt, "--full"], cwd=temp_output_dir)
283-
assert result.returncode == 0
295+
if fmt == "pdf" and not _PDF_AVAILABLE:
296+
# In environments without PDF support, CLI should fail cleanly
297+
assert result.returncode != 0
298+
assert "PDF report generation is unavailable" in (result.stderr + result.stdout)
299+
else:
300+
assert result.returncode == 0
284301

285302
# Step 4: Generate code
286303
result = run_cli(["report", titanic_csv, "--with-code"], cwd=temp_output_dir)

tests/test_library_api.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,14 @@
1111
import pytest
1212

1313
from hashprep import DatasetAnalyzer
14-
from hashprep.reports import generate_report
14+
from hashprep.reports import generate_report, get_generators
1515
from hashprep.utils.sampling import SamplingConfig
1616

1717

18+
_GENERATORS = get_generators()
19+
_PDF_AVAILABLE = _GENERATORS.get("pdf") is not None
20+
21+
1822
@pytest.fixture
1923
def sample_dataframe():
2024
"""Create a sample DataFrame for testing."""
@@ -226,6 +230,9 @@ def test_pdf_report(self, sample_dataframe):
226230
output_file = f.name
227231

228232
try:
233+
if not _PDF_AVAILABLE:
234+
pytest.skip("PDF generation is unavailable in this environment")
235+
229236
report = generate_report(summary, format="pdf", full=True, output_file=output_file)
230237

231238
assert report is not None
@@ -394,6 +401,9 @@ def test_titanic_all_report_formats(self, titanic_csv):
394401
output_file = f.name
395402

396403
try:
404+
if fmt == "pdf" and not _PDF_AVAILABLE:
405+
pytest.skip("PDF generation is unavailable in this environment")
406+
397407
if fmt == "html":
398408
report = generate_report(summary, format=fmt, full=True, output_file=output_file, theme="minimal")
399409
else:

0 commit comments

Comments
 (0)