Skip to content

Commit d5bcc93

Browse files
committed
Add plot capture and upload functionality for evaluation
Introduces matplotlib plot capture by prepending a preamble to user code, saving plots as PNGs, uploading them, and embedding them as Markdown image links alongside stdout in feedback. Adds `Pillow`, `requests`, `boto3`, and `python-dotenv` for handling images and uploads. Updates `pyproject.toml` and `poetry.lock` accordingly.
1 parent d529f63 commit d5bcc93

3 files changed

Lines changed: 65 additions & 9 deletions

File tree

evaluation_function/evaluation.py

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,46 @@
11
import os
2+
import shutil
23
import subprocess
34
import tempfile
45
from typing import Any
6+
7+
from PIL import Image
58
from lf_toolkit.evaluation import Result, Params
9+
from lf_toolkit.evaluation.image_upload import upload_image, ImageUploadError
610

711
_TIMEOUT = 5
12+
_UPLOAD_FOLDER = "evaluatePython"
13+
14+
_PREAMBLE_TEMPLATE = """\
15+
import os as _os
16+
import matplotlib.pyplot as _plt
17+
import atexit as _atexit
18+
19+
_plot_dir = {plot_dir!r}
20+
_plot_idx = [0]
21+
22+
def _patched_show(*args, **kwargs):
23+
for num in _plt.get_fignums():
24+
_plot_idx[0] += 1
25+
_plt.figure(num).savefig(_os.path.join(_plot_dir, str(_plot_idx[0]).zfill(4) + '.png'))
26+
_plt.close('all')
27+
28+
_plt.show = _patched_show
829
30+
def _capture_remaining():
31+
for num in _plt.get_fignums():
32+
_plot_idx[0] += 1
33+
_plt.figure(num).savefig(_os.path.join(_plot_dir, str(_plot_idx[0]).zfill(4) + '.png'))
934
10-
def _run_code(code: str, stdin: str) -> tuple[str, str, bool]:
35+
_atexit.register(_capture_remaining)
36+
"""
37+
38+
39+
def _run_code(code: str, stdin: str) -> tuple[str, str, bool, list[Image.Image]]:
40+
plot_dir = tempfile.mkdtemp()
41+
preamble = _PREAMBLE_TEMPLATE.format(plot_dir=plot_dir)
1142
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
12-
f.write(code)
43+
f.write(preamble + "\n" + code)
1344
tmpfile = f.name
1445
try:
1546
proc = subprocess.run(
@@ -20,30 +51,50 @@ def _run_code(code: str, stdin: str) -> tuple[str, str, bool]:
2051
timeout=_TIMEOUT,
2152
env={**os.environ, "MPLBACKEND": "Agg"},
2253
)
23-
return proc.stdout, proc.stderr, False
54+
images = []
55+
for fn in sorted(os.listdir(plot_dir)):
56+
if fn.endswith(".png"):
57+
img = Image.open(os.path.join(plot_dir, fn))
58+
img.load()
59+
img.format = "PNG"
60+
images.append(img)
61+
return proc.stdout, proc.stderr, False, images
2462
except subprocess.TimeoutExpired:
25-
return "", "", True
63+
return "", "", True, []
2664
finally:
2765
os.unlink(tmpfile)
66+
shutil.rmtree(plot_dir, ignore_errors=True)
2867

2968

3069
def _code_block(label: str, content: str) -> str:
3170
return f"{label}:\n```\n{content}\n```"
3271

3372

73+
def _upload_plots(images: list[Image.Image]) -> list[str]:
    """Upload captured plots and build a Markdown image link for each success.

    Uploading is best-effort: an image whose upload raises
    ``ImageUploadError`` is omitted from the result instead of failing the
    whole evaluation. The failure is logged so that missing plots can be
    diagnosed rather than vanishing silently.

    Args:
        images: Plots to upload, in the order they should be numbered.

    Returns:
        One ``![Plot N](url)`` string per successfully uploaded image, where
        ``N`` is the 1-based position of the image in *images* (so numbering
        keeps gaps when an upload fails).
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)
    links = []
    for i, img in enumerate(images, 1):
        try:
            # The value returned by upload_image is used as the link target.
            url = upload_image(img, _UPLOAD_FOLDER)
        except ImageUploadError:
            logger.warning(
                "Failed to upload plot %d of %d", i, len(images), exc_info=True
            )
            continue
        links.append(f"![Plot {i}]({url})")
    return links
82+
83+
3484
def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
3585
tests = params.get("tests", [])
3686
result = Result()
3787

3888
if not tests:
39-
stdout, stderr, timed_out = _run_code(str(response), "")
89+
stdout, stderr, timed_out, images = _run_code(str(response), "")
4090
if timed_out:
4191
result.add_feedback("error", f"Code timed out after {_TIMEOUT}s.")
4292
elif stderr and not stdout:
4393
result.add_feedback("error", _code_block("Error", stderr.strip()))
4494
else:
45-
output = stdout.rstrip() or "(no output)"
46-
result.add_feedback("output", _code_block("Output", output))
95+
parts = [_code_block("Output", stdout.rstrip() or "(no output)")]
96+
parts.extend(_upload_plots(images))
97+
result.add_feedback("output", "\n\n".join(parts))
4798
return result
4899

49100
passed = 0
@@ -53,7 +104,7 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
53104
expected = test.get("expected_output", "").rstrip()
54105
hidden = test.get("hidden", False)
55106

56-
stdout, stderr, timed_out = _run_code(str(response), stdin)
107+
stdout, stderr, timed_out, images = _run_code(str(response), stdin)
57108
actual = stdout.rstrip()
58109
label = f"Hidden test {i}" if hidden else f"Test {i}"
59110

@@ -79,6 +130,7 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
79130
if stdin.strip():
80131
parts.append(_code_block("Input", stdin.rstrip()))
81132
parts.append(_code_block("Output", actual or "(no output)"))
133+
parts.extend(_upload_plots(images))
82134
result.add_feedback("pass", "\n\n".join(parts))
83135
else:
84136
tag = "hidden_fail" if hidden else "fail"
@@ -90,6 +142,7 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
90142
parts.append(_code_block("Input", stdin.rstrip()))
91143
parts.append(_code_block("Your output", actual or "(no output)"))
92144
parts.append(_code_block("Expected", expected))
145+
parts.extend(_upload_plots(images))
93146
result.add_feedback(tag, "\n\n".join(parts))
94147

95148
result.is_correct = passed == len(tests)

poetry.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ matplotlib = ">=3.7"
2222
scipy = ">=1.10"
2323
seaborn = ">=0.13"
2424
statsmodels = ">=0.14"
25+
requests = ">=2.32"
26+
boto3 = ">=1.42"
27+
python-dotenv = ">=1.0"
2528

2629
[tool.poetry.group.dev.dependencies]
2730
pytest = "^8.2.2"

0 commit comments

Comments
 (0)