11import os
2+ import shutil
23import subprocess
34import tempfile
45from typing import Any
6+
7+ from PIL import Image
58from lf_toolkit .evaluation import Result , Params
9+ from lf_toolkit .evaluation .image_upload import upload_image , ImageUploadError
610
# Wall-clock limit, in seconds, applied to each run of the submitted code.
_TIMEOUT = 5
# Folder name passed to upload_image() for every captured plot.
_UPLOAD_FOLDER = "evaluatePython"

# Source text prepended to the submitted code before it is executed.
# It is rendered with str.format(plot_dir=...), so the only brace field
# is {plot_dir!r}; the plain [0] subscripts are not format fields.
#
# The preamble:
#   * replaces matplotlib.pyplot.show with a function that saves every
#     open figure into plot_dir as a zero-padded, sequentially numbered
#     PNG and then closes all figures;
#   * registers an atexit hook that saves any figures still open when
#     the interpreter exits (i.e. figures never passed through show()).
#
# Zero-padded names mean a lexicographic sort of the directory listing
# returns the plots in the order they were saved.
_PREAMBLE_TEMPLATE = """\
import os as _os
import matplotlib.pyplot as _plt
import atexit as _atexit

_plot_dir = {plot_dir!r}
_plot_idx = [0]

def _patched_show(*args, **kwargs):
    for num in _plt.get_fignums():
        _plot_idx[0] += 1
        _plt.figure(num).savefig(_os.path.join(_plot_dir, str(_plot_idx[0]).zfill(4) + '.png'))
    _plt.close('all')

_plt.show = _patched_show

def _capture_remaining():
    for num in _plt.get_fignums():
        _plot_idx[0] += 1
        _plt.figure(num).savefig(_os.path.join(_plot_dir, str(_plot_idx[0]).zfill(4) + '.png'))

_atexit.register(_capture_remaining)
"""
37+
38+
39+ def _run_code (code : str , stdin : str ) -> tuple [str , str , bool , list [Image .Image ]]:
40+ plot_dir = tempfile .mkdtemp ()
41+ preamble = _PREAMBLE_TEMPLATE .format (plot_dir = plot_dir )
1142 with tempfile .NamedTemporaryFile (mode = "w" , suffix = ".py" , delete = False ) as f :
12- f .write (code )
43+ f .write (preamble + " \n " + code )
1344 tmpfile = f .name
1445 try :
1546 proc = subprocess .run (
@@ -20,30 +51,50 @@ def _run_code(code: str, stdin: str) -> tuple[str, str, bool]:
2051 timeout = _TIMEOUT ,
2152 env = {** os .environ , "MPLBACKEND" : "Agg" },
2253 )
23- return proc .stdout , proc .stderr , False
54+ images = []
55+ for fn in sorted (os .listdir (plot_dir )):
56+ if fn .endswith (".png" ):
57+ img = Image .open (os .path .join (plot_dir , fn ))
58+ img .load ()
59+ img .format = "PNG"
60+ images .append (img )
61+ return proc .stdout , proc .stderr , False , images
2462 except subprocess .TimeoutExpired :
25- return "" , "" , True
63+ return "" , "" , True , []
2664 finally :
2765 os .unlink (tmpfile )
66+ shutil .rmtree (plot_dir , ignore_errors = True )
2867
2968
def _code_block(label: str, content: str) -> str:
    """Return *content* wrapped in a Markdown fenced code block.

    The result is ``label`` followed by a colon, then the content on its
    own lines between an opening and a closing triple-backtick fence.
    The fences start at column 0 so they are recognised as fences.

    Args:
        label: Heading text placed before the fence (e.g. ``"Output"``).
        content: Text placed verbatim inside the fence.

    Returns:
        The formatted Markdown string.
    """
    return f"{label}:\n```\n{content}\n```"
3271
3372
def _upload_plots(images: list[Image.Image]) -> list[str]:
    """Upload each image and return one Markdown image embed per success.

    Args:
        images: Captured plot images, in the order they should appear.

    Returns:
        A list of Markdown image strings (``![Plot N](url)``), where N is
        the 1-based position of the image in *images*. Images whose
        upload raises ``ImageUploadError`` are omitted, so the list may
        be shorter than *images* and the numbering may have gaps.
    """
    embeds = []
    for i, img in enumerate(images, 1):
        try:
            url = upload_image(img, _UPLOAD_FOLDER)
        except ImageUploadError:
            # Best effort: a plot that cannot be uploaded is left out of
            # the feedback rather than failing the whole evaluation.
            continue
        # Embed the uploaded URL; an empty string here would silently
        # drop the plot from the rendered feedback.
        embeds.append(f"![Plot {i}]({url})")
    return embeds
82+
83+
3484def evaluation_function (response : Any , answer : Any , params : Params ) -> Result :
3585 tests = params .get ("tests" , [])
3686 result = Result ()
3787
3888 if not tests :
39- stdout , stderr , timed_out = _run_code (str (response ), "" )
89+ stdout , stderr , timed_out , images = _run_code (str (response ), "" )
4090 if timed_out :
4191 result .add_feedback ("error" , f"Code timed out after { _TIMEOUT } s." )
4292 elif stderr and not stdout :
4393 result .add_feedback ("error" , _code_block ("Error" , stderr .strip ()))
4494 else :
45- output = stdout .rstrip () or "(no output)"
46- result .add_feedback ("output" , _code_block ("Output" , output ))
95+ parts = [_code_block ("Output" , stdout .rstrip () or "(no output)" )]
96+ parts .extend (_upload_plots (images ))
97+ result .add_feedback ("output" , "\n \n " .join (parts ))
4798 return result
4899
49100 passed = 0
@@ -53,7 +104,7 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
53104 expected = test .get ("expected_output" , "" ).rstrip ()
54105 hidden = test .get ("hidden" , False )
55106
56- stdout , stderr , timed_out = _run_code (str (response ), stdin )
107+ stdout , stderr , timed_out , images = _run_code (str (response ), stdin )
57108 actual = stdout .rstrip ()
58109 label = f"Hidden test { i } " if hidden else f"Test { i } "
59110
@@ -79,6 +130,7 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
79130 if stdin .strip ():
80131 parts .append (_code_block ("Input" , stdin .rstrip ()))
81132 parts .append (_code_block ("Output" , actual or "(no output)" ))
133+ parts .extend (_upload_plots (images ))
82134 result .add_feedback ("pass" , "\n \n " .join (parts ))
83135 else :
84136 tag = "hidden_fail" if hidden else "fail"
@@ -90,6 +142,7 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
90142 parts .append (_code_block ("Input" , stdin .rstrip ()))
91143 parts .append (_code_block ("Your output" , actual or "(no output)" ))
92144 parts .append (_code_block ("Expected" , expected ))
145+ parts .extend (_upload_plots (images ))
93146 result .add_feedback (tag , "\n \n " .join (parts ))
94147
95148 result .is_correct = passed == len (tests )
0 commit comments