Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion benchmarks/utils/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@
# Interval in seconds between checking for per-instance timeouts
TIMEOUT_CHECK_INTERVAL_SECONDS = 60

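# Maximum number of tasks a worker process handles before it is recycled.
# This bounds memory growth caused by Python heap fragmentation: CPython's
# pymalloc allocator can only hand an arena back to the OS once every block
# in it is free, so long-lived workers tend to accumulate RSS over time.
# Recycling a worker lets the OS reclaim all memory held by the old process.
# The cost of recycling is a process spawn plus module re-import (~1-2s),
# which is negligible compared to per-instance runtime (minutes).
# NOTE(review): the ProcessPoolExecutor(...) call in this change passes only
# max_workers; confirm this constant is actually supplied as
# max_tasks_per_child (Python 3.11+), otherwise no recycling takes place.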
MAX_TASKS_PER_CHILD = 10


@dataclass
class PendingInstance:
Expand Down Expand Up @@ -369,7 +378,9 @@ def attempt_on_result(instance: EvalInstance, out: EvalOutput) -> None:
attempt_outputs.append(out)

# Run evaluation for this attempt
pool = ProcessPoolExecutor(max_workers=self.num_workers)
pool = ProcessPoolExecutor(
max_workers=self.num_workers,
)
futures: list[Future] = []
# Consolidated tracking: maps future -> PendingInstance
pending_instances: dict[Future, PendingInstance] = {}
Expand Down
Loading