diff --git a/code/object/objcollide.cpp b/code/object/objcollide.cpp
index 5e11d15a3ed..4ece1192746 100644
--- a/code/object/objcollide.cpp
+++ b/code/object/objcollide.cpp
@@ -762,6 +762,7 @@ void spin_up_mp_collision() {
 void spin_down_mp_collision() {
 	threading::spin_down_threaded_task();
 	collision_processing_done.store(true);
+	threading::spin_down_wait_complete();
 }
 
 void queue_mp_collision(uint ctype, const obj_pair& colliding) {
diff --git a/code/utils/threading.cpp b/code/utils/threading.cpp
index 5c71228235e..27787263021 100644
--- a/code/utils/threading.cpp
+++ b/code/utils/threading.cpp
@@ -20,6 +20,7 @@ namespace threading {
 	static std::condition_variable wait_for_task;
 	static std::mutex wait_for_task_mutex;
 	static bool wait_for_task_condition;
+	static std::atomic_uint32_t wait_for_spindown_tasks;
 
 	static std::atomic worker_task;
 	static SCP_vector worker_threads;
@@ -28,12 +29,15 @@ namespace threading {
 	static void mp_worker_thread_main(size_t threadIdx) {
 		while(true) {
 			{
+				//Count this thread as idle before it blocks waiting for the next task; spin_down_wait_complete() polls this counter to detect that the spindown was successful.
+				wait_for_spindown_tasks.fetch_add(1, std::memory_order_release);
 				std::unique_lock lk(wait_for_task_mutex);
 				wait_for_task.wait(lk, []() { return wait_for_task_condition; });
 			}
 
 			switch (worker_task.load(std::memory_order_acquire)) {
 				case WorkerThreadTask::EXIT:
+					//Exit was requested, so leave the worker loop. NOTE(review): this path does not increment wait_for_spindown_tasks, so nothing is reported to spin_down_wait_complete() here.
 					return;
 				case WorkerThreadTask::COLLISION:
 					collide_mp_worker_thread(threadIdx);
@@ -144,6 +148,7 @@ namespace threading {
 	//External Functions
 
 	void spin_up_threaded_task(WorkerThreadTask task) {
+		wait_for_spindown_tasks.store(0, std::memory_order_release);
 		worker_task.store(task);
 		{
 			std::scoped_lock lock {wait_for_task_mutex};
@@ -157,6 +162,11 @@ namespace threading {
 		wait_for_task_condition = false;
 	}
 
+	void spin_down_wait_complete() {
+		//Spin until wait_for_spindown_tasks (reset to 0 by spin_up_threaded_task, incremented at the top of the worker loop) reaches num_threads. Spindowns should only occur once the actual task code is confirmed to be complete, so busy-waiting here is not an issue.
+		while (wait_for_spindown_tasks.load(std::memory_order_acquire) < num_threads);
+	}
+
 	void init_task_pool() {
 		if (Cmdline_multithreading == 0) {
 			//At least given the current collision-detection threading, 8 cores (if available) seems like a sweetspot, with more cores adding too much overhead.
@@ -181,6 +191,8 @@ namespace threading {
 	void shut_down_task_pool() {
 		spin_up_threaded_task(WorkerThreadTask::EXIT);
 
+		//No spin_down_wait_complete() here: the threads are joined right below, and a worker handling EXIT returns without incrementing wait_for_spindown_tasks, so exiting threads would not advance that counter.
+
 		for(auto& thread : worker_threads) {
 			thread.join();
 		}
diff --git a/code/utils/threading.h b/code/utils/threading.h
index ab33e655bbd..5da628ec114 100644
--- a/code/utils/threading.h
+++ b/code/utils/threading.h
@@ -11,6 +11,9 @@ namespace threading {
 	//This _must_ be called on the main thread BEFORE a task completes on a thread of the task pool.
 	void spin_down_threaded_task();
 
+	//Call this AFTER the running task has been told to finish. Blocks (by busy-waiting) until all threads have returned to the state in which they wait for a new task.
+	void spin_down_wait_complete();
+
 	void init_task_pool();
 	void shut_down_task_pool();
 