diff --git a/cpp/src/branch_and_bound/mip_node.hpp b/cpp/src/branch_and_bound/mip_node.hpp
index 694a7099c4..3fe3fcd7fd 100644
--- a/cpp/src/branch_and_bound/mip_node.hpp
+++ b/cpp/src/branch_and_bound/mip_node.hpp
@@ -13,6 +13,7 @@
 
 #include
 #include
+#include
 #include
 #include
 
@@ -44,18 +45,29 @@ class mip_node_t {
   {
     // Iterative teardown to avoid stack overflow on deep trees.
     // Detach all descendants breadth-first, then destroy them as leaves.
-    std::vector> nodes;
-    for (auto& c : children) {
-      if (c) { nodes.push_back(std::move(c)); }
-    }
-    // nodes.size() grows so that this loop only terminates when only leaves remain
-    for (size_t i = 0; i < nodes.size(); ++i) {
-      for (auto& c : nodes[i]->children) {
+    // vector::push_back can throw bad_alloc; the catch-all keeps the destructor
+    // exception-free. Under OOM, any not-yet-detached descendants are destroyed
+    // via the recursive unique_ptr chain in `children` as this frame unwinds.
+    try {
+      std::vector> nodes;
+      for (auto& c : children) {
         if (c) { nodes.push_back(std::move(c)); }
       }
-    }
+      // nodes.size() grows so that this loop only terminates when only leaves remain
+      for (size_t i = 0; i < nodes.size(); ++i) {
+        for (auto& c : nodes[i]->children) {
+          if (c) { nodes.push_back(std::move(c)); }
+        }
+      }
 
-    // scope-exit ensure destruction of all detached leaves
+      // scope-exit ensure destruction of all detached leaves
+    } catch (const std::exception& e) {
+      CUOPT_LOG_ERROR(
+        "mip_node_t destructor: iterative teardown failed (%s); falling back to "
+        "recursive unique_ptr destruction.",
+        e.what());
+    } catch (...) {
+    }
   }
 
   mip_node_t(mip_node_t&&) = default;
diff --git a/cpp/src/grpc/client/grpc_client.cpp b/cpp/src/grpc/client/grpc_client.cpp
index 59c6bfcb5d..6eb560247e 100644
--- a/cpp/src/grpc/client/grpc_client.cpp
+++ b/cpp/src/grpc/client/grpc_client.cpp
@@ -127,7 +127,55 @@ grpc_client_t::grpc_client_t(const std::string& server_address) : impl_(std::mak
   chunked_array_threshold_bytes_ = config_.max_message_bytes * 3 / 4;
 }
 
-grpc_client_t::~grpc_client_t() { stop_log_streaming(); }
+grpc_client_t::~grpc_client_t()
+{
+  // The destructor must not propagate exceptions AND must not leave a joinable
+  // std::thread alive — a joinable thread's destructor calls std::terminate.
+  // We inline a noexcept variant of stop_log_streaming() here so that on any
+  // failure we still detach the thread before its destructor runs.
+  stop_logs_.store(true);
+  try {
+    std::lock_guard lk(log_context_mutex_);
+    if (active_log_context_) {
+      static_cast(active_log_context_)->TryCancel();
+    }
+  } catch (const std::exception& e) {
+    CUOPT_LOG_ERROR(
+      "grpc_client_t destructor: TryCancel/lock failed (%s); proceeding to "
+      "join/detach.",
+      e.what());
+  } catch (...) {
+    // Best-effort cancel; fall through to join/detach the thread.
+  }
+  std::unique_ptr t;
+  std::swap(t, log_thread_);
+  if (t && t->joinable()) {
+    bool joined = false;
+    try {
+      t->join();
+      joined = true;
+    } catch (const std::exception& e) {
+      CUOPT_LOG_ERROR("grpc_client_t destructor: log-thread join failed (%s); detaching.",
+                      e.what());
+    } catch (...) {
+      // Non-std exception from join(): still fall through to the detach below.
+    }
+    // Any join failure (std::system_error or otherwise) must end in a detach so the
+    // local unique_ptr's destructor doesn't terminate on a still-joinable thread.
+    if (!joined && t->joinable()) {
+      try {
+        t->detach();
+      } catch (const std::exception& e2) {
+        CUOPT_LOG_ERROR(
+          "grpc_client_t destructor: detach also failed (%s); thread may "
+          "terminate the process on unique_ptr destruction.",
+          e2.what());
+      } catch (...) {
+        // Nothing further can be done here; the thread may still be joinable.
+      }
+    }
+  }
+}
 
 bool grpc_client_t::connect()
 {
diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
index 575228895b..68dad624ae 100644
--- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
+++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu
@@ -230,10 +230,17 @@ class timing_raii_t {
 
   ~timing_raii_t()
   {
-    auto end_time = std::chrono::high_resolution_clock::now();
-    auto duration =
-      std::chrono::duration_cast>(end_time - start_time_);
-    times_vec_.push_back(duration.count());
+    // vector::push_back can throw bad_alloc; the catch-all keeps the destructor
+    // exception-free. Losing one timing sample under OOM is acceptable.
+    try {
+      auto end_time = std::chrono::high_resolution_clock::now();
+      auto duration =
+        std::chrono::duration_cast>(end_time - start_time_);
+      times_vec_.push_back(duration.count());
+    } catch (const std::exception& e) {
+      CUOPT_LOG_ERROR("timing_raii_t destructor: failed to record sample (%s).", e.what());
+    } catch (...) {
+    }
   }
 
  private: