From cccace9e2009d74f303b1fdc41eff3deadea1fcd Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Wed, 11 Mar 2026 12:17:31 +0100 Subject: [PATCH 01/21] replaced SpMV calls with SpMVOp calls in compute_At_y and compute_A_x --- cpp/CMakeLists.txt | 5 ++- cpp/src/pdlp/cusparse_view.cu | 78 ++++++++++++++++++++++++++++++++++ cpp/src/pdlp/cusparse_view.hpp | 11 +++++ cpp/src/pdlp/pdhg.cu | 39 ++++++++--------- cpp/src/pdlp/pdlp.cu | 1 + 5 files changed, 111 insertions(+), 23 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7e9dd13711..395f364807 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -439,7 +439,10 @@ set_target_properties(cuopt CXX_SCAN_FOR_MODULES OFF ) -target_compile_definitions(cuopt PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}") +target_compile_definitions(cuopt + PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}" + PUBLIC CUSPARSE_ENABLE_EXPERIMENTAL_API +) target_compile_options(cuopt PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 359bb7e928..27f8704eb1 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -304,6 +304,8 @@ cusparse_view_t::cusparse_view_t( A_T_indices_{op_problem_scaled.reverse_constraints}, buffer_non_transpose{0, handle_ptr->get_stream()}, buffer_transpose{0, handle_ptr->get_stream()}, + buffer_non_transpose_spmvop{0, handle_ptr->get_stream()}, + buffer_transpose_spmvop{0, handle_ptr->get_stream()}, buffer_transpose_batch{0, handle_ptr->get_stream()}, buffer_non_transpose_batch{0, handle_ptr->get_stream()}, buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()}, @@ -717,6 +719,8 @@ cusparse_view_t::cusparse_view_t( A_T_indices_{_A_T_indices}, buffer_non_transpose{0, handle_ptr->get_stream()}, buffer_transpose{0, handle_ptr->get_stream()}, + buffer_non_transpose_spmvop{0, handle_ptr->get_stream()}, + buffer_transpose_spmvop{0, 
handle_ptr->get_stream()}, buffer_transpose_batch{0, handle_ptr->get_stream()}, buffer_non_transpose_batch{0, handle_ptr->get_stream()}, buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()}, @@ -926,6 +930,8 @@ cusparse_view_t::cusparse_view_t( tmp_dual(existing_cusparse_view.tmp_dual), buffer_non_transpose{0, handle_ptr->get_stream()}, buffer_transpose{0, handle_ptr->get_stream()}, + buffer_non_transpose_spmvop{0, handle_ptr->get_stream()}, + buffer_transpose_spmvop{0, handle_ptr->get_stream()}, buffer_transpose_batch{0, handle_ptr->get_stream()}, buffer_non_transpose_batch{0, handle_ptr->get_stream()}, buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()}, @@ -1031,6 +1037,76 @@ cusparse_view_t::cusparse_view_t( #endif } +// Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix. +template +void cusparse_view_t::create_spmv_op_plans(bool is_reflected) +{ + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); + + // Prepare buffers for At_y SpMVOp + size_t buffer_size_transpose = 0; + RAFT_CUSPARSE_TRY( + cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + &buffer_size_transpose)); + buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); + + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), + &spmv_op_descr_A_t_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + buffer_transpose_spmvop.data())); + + char* lto_buffer = NULL; + size_t lto_buffer_size = 0; + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), + spmv_op_descr_A_t_, + &spmv_op_plan_A_t_, + lto_buffer, + lto_buffer_size)); + + // Only prepare buffers for A_x if using reflected_halpern + if (is_reflected) + { + size_t buffer_size_non_transpose = 0; + 
RAFT_CUSPARSE_TRY( + cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + &buffer_size_non_transpose)); + buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); + + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), + &spmv_op_descr_A_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + buffer_non_transpose_spmvop.data())); + + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), + spmv_op_descr_A_, + &spmv_op_plan_A_, + lto_buffer, + lto_buffer_size)); + } +} + // Empty constructor used in kkt restart to save memory template cusparse_view_t::cusparse_view_t( @@ -1041,6 +1117,8 @@ cusparse_view_t::cusparse_view_t( : handle_ptr_(handle_ptr), buffer_non_transpose{0, handle_ptr->get_stream()}, buffer_transpose{0, handle_ptr->get_stream()}, + buffer_non_transpose_spmvop{0, handle_ptr->get_stream()}, + buffer_transpose_spmvop{0, handle_ptr->get_stream()}, buffer_transpose_batch{0, handle_ptr->get_stream()}, buffer_non_transpose_batch{0, handle_ptr->get_stream()}, buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()}, diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index c6d0ddea61..96cad98fbe 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -120,6 +120,8 @@ class cusparse_view_t { const rmm::device_uvector&, // Empty just to init the const& const std::vector&); // Empty just to init the const& + void create_spmv_op_plans(bool is_reflected); + const bool batch_mode_{false}; raft::handle_t const* handle_ptr_{nullptr}; @@ -171,6 +173,15 @@ class cusparse_view_t { // reuse buffers for cusparse spmv rmm::device_uvector buffer_non_transpose; rmm::device_uvector buffer_transpose; + rmm::device_uvector 
buffer_non_transpose_spmvop; + rmm::device_uvector buffer_transpose_spmvop; + + // here for tests of compilation + cusparseSpMVOpDescr_t spmv_op_descr_A_; + cusparseSpMVOpDescr_t spmv_op_descr_A_t_; + cusparseSpMVOpPlan_t spmv_op_plan_A_; + cusparseSpMVOpPlan_t spmv_op_plan_A_t_; + // reuse buffers for cusparse spmm rmm::device_uvector buffer_transpose_batch; diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index d9dbb083f9..6bceb4ded7 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -465,17 +465,14 @@ void pdhg_solver_t::compute_At_y() } } if (!cusparse_view_.mixed_precision_enabled_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_t_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_solution, + cusparse_view_.current_AtY, + cusparse_view_.current_AtY)); } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( @@ -513,18 +510,16 @@ void pdhg_solver_t::compute_A_x() } } if (!cusparse_view_.mixed_precision_enabled_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + 
RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.reflected_primal_solution, + cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient)); } + } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 49c77e44dc..080f7c2a16 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -2316,6 +2316,7 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co compute_initial_primal_weight(); initial_scaling_strategy_.scale_problem(); + pdhg_solver_.get_cusparse_view().create_spmv_op_plans(settings_.hyper_params.use_reflected_primal_dual); // Update FP32 matrix copies for mixed precision SpMV after scaling pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); From dd12e1a0c4ed780d2fd31b37745b32758f807be5 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Wed, 11 Mar 2026 13:50:42 +0100 Subject: [PATCH 02/21] small fix for full OK tests --- cpp/src/pdlp/pdhg.cu | 63 +++++++++++++++++++++++++++++++------------- cpp/src/pdlp/pdlp.cu | 3 ++- 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 6bceb4ded7..505566cb5e 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -463,16 +463,29 @@ void pdhg_solver_t::compute_At_y() cusparse_view_.buffer_transpose_mixed_.data(), stream_view_); } + else { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_t_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_solution, + 
cusparse_view_.current_AtY, + cusparse_view_.current_AtY)); + } } - if (!cusparse_view_.mixed_precision_enabled_) { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_t_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_solution, - cusparse_view_.current_AtY, - cusparse_view_.current_AtY)); + else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( @@ -508,18 +521,30 @@ void pdhg_solver_t::compute_A_x() cusparse_view_.buffer_non_transpose_mixed_.data(), stream_view_); } + else { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.reflected_primal_solution, + cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient)); + } } - if (!cusparse_view_.mixed_precision_enabled_) { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.reflected_primal_solution, - cusparse_view_.dual_gradient, - cusparse_view_.dual_gradient)); + else { + RAFT_CUSPARSE_TRY( + 
raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); } - } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 080f7c2a16..af14e8c4fb 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -2316,7 +2316,8 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co compute_initial_primal_weight(); initial_scaling_strategy_.scale_problem(); - pdhg_solver_.get_cusparse_view().create_spmv_op_plans(settings_.hyper_params.use_reflected_primal_dual); + if (!pdhg_solver_.get_cusparse_view().mixed_precision_enabled_ && !std::is_same_v) + pdhg_solver_.get_cusparse_view().create_spmv_op_plans(settings_.hyper_params.use_reflected_primal_dual); // Update FP32 matrix copies for mixed precision SpMV after scaling pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); From 2439f92a8cb914a813aa9ee78e50bf5610776048 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Wed, 11 Mar 2026 14:11:43 +0100 Subject: [PATCH 03/21] coding style and updated dependencies. 
still need to conda install libnvjitlink-dev manually --- cpp/src/pdlp/cusparse_view.cu | 92 +++++++++++++++++------------------ cpp/src/pdlp/pdhg.cu | 60 +++++++++++------------ cpp/src/pdlp/pdlp.cu | 3 +- dependencies.yaml | 2 +- 4 files changed, 77 insertions(+), 80 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 27f8704eb1..a8b416070d 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1041,69 +1041,67 @@ cusparse_view_t::cusparse_view_t( template void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); + RAFT_CUSPARSE_TRY( + cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; - RAFT_CUSPARSE_TRY( - cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - &buffer_size_transpose)); + RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + &buffer_size_transpose)); buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), - &spmv_op_descr_A_t_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - buffer_transpose_spmvop.data())); - - char* lto_buffer = NULL; + &spmv_op_descr_A_t_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + buffer_transpose_spmvop.data())); + + char* lto_buffer = NULL; size_t lto_buffer_size = 0; RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), - spmv_op_descr_A_t_, - 
&spmv_op_plan_A_t_, - lto_buffer, - lto_buffer_size)); + spmv_op_descr_A_t_, + &spmv_op_plan_A_t_, + lto_buffer, + lto_buffer_size)); // Only prepare buffers for A_x if using reflected_halpern - if (is_reflected) - { + if (is_reflected) { size_t buffer_size_non_transpose = 0; - RAFT_CUSPARSE_TRY( - cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - &buffer_size_non_transpose)); + RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + &buffer_size_non_transpose)); buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), - &spmv_op_descr_A_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - buffer_non_transpose_spmvop.data())); + &spmv_op_descr_A_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + buffer_non_transpose_spmvop.data())); RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), - spmv_op_descr_A_, - &spmv_op_plan_A_, - lto_buffer, - lto_buffer_size)); + spmv_op_descr_A_, + &spmv_op_plan_A_, + lto_buffer, + lto_buffer_size)); } } diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 505566cb5e..23a10f9368 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -462,30 +462,29 @@ void pdhg_solver_t::compute_At_y() CUSPARSE_SPMV_CSR_ALG2, cusparse_view_.buffer_transpose_mixed_.data(), stream_view_); - } - else { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + } else { + RAFT_CUSPARSE_TRY( + cusparseSetStream(handle_ptr_->get_cusparse_handle(), 
stream_view_.value())); RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_t_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_solution, - cusparse_view_.current_AtY, - cusparse_view_.current_AtY)); + cusparse_view_.spmv_op_plan_A_t_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_solution, + cusparse_view_.current_AtY, + cusparse_view_.current_AtY)); } - } - else { + } else { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( @@ -520,19 +519,18 @@ void pdhg_solver_t::compute_A_x() CUSPARSE_SPMV_CSR_ALG2, cusparse_view_.buffer_non_transpose_mixed_.data(), stream_view_); - } - else { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + } else { + RAFT_CUSPARSE_TRY( + cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.reflected_primal_solution, - cusparse_view_.dual_gradient, - cusparse_view_.dual_gradient)); + cusparse_view_.spmv_op_plan_A_, + 
reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.reflected_primal_solution, + cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient)); } - } - else { + } else { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index af14e8c4fb..a393b73379 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -2317,7 +2317,8 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co initial_scaling_strategy_.scale_problem(); if (!pdhg_solver_.get_cusparse_view().mixed_precision_enabled_ && !std::is_same_v) - pdhg_solver_.get_cusparse_view().create_spmv_op_plans(settings_.hyper_params.use_reflected_primal_dual); + pdhg_solver_.get_cusparse_view().create_spmv_op_plans( + settings_.hyper_params.use_reflected_primal_dual); // Update FP32 matrix copies for mixed precision SpMV after scaling pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); diff --git a/dependencies.yaml b/dependencies.yaml index 1601058cf2..6a7427b193 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -667,7 +667,7 @@ dependencies: packages: - cuda-version=13.0 - matrix: - cuda: "13.1" + cuda: "13.2" packages: - cuda-version=13.1 - matrix: From 86be5b27eea608e30074ef720ccee7822310032b Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 23 Mar 2026 14:04:39 +0100 Subject: [PATCH 04/21] updated dependencies.yaml --- dependencies.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/dependencies.yaml b/dependencies.yaml index 6a7427b193..e29b55dc24 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -733,6 +733,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - cuda-nvtx-dev + - libnvjitlink-dev cuda_wheels: From 81b8f779eb12a3202759e3b911d37750d0867cc0 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 23 Mar 2026 14:19:49 +0100 Subject: [PATCH 05/21] added comments and 
removed a useless set_stream --- cpp/src/pdlp/cusparse_view.cu | 5 +---- cpp/src/pdlp/cusparse_view.hpp | 6 +++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index a8b416070d..fe5c59de2f 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1041,9 +1041,6 @@ cusparse_view_t::cusparse_view_t( template void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { - RAFT_CUSPARSE_TRY( - cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); - // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), @@ -1074,7 +1071,7 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) lto_buffer, lto_buffer_size)); - // Only prepare buffers for A_x if using reflected_halpern + // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { size_t buffer_size_non_transpose = 0; RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 96cad98fbe..6a4c6423ba 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -120,8 +120,6 @@ class cusparse_view_t { const rmm::device_uvector&, // Empty just to init the const& const std::vector&); // Empty just to init the const& - void create_spmv_op_plans(bool is_reflected); - const bool batch_mode_{false}; raft::handle_t const* handle_ptr_{nullptr}; @@ -176,7 +174,7 @@ class cusparse_view_t { rmm::device_uvector buffer_non_transpose_spmvop; rmm::device_uvector buffer_transpose_spmvop; - // here for tests of compilation + // SpMVOp descriptors and plans for A and A_T cusparseSpMVOpDescr_t spmv_op_descr_A_; cusparseSpMVOpDescr_t spmv_op_descr_A_t_; cusparseSpMVOpPlan_t spmv_op_plan_A_; @@ -223,6 +221,8 @@ class cusparse_view_t { // Redirects the cuSPARSE 
CSR structure pointers from op_problem_scaled_ to the original problem // so the duplicated row/column buffers can be freed. void redirect_cusparse_csr_structure_pointers(const problem_t& original_problem); + // Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix. + void create_spmv_op_plans(bool is_reflected); }; // Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type From c5f8db26fa1ca88bef4b5cb88f9ac0835db1bcb6 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 23 Mar 2026 15:23:10 +0100 Subject: [PATCH 06/21] moved create_spmv_op_plans and added libnvjitlink-dev --- cpp/src/pdlp/cusparse_view.cu | 130 ++++++++++++++++----------------- cpp/src/pdlp/cusparse_view.hpp | 1 - 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index fe5c59de2f..ce95c489e2 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1037,71 +1037,6 @@ cusparse_view_t::cusparse_view_t( #endif } -// Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix. 
-template -void cusparse_view_t::create_spmv_op_plans(bool is_reflected) -{ - // Prepare buffers for At_y SpMVOp - size_t buffer_size_transpose = 0; - RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - &buffer_size_transpose)); - buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); - - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), - &spmv_op_descr_A_t_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - buffer_transpose_spmvop.data())); - - char* lto_buffer = NULL; - size_t lto_buffer_size = 0; - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), - spmv_op_descr_A_t_, - &spmv_op_plan_A_t_, - lto_buffer, - lto_buffer_size)); - - // Only prepare buffers for A_x if we are using reflected_halpern - if (is_reflected) { - size_t buffer_size_non_transpose = 0; - RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - &buffer_size_non_transpose)); - buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); - - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), - &spmv_op_descr_A_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - buffer_non_transpose_spmvop.data())); - - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), - spmv_op_descr_A_, - &spmv_op_plan_A_, - lto_buffer, - lto_buffer_size)); - } -} - // Empty constructor used in kkt restart to save memory template cusparse_view_t::cusparse_view_t( @@ -1255,6 +1190,71 @@ bool is_cusparse_runtime_mixed_precision_supported() return (major > 12) 
|| (major == 12 && minor >= 5); } +// Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix. +template +void cusparse_view_t::create_spmv_op_plans(bool is_reflected) +{ + // Prepare buffers for At_y SpMVOp + size_t buffer_size_transpose = 0; + RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + &buffer_size_transpose)); + buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); + + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), + &spmv_op_descr_A_t_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + buffer_transpose_spmvop.data())); + + char* lto_buffer = NULL; + size_t lto_buffer_size = 0; + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), + spmv_op_descr_A_t_, + &spmv_op_plan_A_t_, + lto_buffer, + lto_buffer_size)); + + // Only prepare buffers for A_x if we are using reflected_halpern + if (is_reflected) { + size_t buffer_size_non_transpose = 0; + RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + &buffer_size_non_transpose)); + buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); + + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), + &spmv_op_descr_A_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + buffer_non_transpose_spmvop.data())); + + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), + spmv_op_descr_A_, + &spmv_op_plan_A_, + lto_buffer, + lto_buffer_size)); + } +} + #if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class 
cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 6a4c6423ba..40a3c75a8f 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -180,7 +180,6 @@ class cusparse_view_t { cusparseSpMVOpPlan_t spmv_op_plan_A_; cusparseSpMVOpPlan_t spmv_op_plan_A_t_; - // reuse buffers for cusparse spmm rmm::device_uvector buffer_transpose_batch; rmm::device_uvector buffer_non_transpose_batch; From 583da596e7a052afcbd31c746f124d37c89c75a5 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 23 Mar 2026 16:54:11 +0100 Subject: [PATCH 07/21] added guards for spmvop compilation --- cpp/src/pdlp/cusparse_view.cu | 2 ++ cpp/src/pdlp/cusparse_view.hpp | 7 ++++++- cpp/src/pdlp/pdhg.cu | 28 ++++++++++++++++++++++++++++ cpp/src/pdlp/pdlp.cu | 9 ++++++--- 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index ce95c489e2..8ec9ab7a4d 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1194,6 +1194,7 @@ bool is_cusparse_runtime_mixed_precision_supported() template void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { +#if CUDA_VER_13_2_UP // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), @@ -1253,6 +1254,7 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) lto_buffer, lto_buffer_size)); } +#endif } #if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 40a3c75a8f..a24229c790 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -20,6 +20,8 @@ #include +#define CUDA_VER_13_2_UP (CUDART_VERSION >= 13020) + namespace cuopt::linear_programming::detail { template @@ -171,6 +173,9 @@ class cusparse_view_t { // reuse buffers 
for cusparse spmv rmm::device_uvector buffer_non_transpose; rmm::device_uvector buffer_transpose; + +#if CUDA_VER_13_2_UP + // SpMVOp buffers for A and A_T rmm::device_uvector buffer_non_transpose_spmvop; rmm::device_uvector buffer_transpose_spmvop; @@ -179,7 +184,7 @@ class cusparse_view_t { cusparseSpMVOpDescr_t spmv_op_descr_A_t_; cusparseSpMVOpPlan_t spmv_op_plan_A_; cusparseSpMVOpPlan_t spmv_op_plan_A_t_; - +#endif // reuse buffers for cusparse spmm rmm::device_uvector buffer_transpose_batch; rmm::device_uvector buffer_non_transpose_batch; diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 23a10f9368..6dc2d42bd3 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -463,6 +463,7 @@ void pdhg_solver_t::compute_At_y() cusparse_view_.buffer_transpose_mixed_.data(), stream_view_); } else { +#if CUDA_VER_13_2_UP RAFT_CUSPARSE_TRY( cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), @@ -472,6 +473,19 @@ void pdhg_solver_t::compute_At_y() cusparse_view_.dual_solution, cusparse_view_.current_AtY, cusparse_view_.current_AtY)); +#else + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); +#endif } } else { RAFT_CUSPARSE_TRY( @@ -520,6 +534,7 @@ void pdhg_solver_t::compute_A_x() cusparse_view_.buffer_non_transpose_mixed_.data(), stream_view_); } else { +#if CUDA_VER_13_2_UP RAFT_CUSPARSE_TRY( cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), @@ -529,6 +544,19 @@ void pdhg_solver_t::compute_A_x() 
cusparse_view_.reflected_primal_solution, cusparse_view_.dual_gradient, cusparse_view_.dual_gradient)); +#else + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); +#endif } } else { RAFT_CUSPARSE_TRY( diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index a393b73379..fd0cc9ffcd 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -2316,9 +2316,12 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co compute_initial_primal_weight(); initial_scaling_strategy_.scale_problem(); - if (!pdhg_solver_.get_cusparse_view().mixed_precision_enabled_ && !std::is_same_v) - pdhg_solver_.get_cusparse_view().create_spmv_op_plans( - settings_.hyper_params.use_reflected_primal_dual); + if constexpr (std::is_same_v) { + if (!batch_mode_ && !pdhg_solver_.get_cusparse_view().mixed_precision_enabled_) { + pdhg_solver_.get_cusparse_view().create_spmv_op_plans( + settings_.hyper_params.use_reflected_primal_dual); + } + } // Update FP32 matrix copies for mixed precision SpMV after scaling pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); From 4f041f1636f8c92aa3885e1b577a86c31d59f1d2 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Tue, 24 Mar 2026 10:35:43 +0100 Subject: [PATCH 08/21] added destructors --- cpp/src/pdlp/cusparse_view.cu | 17 +++++++++++++++++ cpp/src/pdlp/cusparse_view.hpp | 10 ++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 8ec9ab7a4d..9317ca75fd 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1037,6 +1037,23 @@ cusparse_view_t::cusparse_view_t( 
#endif } +template +cusparse_view_t::~cusparse_view_t() +{ +#if CUDA_VER_13_2_UP + if (spmv_op_plan_A_t_) { + RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(spmv_op_plan_A_t_)); + } + if (spmv_op_descr_A_t_) { + RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(spmv_op_descr_A_t_)); + } + if (spmv_op_plan_A_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(spmv_op_plan_A_)); } + if (spmv_op_descr_A_) { + RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(spmv_op_descr_A_)); + } +#endif +} + // Empty constructor used in kkt restart to save memory template cusparse_view_t::cusparse_view_t( diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index a24229c790..38d44f557f 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -122,6 +122,8 @@ class cusparse_view_t { const rmm::device_uvector&, // Empty just to init the const& const std::vector&); // Empty just to init the const& + ~cusparse_view_t(); + const bool batch_mode_{false}; raft::handle_t const* handle_ptr_{nullptr}; @@ -180,10 +182,10 @@ class cusparse_view_t { rmm::device_uvector buffer_transpose_spmvop; // SpMVOp descriptors and plans for A and A_T - cusparseSpMVOpDescr_t spmv_op_descr_A_; - cusparseSpMVOpDescr_t spmv_op_descr_A_t_; - cusparseSpMVOpPlan_t spmv_op_plan_A_; - cusparseSpMVOpPlan_t spmv_op_plan_A_t_; + cusparseSpMVOpDescr_t spmv_op_descr_A_{nullptr}; + cusparseSpMVOpDescr_t spmv_op_descr_A_t_{nullptr}; + cusparseSpMVOpPlan_t spmv_op_plan_A_{nullptr}; + cusparseSpMVOpPlan_t spmv_op_plan_A_t_{nullptr}; #endif // reuse buffers for cusparse spmm rmm::device_uvector buffer_transpose_batch; From 2a1360daef4a5b85a85757907d4febff777683d5 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Tue, 24 Mar 2026 11:49:55 +0100 Subject: [PATCH 09/21] moved buffer_spmvop to allow <13.2 compilation --- cpp/src/pdlp/cusparse_view.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.hpp 
b/cpp/src/pdlp/cusparse_view.hpp index 38d44f557f..ab6afecfe5 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -176,11 +176,11 @@ class cusparse_view_t { rmm::device_uvector buffer_non_transpose; rmm::device_uvector buffer_transpose; -#if CUDA_VER_13_2_UP // SpMVOp buffers for A and A_T - rmm::device_uvector buffer_non_transpose_spmvop; - rmm::device_uvector buffer_transpose_spmvop; - + rmm::device_uvector buffer_non_transpose_spmvop{0, handle_ptr_->get_stream()}; + rmm::device_uvector buffer_transpose_spmvop{0, handle_ptr_->get_stream()}; + +#if CUDA_VER_13_2_UP // SpMVOp descriptors and plans for A and A_T cusparseSpMVOpDescr_t spmv_op_descr_A_{nullptr}; cusparseSpMVOpDescr_t spmv_op_descr_A_t_{nullptr}; From 96a524ad0a0bd317df08f9f348eef5207cc639cb Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Wed, 25 Mar 2026 11:38:15 +0100 Subject: [PATCH 10/21] removed cusparse_view destructor and added spmvop plan and descriptor wrappers --- cpp/src/pdlp/cusparse_view.cu | 152 +++++++++++++++++++++++---------- cpp/src/pdlp/cusparse_view.hpp | 60 +++++++++++-- 2 files changed, 160 insertions(+), 52 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 9317ca75fd..e33f5b8e3a 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -153,6 +153,92 @@ cusparse_dn_mat_descr_wrapper_t::operator cusparseDnMatDescr_t() const return descr_; } +#if CUDA_VER_13_2_UP +cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t() + : descr_(nullptr), need_destruction_(false) +{ +} + +cusparse_spmvop_descr_wrapper_t::~cusparse_spmvop_descr_wrapper_t() +{ + if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(descr_)); } +} + +cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t( + const cusparse_spmvop_descr_wrapper_t& other) + : descr_(other.descr_), need_destruction_(false) +{ +} + +cusparse_spmvop_descr_wrapper_t&
cusparse_spmvop_descr_wrapper_t::operator=( + cusparse_spmvop_descr_wrapper_t&& other) +{ + if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } + descr_ = other.descr_; + need_destruction_ = other.need_destruction_; + other.need_destruction_ = false; + return *this; +} + + +void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseSpMatDescr_t matA, + cusparseDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, + cusparseDnVecDescr_t vecZ, + cudaDataType computeType, + void* buffer) +{ + if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr( + handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer)); + need_destruction_ = true; +} + +cusparse_spmvop_descr_wrapper_t::operator cusparseSpMVOpDescr_t() const { return descr_; } + +cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t() + : plan_(nullptr), need_destruction_(false) +{ +} + +cusparse_spmvop_plan_wrapper_t::~cusparse_spmvop_plan_wrapper_t() +{ + if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(plan_)); } +} + +cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t( + const cusparse_spmvop_plan_wrapper_t& other) + : plan_(other.plan_), need_destruction_(false) +{ +} + +cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=( + cusparse_spmvop_plan_wrapper_t&& other) +{ + if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } + plan_ = other.plan_; + need_destruction_ = other.need_destruction_; + other.need_destruction_ = false; + return *this; +} + +void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, + cusparseSpMVOpDescr_t descr, + char* lto_buffer, + size_t lto_buffer_size) +{ + if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } + RAFT_CUSPARSE_TRY( + cusparseSpMVOp_createPlan(handle, descr, &plan_, lto_buffer, 
lto_buffer_size)); + need_destruction_ = true; +} + +cusparse_spmvop_plan_wrapper_t::operator cusparseSpMVOpPlan_t() const { return plan_; } + +#endif + #if CUDA_VER_12_4_UP struct dynamic_load_runtime { static void* get_cusparse_runtime_handle() @@ -1037,22 +1123,6 @@ cusparse_view_t::cusparse_view_t( #endif } -template -cusparse_view_t::~cusparse_view_t() -{ -#if CUDA_VER_13_2_UP - if (spmv_op_plan_A_t_) { - RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(spmv_op_plan_A_t_)); - } - if (spmv_op_descr_A_t_) { - RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(spmv_op_descr_A_t_)); - } - if (spmv_op_plan_A_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(spmv_op_plan_A_)); } - if (spmv_op_descr_A_) { - RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(spmv_op_descr_A_)); - } -#endif -} // Empty constructor used in kkt restart to save memory template @@ -1224,23 +1294,19 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) &buffer_size_transpose)); buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), - &spmv_op_descr_A_t_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - buffer_transpose_spmvop.data())); + spmv_op_descr_A_t_.create(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + buffer_transpose_spmvop.data()); char* lto_buffer = NULL; size_t lto_buffer_size = 0; - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), - spmv_op_descr_A_t_, - &spmv_op_plan_A_t_, - lto_buffer, - lto_buffer_size)); + spmv_op_plan_A_t_.create( + handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_, lto_buffer, lto_buffer_size); // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { @@ -1255,21 +1321,17 @@ void 
cusparse_view_t::create_spmv_op_plans(bool is_reflected) &buffer_size_non_transpose)); buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr(handle_ptr_->get_cusparse_handle(), - &spmv_op_descr_A_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - buffer_non_transpose_spmvop.data())); - - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle_ptr_->get_cusparse_handle(), - spmv_op_descr_A_, - &spmv_op_plan_A_, - lto_buffer, - lto_buffer_size)); + spmv_op_descr_A_.create(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + buffer_non_transpose_spmvop.data()); + + spmv_op_plan_A_.create( + handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_, lto_buffer, lto_buffer_size); } #endif } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index ab6afecfe5..ba14a1b555 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -81,6 +81,54 @@ class cusparse_dn_mat_descr_wrapper_t { bool need_destruction_; }; +#if CUDA_VER_13_2_UP +class cusparse_spmvop_descr_wrapper_t { + public: + cusparse_spmvop_descr_wrapper_t(); + ~cusparse_spmvop_descr_wrapper_t(); + + cusparse_spmvop_descr_wrapper_t(const cusparse_spmvop_descr_wrapper_t& other); + cusparse_spmvop_descr_wrapper_t& operator=(cusparse_spmvop_descr_wrapper_t&& other); + cusparse_spmvop_descr_wrapper_t& operator=(const cusparse_spmvop_descr_wrapper_t& other) = delete; + + void create(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseSpMatDescr_t matA, + cusparseDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, + cusparseDnVecDescr_t vecZ, + cudaDataType computeType, + void* buffer); + + operator cusparseSpMVOpDescr_t() const; + + private: + cusparseSpMVOpDescr_t descr_; + bool need_destruction_; +}; + +class 
cusparse_spmvop_plan_wrapper_t { + public: + cusparse_spmvop_plan_wrapper_t(); + ~cusparse_spmvop_plan_wrapper_t(); + + cusparse_spmvop_plan_wrapper_t(const cusparse_spmvop_plan_wrapper_t& other); + cusparse_spmvop_plan_wrapper_t& operator=(cusparse_spmvop_plan_wrapper_t&& other); + cusparse_spmvop_plan_wrapper_t& operator=(const cusparse_spmvop_plan_wrapper_t& other) = delete; + + void create(cusparseHandle_t handle, + cusparseSpMVOpDescr_t descr, + char* lto_buffer, + size_t lto_buffer_size); + + operator cusparseSpMVOpPlan_t() const; + + private: + cusparseSpMVOpPlan_t plan_; + bool need_destruction_; +}; +#endif + template class cusparse_view_t { public: @@ -122,8 +170,6 @@ class cusparse_view_t { const rmm::device_uvector&, // Empty just to init the const& const std::vector&); // Empty just to init the const& - ~cusparse_view_t(); - const bool batch_mode_{false}; raft::handle_t const* handle_ptr_{nullptr}; @@ -181,11 +227,11 @@ class cusparse_view_t { rmm::device_uvector buffer_transpose_spmvop{0, handle_ptr_->get_stream()}; #if CUDA_VER_13_2_UP - // SpMVOp descriptors and plans for A and A_T - cusparseSpMVOpDescr_t spmv_op_descr_A_{nullptr}; - cusparseSpMVOpDescr_t spmv_op_descr_A_t_{nullptr}; - cusparseSpMVOpPlan_t spmv_op_plan_A_{nullptr}; - cusparseSpMVOpPlan_t spmv_op_plan_A_t_{nullptr}; + // SpMVOp descriptors and plans for A and A_T (descr before plan so dtor destroys plan first) + cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_; + cusparse_spmvop_plan_wrapper_t spmv_op_plan_A_; + cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_t_; + cusparse_spmvop_plan_wrapper_t spmv_op_plan_A_t_; #endif // reuse buffers for cusparse spmm rmm::device_uvector buffer_transpose_batch; From 6b8e78ad714e1538c57ee68acf51266eb3a209a6 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Wed, 25 Mar 2026 13:21:37 +0100 Subject: [PATCH 11/21] added cusparseSetStream for safety --- cpp/src/pdlp/cusparse_view.cu | 1 + 1 file changed, 1 insertion(+) diff --git 
a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index e33f5b8e3a..2f541bd61a 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1282,6 +1282,7 @@ template void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { #if CUDA_VER_13_2_UP + CUSPARSE_CHECK(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), From a85b10f1aed4a893c5b2cd76076955d78ac9d025 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Wed, 25 Mar 2026 13:59:00 +0100 Subject: [PATCH 12/21] made compute At_y and compute A_x spmv choosing more readable --- cpp/src/pdlp/pdhg.cu | 102 +++++++++++++++++++++++------------------- cpp/src/pdlp/pdhg.hpp | 3 ++ 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 6dc2d42bd3..8e371062fe 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -444,6 +444,60 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvector +void pdhg_solver_t::spmvop_At_y() +{ +#if CUDA_VER_13_2_UP + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_t_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_solution, + cusparse_view_.current_AtY, + cusparse_view_.current_AtY)); +#else + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); +#endif +} + 
+template +void pdhg_solver_t::spmvop_A_x() +{ +#if CUDA_VER_13_2_UP + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.reflected_primal_solution, + cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient)); +#else + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); +#endif +} + template void pdhg_solver_t::compute_At_y() { @@ -463,29 +517,7 @@ void pdhg_solver_t::compute_At_y() cusparse_view_.buffer_transpose_mixed_.data(), stream_view_); } else { -#if CUDA_VER_13_2_UP - RAFT_CUSPARSE_TRY( - cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_t_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_solution, - cusparse_view_.current_AtY, - cusparse_view_.current_AtY)); -#else - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); -#endif + spmvop_At_y(); } } else { RAFT_CUSPARSE_TRY( @@ -534,29 +566,7 @@ void pdhg_solver_t::compute_A_x() 
cusparse_view_.buffer_non_transpose_mixed_.data(), stream_view_); } else { -#if CUDA_VER_13_2_UP - RAFT_CUSPARSE_TRY( - cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.reflected_primal_solution, - cusparse_view_.dual_gradient, - cusparse_view_.dual_gradient)); -#else - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); -#endif + spmvop_A_x(); } } else { RAFT_CUSPARSE_TRY( diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index 52f45dc83d..d16400bd3b 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -95,7 +95,10 @@ class pdhg_solver_t { void compute_primal_projection(rmm::device_uvector& primal_step_size); void compute_At_y(); void compute_A_x(); + void spmvop_At_y(); + void spmvop_A_x(); + void my_spmvop(f_t* alpha, f_t* A, f_t* x, f_t* beta, f_t* y, f_t* result); bool batch_mode_{false}; raft::handle_t const* handle_ptr_{nullptr}; rmm::cuda_stream_view stream_view_; From 68922b00d00197f913163dcebe211ab26a21c286 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Fri, 15 May 2026 12:58:02 +0200 Subject: [PATCH 13/21] update broken dependency --- dependencies.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index e29b55dc24..b7463fe8cc 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -667,7 +667,7 @@ dependencies: packages: - cuda-version=13.0 - matrix: - cuda: "13.2" + cuda: "13.1" packages: - 
cuda-version=13.1 - matrix: From 1880f23b5a134294a10a8d042c9ee5b7475e98fe Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Fri, 15 May 2026 12:59:23 +0200 Subject: [PATCH 14/21] pre-commit run --- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + .../all_cuda-132_arch-aarch64.yaml | 1 + .../all_cuda-132_arch-x86_64.yaml | 1 + cpp/src/pdlp/cusparse_view.cu | 9 +++----- cpp/src/pdlp/cusparse_view.hpp | 8 +++---- cpp/src/pdlp/pdhg.cu | 21 +++++++++---------- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 145850d4d8..acc652407d 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libgrpc >=1.78.0,<1.80.0a0 +- libnvjitlink-dev - libprotobuf - libraft-headers==26.6.*,>=0.0.0a0 - librmm==26.6.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 293b49fbea..ae6772ccae 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libgrpc >=1.78.0,<1.80.0a0 +- libnvjitlink-dev - libprotobuf - libraft-headers==26.6.*,>=0.0.0a0 - librmm==26.6.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-132_arch-aarch64.yaml b/conda/environments/all_cuda-132_arch-aarch64.yaml index fa8844a1f9..17d986feee 100644 --- a/conda/environments/all_cuda-132_arch-aarch64.yaml +++ b/conda/environments/all_cuda-132_arch-aarch64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libgrpc >=1.78.0,<1.80.0a0 +- libnvjitlink-dev - libprotobuf - libraft-headers==26.6.*,>=0.0.0a0 - librmm==26.6.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-132_arch-x86_64.yaml 
b/conda/environments/all_cuda-132_arch-x86_64.yaml index a37d8718c0..62d876e71d 100644 --- a/conda/environments/all_cuda-132_arch-x86_64.yaml +++ b/conda/environments/all_cuda-132_arch-x86_64.yaml @@ -35,6 +35,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libgrpc >=1.78.0,<1.80.0a0 +- libnvjitlink-dev - libprotobuf - libraft-headers==26.6.*,>=0.0.0a0 - librmm==26.6.*,>=0.0.0a0 diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 2f541bd61a..18d2650cf4 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -180,7 +180,6 @@ cusparse_spmvop_descr_wrapper_t& cusparse_spmvop_descr_wrapper_t::operator=( return *this; } - void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, cusparseOperation_t opA, cusparseSpMatDescr_t matA, @@ -191,8 +190,8 @@ void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, void* buffer) { if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr( - handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer)); + RAFT_CUSPARSE_TRY( + cusparseSpMVOp_createDescr(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer)); need_destruction_ = true; } @@ -230,8 +229,7 @@ void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, size_t lto_buffer_size) { if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } - RAFT_CUSPARSE_TRY( - cusparseSpMVOp_createPlan(handle, descr, &plan_, lto_buffer, lto_buffer_size)); + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle, descr, &plan_, lto_buffer, lto_buffer_size)); need_destruction_ = true; } @@ -1123,7 +1121,6 @@ cusparse_view_t::cusparse_view_t( #endif } - // Empty constructor used in kkt restart to save memory template cusparse_view_t::cusparse_view_t( diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index ba14a1b555..23e28e11b3 100644 --- 
a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -117,9 +117,9 @@ class cusparse_spmvop_plan_wrapper_t { cusparse_spmvop_plan_wrapper_t& operator=(const cusparse_spmvop_plan_wrapper_t& other) = delete; void create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr, - char* lto_buffer, - size_t lto_buffer_size); + cusparseSpMVOpDescr_t descr, + char* lto_buffer, + size_t lto_buffer_size); operator cusparseSpMVOpPlan_t() const; @@ -225,7 +225,7 @@ class cusparse_view_t { // SpMVOp buffers for A and A_T rmm::device_uvector buffer_non_transpose_spmvop{0, handle_ptr_->get_stream()}; rmm::device_uvector buffer_transpose_spmvop{0, handle_ptr_->get_stream()}; - + #if CUDA_VER_13_2_UP // SpMVOp descriptors and plans for A and A_T (descr before plan so dtor destroys plan first) cusparse_spmvop_descr_wrapper_t spmv_op_descr_A_; diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 8e371062fe..ddc60c5e22 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -457,17 +457,16 @@ void pdhg_solver_t::spmvop_At_y() cusparse_view_.current_AtY, cusparse_view_.current_AtY)); #else - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); #endif } From f69f77fa7e287cf336000ff5c007818f7ad86840 Mon Sep 17 00:00:00 2001 From: Bulle 
Mostovoi Date: Mon, 18 May 2026 14:06:33 +0200 Subject: [PATCH 15/21] added dlsym checks to SpMVOp path --- cpp/src/pdlp/cusparse_view.cu | 15 ++++++++++++ cpp/src/pdlp/cusparse_view.hpp | 3 +++ cpp/src/pdlp/pdhg.cu | 44 ++++++++++++++++++---------------- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 18d2650cf4..2e5cc16dbc 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1274,11 +1274,26 @@ bool is_cusparse_runtime_mixed_precision_supported() return (major > 12) || (major == 12 && minor >= 5); } +bool is_cusparse_runtime_spmvop_supported() +{ +#if CUDA_VER_13_2_UP + // Probe the runtimme to ensure cusparseSpMVOp is supported + static const bool supported = []() { + dlerror(); + return dlsym(RTLD_DEFAULT, "cusparseSpMVOp") != nullptr; + }(); + return supported; +#else + return false; +#endif +} + // Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix. template void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { #if CUDA_VER_13_2_UP + if (!is_cusparse_runtime_spmvop_supported()) { return; } CUSPARSE_CHECK(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 23e28e11b3..b2242985a2 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -331,4 +331,7 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, bool is_cusparse_runtime_mixed_precision_supported(); +// False if cuda version < 13.2 or runtime cuSPARSE does not export SpMVOp symbols. True otherwise. 
+bool is_cusparse_runtime_spmvop_supported(); + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index ddc60c5e22..b754584d56 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -448,15 +448,18 @@ template void pdhg_solver_t::spmvop_At_y() { #if CUDA_VER_13_2_UP - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_t_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_solution, - cusparse_view_.current_AtY, - cusparse_view_.current_AtY)); -#else + if (is_cusparse_runtime_spmvop_supported()) { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_t_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_solution, + cusparse_view_.current_AtY, + cusparse_view_.current_AtY)); + return; + } +#endif RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, reusable_device_scalar_value_1_.data(), @@ -467,22 +470,24 @@ void pdhg_solver_t::spmvop_At_y() CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_transpose.data(), stream_view_)); -#endif } template void pdhg_solver_t::spmvop_A_x() { #if CUDA_VER_13_2_UP - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.reflected_primal_solution, - cusparse_view_.dual_gradient, - cusparse_view_.dual_gradient)); -#else + if 
(is_cusparse_runtime_spmvop_supported()) { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.reflected_primal_solution, + cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient)); + return; + } +#endif RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -494,7 +499,6 @@ void pdhg_solver_t::spmvop_A_x() CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose.data(), stream_view_)); -#endif } template From 238adacae3c4338f4195eae3646dba112f0090b9 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 18 May 2026 14:26:02 +0200 Subject: [PATCH 16/21] removed raw buffers/pointers and added doc --- cpp/src/pdlp/cusparse_view.cu | 20 ++++++++------------ cpp/src/pdlp/cusparse_view.hpp | 7 +++---- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 2e5cc16dbc..ce8f70cb90 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -187,11 +187,11 @@ void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, cusparseDnVecDescr_t vecY, cusparseDnVecDescr_t vecZ, cudaDataType computeType, - void* buffer) + rmm::device_uvector& buffer) { if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } RAFT_CUSPARSE_TRY( - cusparseSpMVOp_createDescr(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer)); + cusparseSpMVOp_createDescr(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); need_destruction_ = true; } @@ -224,12 +224,10 @@ cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=( } void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t 
handle, - cusparseSpMVOpDescr_t descr, - char* lto_buffer, - size_t lto_buffer_size) + cusparseSpMVOpDescr_t descr) { if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle, descr, &plan_, lto_buffer, lto_buffer_size)); + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle, descr, &plan_, nullptr, 0)); need_destruction_ = true; } @@ -1314,12 +1312,10 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) current_AtY, current_AtY, CUDA_R_64F, - buffer_transpose_spmvop.data()); + buffer_transpose_spmvop); - char* lto_buffer = NULL; - size_t lto_buffer_size = 0; spmv_op_plan_A_t_.create( - handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_, lto_buffer, lto_buffer_size); + handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t); // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { @@ -1341,10 +1337,10 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) dual_gradient, dual_gradient, CUDA_R_64F, - buffer_non_transpose_spmvop.data()); + buffer_non_transpose_spmvop); spmv_op_plan_A_.create( - handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_, lto_buffer, lto_buffer_size); + handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_); } #endif } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index b2242985a2..abb1fbe9f8 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -82,6 +82,7 @@ class cusparse_dn_mat_descr_wrapper_t { }; #if CUDA_VER_13_2_UP +// RAII wrapper around cusparse SpMVOp objects. All the buffers are owned by the cusparse_view_t. 
class cusparse_spmvop_descr_wrapper_t { public: cusparse_spmvop_descr_wrapper_t(); @@ -98,7 +99,7 @@ class cusparse_spmvop_descr_wrapper_t { cusparseDnVecDescr_t vecY, cusparseDnVecDescr_t vecZ, cudaDataType computeType, - void* buffer); + rmm::device_uvector& buffer); operator cusparseSpMVOpDescr_t() const; @@ -117,9 +118,7 @@ class cusparse_spmvop_plan_wrapper_t { cusparse_spmvop_plan_wrapper_t& operator=(const cusparse_spmvop_plan_wrapper_t& other) = delete; void create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr, - char* lto_buffer, - size_t lto_buffer_size); + cusparseSpMVOpDescr_t descr); operator cusparseSpMVOpPlan_t() const; From 36c41cae0db35547f40cfb3c93b6097bde5a1f63 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 18 May 2026 14:27:31 +0200 Subject: [PATCH 17/21] small fix error name --- cpp/src/pdlp/cusparse_view.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index ce8f70cb90..327351bcac 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1315,7 +1315,7 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) buffer_transpose_spmvop); spmv_op_plan_A_t_.create( - handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t); + handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_); // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { From e1890ef876b9b693c973a595848a01333ccb54b4 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 18 May 2026 14:33:37 +0200 Subject: [PATCH 18/21] style --- cpp/src/pdlp/cusparse_view.cu | 13 +++++-------- cpp/src/pdlp/cusparse_view.hpp | 3 +-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 327351bcac..ca4f3ccf94 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -190,8 +190,8 @@ void 
cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, rmm::device_uvector& buffer) { if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } - RAFT_CUSPARSE_TRY( - cusparseSpMVOp_createDescr(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); + RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr( + handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); need_destruction_ = true; } @@ -223,8 +223,7 @@ cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=( return *this; } -void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr) +void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr) { if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle, descr, &plan_, nullptr, 0)); @@ -1314,8 +1313,7 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) CUDA_R_64F, buffer_transpose_spmvop); - spmv_op_plan_A_t_.create( - handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_); + spmv_op_plan_A_t_.create(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_); // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { @@ -1339,8 +1337,7 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) CUDA_R_64F, buffer_non_transpose_spmvop); - spmv_op_plan_A_.create( - handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_); + spmv_op_plan_A_.create(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_); } #endif } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index abb1fbe9f8..bed0d0607f 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -117,8 +117,7 @@ class cusparse_spmvop_plan_wrapper_t { cusparse_spmvop_plan_wrapper_t& operator=(cusparse_spmvop_plan_wrapper_t&& other); cusparse_spmvop_plan_wrapper_t& operator=(const 
cusparse_spmvop_plan_wrapper_t& other) = delete; - void create(cusparseHandle_t handle, - cusparseSpMVOpDescr_t descr); + void create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr); operator cusparseSpMVOpPlan_t() const; From c907d9337e79f995c8f41f7414ecb741898b6889 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 18 May 2026 14:39:43 +0200 Subject: [PATCH 19/21] added guards for only double instantiation of spmvop --- cpp/src/pdlp/cusparse_view.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index ca4f3ccf94..4015308c95 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1290,7 +1290,7 @@ template void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { #if CUDA_VER_13_2_UP - if (!is_cusparse_runtime_spmvop_supported()) { return; } + if (!is_cusparse_runtime_spmvop_supported() || !(std::is_same_v)) { return; } CUSPARSE_CHECK(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; From 99f455d420998589d59f3806fb9c24ee18183ab5 Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 18 May 2026 14:40:58 +0200 Subject: [PATCH 20/21] removed unused my_spmv_op --- cpp/src/pdlp/pdhg.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index d16400bd3b..7eb4008eab 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -98,7 +98,6 @@ class pdhg_solver_t { void spmvop_At_y(); void spmvop_A_x(); - void my_spmvop(f_t* alpha, f_t* A, f_t* x, f_t* beta, f_t* y, f_t* result); bool batch_mode_{false}; raft::handle_t const* handle_ptr_{nullptr}; rmm::cuda_stream_view stream_view_; From 152fc19805ca197a9af69f3154d0ce9c5a2df69f Mon Sep 17 00:00:00 2001 From: Bulle Mostovoi Date: Mon, 18 May 2026 16:26:47 +0200 Subject: [PATCH 21/21] made all spmvop function dlsym resolved --- 
cpp/src/pdlp/cusparse_view.cu | 268 ++++++++++++++++++++------------- cpp/src/pdlp/cusparse_view.hpp | 14 ++ cpp/src/pdlp/pdhg.cu | 32 ++-- 3 files changed, 197 insertions(+), 117 deletions(-) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 4015308c95..6a36a21b0b 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -153,87 +153,6 @@ cusparse_dn_mat_descr_wrapper_t::operator cusparseDnMatDescr_t() const return descr_; } -#if CUDA_VER_13_2_UP -cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t() - : descr_(nullptr), need_destruction_(false) -{ -} - -cusparse_spmvop_descr_wrapper_t::~cusparse_spmvop_descr_wrapper_t() -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyDescr(descr_)); } -} - -cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t( - const cusparse_spmvop_descr_wrapper_t& other) - : descr_(other.descr_), need_destruction_(false) -{ -} - -cusparse_spmvop_descr_wrapper_t& cusparse_spmvop_descr_wrapper_t::operator=( - cusparse_spmvop_descr_wrapper_t&& other) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } - descr_ = other.descr_; - need_destruction_ = other.need_destruction_; - other.need_destruction_ = false; - return *this; -} - -void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, - cusparseOperation_t opA, - cusparseSpMatDescr_t matA, - cusparseDnVecDescr_t vecX, - cusparseDnVecDescr_t vecY, - cusparseDnVecDescr_t vecZ, - cudaDataType computeType, - rmm::device_uvector& buffer) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyDescr(descr_)); } - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createDescr( - handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); - need_destruction_ = true; -} - -cusparse_spmvop_descr_wrapper_t::operator cusparseSpMVOpDescr_t() const { return descr_; } - -cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t() - : 
plan_(nullptr), need_destruction_(false) -{ -} - -cusparse_spmvop_plan_wrapper_t::~cusparse_spmvop_plan_wrapper_t() -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY_NO_THROW(cusparseSpMVOp_destroyPlan(plan_)); } -} - -cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t( - const cusparse_spmvop_plan_wrapper_t& other) - : plan_(other.plan_), need_destruction_(false) -{ -} - -cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=( - cusparse_spmvop_plan_wrapper_t&& other) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } - plan_ = other.plan_; - need_destruction_ = other.need_destruction_; - other.need_destruction_ = false; - return *this; -} - -void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr) -{ - if (need_destruction_) { RAFT_CUSPARSE_TRY(cusparseSpMVOp_destroyPlan(plan_)); } - RAFT_CUSPARSE_TRY(cusparseSpMVOp_createPlan(handle, descr, &plan_, nullptr, 0)); - need_destruction_ = true; -} - -cusparse_spmvop_plan_wrapper_t::operator cusparseSpMVOpPlan_t() const { return plan_; } - -#endif - #if CUDA_VER_12_4_UP struct dynamic_load_runtime { static void* get_cusparse_runtime_handle() @@ -351,6 +270,153 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, } #endif +#if CUDA_VER_13_2_UP +// SpMVOp symbols. 
resolved at runtime via dlsym +using cusparseSpMVOp_destroyDescr_sig = cusparse_sig; +using cusparseSpMVOp_destroyPlan_sig = cusparse_sig; +using cusparseSpMVOp_bufferSize_sig = cusparse_sig; +using cusparseSpMVOp_createDescr_sig = cusparse_sig; +using cusparseSpMVOp_createPlan_sig = + cusparse_sig; +using cusparseSpMVOp_sig = cusparse_sig; + +cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t() + : descr_(nullptr), need_destruction_(false) +{ +} + +cusparse_spmvop_descr_wrapper_t::~cusparse_spmvop_descr_wrapper_t() +{ + if (!need_destruction_) { return; } + static const auto destroy = + dynamic_load_runtime::function("cusparseSpMVOp_destroyDescr"); + RAFT_CUSPARSE_TRY_NO_THROW((*destroy)(descr_)); +} + +cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t( + const cusparse_spmvop_descr_wrapper_t& other) + : descr_(other.descr_), need_destruction_(false) +{ +} + +cusparse_spmvop_descr_wrapper_t& cusparse_spmvop_descr_wrapper_t::operator=( + cusparse_spmvop_descr_wrapper_t&& other) +{ + if (need_destruction_) { + static const auto destroy = dynamic_load_runtime::function( + "cusparseSpMVOp_destroyDescr"); + RAFT_CUSPARSE_TRY((*destroy)(descr_)); + } + descr_ = other.descr_; + need_destruction_ = other.need_destruction_; + other.need_destruction_ = false; + return *this; +} + +void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle, + cusparseOperation_t opA, + cusparseSpMatDescr_t matA, + cusparseDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, + cusparseDnVecDescr_t vecZ, + cudaDataType computeType, + rmm::device_uvector& buffer) +{ + static const auto destroy = + dynamic_load_runtime::function("cusparseSpMVOp_destroyDescr"); + static const auto create = + dynamic_load_runtime::function("cusparseSpMVOp_createDescr"); + if (need_destruction_) { RAFT_CUSPARSE_TRY((*destroy)(descr_)); } + RAFT_CUSPARSE_TRY( + (*create)(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data())); + need_destruction_ = true; +} + 
+cusparse_spmvop_descr_wrapper_t::operator cusparseSpMVOpDescr_t() const { return descr_; } + +cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t() + : plan_(nullptr), need_destruction_(false) +{ +} + +cusparse_spmvop_plan_wrapper_t::~cusparse_spmvop_plan_wrapper_t() +{ + if (!need_destruction_) { return; } + static const auto destroy = + dynamic_load_runtime::function("cusparseSpMVOp_destroyPlan"); + RAFT_CUSPARSE_TRY_NO_THROW((*destroy)(plan_)); +} + +cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t( + const cusparse_spmvop_plan_wrapper_t& other) + : plan_(other.plan_), need_destruction_(false) +{ +} + +cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=( + cusparse_spmvop_plan_wrapper_t&& other) +{ + if (need_destruction_) { + static const auto destroy = + dynamic_load_runtime::function("cusparseSpMVOp_destroyPlan"); + RAFT_CUSPARSE_TRY((*destroy)(plan_)); + } + plan_ = other.plan_; + need_destruction_ = other.need_destruction_; + other.need_destruction_ = false; + return *this; +} + +void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr) +{ + static const auto destroy = + dynamic_load_runtime::function("cusparseSpMVOp_destroyPlan"); + static const auto create = + dynamic_load_runtime::function("cusparseSpMVOp_createPlan"); + if (need_destruction_) { RAFT_CUSPARSE_TRY((*destroy)(plan_)); } + // cuOpt does not supply user-provided LTO IR; pass nullptr/0 so cuSPARSE JITs internally. 
+ RAFT_CUSPARSE_TRY((*create)(handle, descr, &plan_, /*ltoIRBuf=*/nullptr, /*ltoIRSize=*/0)); + need_destruction_ = true; +} + +cusparse_spmvop_plan_wrapper_t::operator cusparseSpMVOpPlan_t() const { return plan_; } + +void cusparse_spmvop_run(cusparseHandle_t handle, + cusparseSpMVOpPlan_t plan, + const void* alpha, + const void* beta, + cusparseDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, + cusparseDnVecDescr_t vecZ, + cudaStream_t stream) +{ + static const auto func = dynamic_load_runtime::function("cusparseSpMVOp"); + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY((*func)(handle, plan, alpha, beta, vecX, vecY, vecZ)); +} +#endif + // This cstr is used in pdhg, step size strategy and in cuPDLPx infeasible detection // A_T is owned by the scaled problem // It was already transposed in the scaled_problem version @@ -1275,10 +1341,8 @@ bool is_cusparse_runtime_spmvop_supported() { #if CUDA_VER_13_2_UP // Probe the runtimme to ensure cusparseSpMVOp is supported - static const bool supported = []() { - dlerror(); - return dlsym(RTLD_DEFAULT, "cusparseSpMVOp") != nullptr; - }(); + static const bool supported = + dynamic_load_runtime::function("cusparseSpMVOp").has_value(); return supported; #else return false; @@ -1291,17 +1355,19 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) { #if CUDA_VER_13_2_UP if (!is_cusparse_runtime_spmvop_supported() || !(std::is_same_v)) { return; } + static const auto buffer_size = + dynamic_load_runtime::function("cusparseSpMVOp_bufferSize"); CUSPARSE_CHECK(cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream())); // Prepare buffers for At_y SpMVOp size_t buffer_size_transpose = 0; - RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A_T, - dual_solution, - current_AtY, - current_AtY, - CUDA_R_64F, - &buffer_size_transpose)); + RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(), + 
CUSPARSE_OPERATION_NON_TRANSPOSE, + A_T, + dual_solution, + current_AtY, + current_AtY, + CUDA_R_64F, + &buffer_size_transpose)); buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream()); spmv_op_descr_A_t_.create(handle_ptr_->get_cusparse_handle(), @@ -1318,14 +1384,14 @@ void cusparse_view_t::create_spmv_op_plans(bool is_reflected) // Only prepare buffers for A_x if we are using reflected_halpern if (is_reflected) { size_t buffer_size_non_transpose = 0; - RAFT_CUSPARSE_TRY(cusparseSpMVOp_bufferSize(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - A, - reflected_primal_solution, - dual_gradient, - dual_gradient, - CUDA_R_64F, - &buffer_size_non_transpose)); + RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + A, + reflected_primal_solution, + dual_gradient, + dual_gradient, + CUDA_R_64F, + &buffer_size_non_transpose)); buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream()); spmv_op_descr_A_.create(handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index bed0d0607f..f6d68b21e3 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -332,4 +332,18 @@ bool is_cusparse_runtime_mixed_precision_supported(); // False if cuda version < 13.2 or runtime cuSPARSE does not export SpMVOp symbols. True otherwise. bool is_cusparse_runtime_spmvop_supported(); +#if CUDA_VER_13_2_UP +// Dispatches to the runtime cusparseSpMVOp via dlsym so callers (e.g., pdhg.cu) never +// reference the symbol statically. Caller must have verified +// is_cusparse_runtime_spmvop_supported(). 
+void cusparse_spmvop_run(cusparseHandle_t handle, + cusparseSpMVOpPlan_t plan, + const void* alpha, + const void* beta, + cusparseDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, + cusparseDnVecDescr_t vecZ, + cudaStream_t stream); +#endif + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index b754584d56..b243023a28 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -449,14 +449,14 @@ void pdhg_solver_t::spmvop_At_y() { #if CUDA_VER_13_2_UP if (is_cusparse_runtime_spmvop_supported()) { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_t_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_solution, - cusparse_view_.current_AtY, - cusparse_view_.current_AtY)); + cusparse_spmvop_run(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_t_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_solution, + cusparse_view_.current_AtY, + cusparse_view_.current_AtY, + stream_view_.value()); return; } #endif @@ -477,14 +477,14 @@ void pdhg_solver_t::spmvop_A_x() { #if CUDA_VER_13_2_UP if (is_cusparse_runtime_spmvop_supported()) { - RAFT_CUSPARSE_TRY(cusparseSetStream(handle_ptr_->get_cusparse_handle(), stream_view_.value())); - RAFT_CUSPARSE_TRY(cusparseSpMVOp(handle_ptr_->get_cusparse_handle(), - cusparse_view_.spmv_op_plan_A_, - reusable_device_scalar_value_1_.data(), - reusable_device_scalar_value_0_.data(), - cusparse_view_.reflected_primal_solution, - cusparse_view_.dual_gradient, - cusparse_view_.dual_gradient)); + cusparse_spmvop_run(handle_ptr_->get_cusparse_handle(), + cusparse_view_.spmv_op_plan_A_, + reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), + cusparse_view_.reflected_primal_solution, 
+ cusparse_view_.dual_gradient, + cusparse_view_.dual_gradient, + stream_view_.value()); return; } #endif