Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
cccace9
replaced SpMV calls with SpMVOp calls in compute_At_y and compute_A_x
Bubullzz Mar 11, 2026
dd12e1a
small fix for full OK tests
Bubullzz Mar 11, 2026
2439f92
coding style and updated dependencies. still need to conda install li…
Bubullzz Mar 11, 2026
86be5b2
updated dependencies.yaml
Bubullzz Mar 23, 2026
81b8f77
added comments and removed a useless set_stream
Bubullzz Mar 23, 2026
c5f8db2
moved create_spmv_op_plans and added libnvjitlink-dev
Bubullzz Mar 23, 2026
583da59
added guards for spmvop compilation
Bubullzz Mar 23, 2026
4f041f1
added destructors
Bubullzz Mar 24, 2026
2a1360d
moved buffer_spmvop to allow <13.2 compilation
Bubullzz Mar 24, 2026
96a524a
removed cusparse_view destructor and added spmvop plamd and descripto…
Bubullzz Mar 25, 2026
6b8e78a
added cusparseSetStream for safety
Bubullzz Mar 25, 2026
a85b10f
made compute At_y and compute A_x spmv choosing more readable
Bubullzz Mar 25, 2026
68922b0
update broken dependency
Bubullzz May 15, 2026
1880f23
pre-commit run
Bubullzz May 15, 2026
f69f77f
added dlsym checks to SpMVOp path
Bubullzz May 18, 2026
238adac
removed raw buffers/pointers and added doc
Bubullzz May 18, 2026
36c41ca
small fix error name
Bubullzz May 18, 2026
e1890ef
style
Bubullzz May 18, 2026
c907d93
added guards for only double instantiation of spmvop
Bubullzz May 18, 2026
99f455d
removed unused my_spmv_op
Bubullzz May 18, 2026
45efb50
Merge branch 'main' into spmvop_3
Bubullzz May 18, 2026
152fc19
made all spmvop function dlsym resolved
Bubullzz May 18, 2026
f08facd
Merge branch 'main' into spmvop_3
Bubullzz May 18, 2026
5cce25f
Merge branch 'main' into spmvop_3
Bubullzz May 18, 2026
1ebd68c
Merge branch 'main' into spmvop_3
Bubullzz May 19, 2026
ad7177f
Merge branch 'main' into spmvop_3
Bubullzz May 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-129_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-132_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-132_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ dependencies:
- libcusolver-dev
- libcusparse-dev
- libgrpc >=1.78.0,<1.80.0a0
- libnvjitlink-dev
- libprotobuf
- libraft-headers==26.6.*,>=0.0.0a0
- librmm==26.6.*,>=0.0.0a0
Expand Down
5 changes: 4 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,10 @@ set_target_properties(cuopt
CXX_SCAN_FOR_MODULES OFF
)

target_compile_definitions(cuopt PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}")
target_compile_definitions(cuopt
PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}"
PUBLIC CUSPARSE_ENABLE_EXPERIMENTAL_API
)

target_compile_options(cuopt
PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUOPT_CXX_FLAGS}>"
Expand Down
226 changes: 226 additions & 0 deletions cpp/src/pdlp/cusparse_view.cu
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,153 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle,
}
#endif

#if CUDA_VER_13_2_UP
// SpMVOp symbols, resolved at runtime via dlsym.
// Each alias below describes the parameter list of one cuSPARSE SpMVOp entry point; the aliases
// are used as the template argument of dynamic_load_runtime::function<...>() when looking the
// symbol up by name.
// NOTE(review): cusparse_sig is declared outside this section; presumably it builds a
// function type returning cusparseStatus_t from the listed parameter types - confirm.

// cusparseSpMVOp_destroyDescr(descr)
using cusparseSpMVOp_destroyDescr_sig = cusparse_sig<cusparseSpMVOpDescr_t>;
// cusparseSpMVOp_destroyPlan(plan)
using cusparseSpMVOp_destroyPlan_sig = cusparse_sig<cusparseSpMVOpPlan_t>;
// cusparseSpMVOp_bufferSize(handle, opA, matA, vecX, vecY, vecZ, computeType, &bufferSize)
using cusparseSpMVOp_bufferSize_sig = cusparse_sig<cusparseHandle_t,
cusparseOperation_t,
cusparseSpMatDescr_t,
cusparseDnVecDescr_t,
cusparseDnVecDescr_t,
cusparseDnVecDescr_t,
cudaDataType,
size_t*>;
// cusparseSpMVOp_createDescr(handle, &descr, opA, matA, vecX, vecY, vecZ, computeType, buffer)
using cusparseSpMVOp_createDescr_sig = cusparse_sig<cusparseHandle_t,
cusparseSpMVOpDescr_t*,
cusparseOperation_t,
cusparseSpMatDescr_t,
cusparseDnVecDescr_t,
cusparseDnVecDescr_t,
cusparseDnVecDescr_t,
cudaDataType,
void*>;
// cusparseSpMVOp_createPlan(handle, descr, &plan, ltoIRBuf, ltoIRSize)
using cusparseSpMVOp_createPlan_sig =
cusparse_sig<cusparseHandle_t, cusparseSpMVOpDescr_t, cusparseSpMVOpPlan_t*, char*, size_t>;
// cusparseSpMVOp(handle, plan, alpha, beta, vecX, vecY, vecZ)
using cusparseSpMVOp_sig = cusparse_sig<cusparseHandle_t,
cusparseSpMVOpPlan_t,
const void*,
const void*,
cusparseDnVecDescr_t,
cusparseDnVecDescr_t,
cusparseDnVecDescr_t>;

cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t()
: descr_(nullptr), need_destruction_(false)
{
}

// Releases the SpMVOp descriptor, but only when this wrapper owns it. Non-owning wrappers
// (default-constructed, copied, or moved-from) do nothing and never touch the dlsym lookup.
// The non-throwing RAFT macro is used because this runs inside a destructor.
cusparse_spmvop_descr_wrapper_t::~cusparse_spmvop_descr_wrapper_t()
{
  if (need_destruction_) {
    static const auto destroy = dynamic_load_runtime::function<cusparseSpMVOp_destroyDescr_sig>(
      "cusparseSpMVOp_destroyDescr");
    RAFT_CUSPARSE_TRY_NO_THROW((*destroy)(descr_));
  }
}

cusparse_spmvop_descr_wrapper_t::cusparse_spmvop_descr_wrapper_t(
const cusparse_spmvop_descr_wrapper_t& other)
: descr_(other.descr_), need_destruction_(false)
{
}

// Move assignment: releases the descriptor currently owned by this wrapper (if any), then takes
// over the handle and the ownership flag of `other`.
//
// After the call `other` still holds the raw handle but no longer owns it, so it will not
// destroy the descriptor.
//
// Throws (via RAFT_CUSPARSE_TRY) if destroying the previously owned descriptor fails.
cusparse_spmvop_descr_wrapper_t& cusparse_spmvop_descr_wrapper_t::operator=(
  cusparse_spmvop_descr_wrapper_t&& other)
{
  // Self-move must be a no-op: otherwise the owned descriptor would be destroyed and the
  // ownership flag cleared, leaving this wrapper with a dangling handle.
  if (this == &other) { return *this; }

  if (need_destruction_) {
    static const auto destroy = dynamic_load_runtime::function<cusparseSpMVOp_destroyDescr_sig>(
      "cusparseSpMVOp_destroyDescr");
    RAFT_CUSPARSE_TRY((*destroy)(descr_));
  }
  descr_                  = other.descr_;
  need_destruction_       = other.need_destruction_;
  other.need_destruction_ = false;
  return *this;
}

// (Re)creates the SpMVOp descriptor held by this wrapper.
//
// Any descriptor previously owned by the wrapper is destroyed first. The arguments are forwarded
// to cusparseSpMVOp_createDescr, with `buffer.data()` passed as the external work buffer.
// NOTE(review): the buffer is presumably expected to be sized with cusparseSpMVOp_bufferSize and
// to outlive the descriptor - confirm against the cuSPARSE documentation.
//
// Throws (via RAFT_CUSPARSE_TRY) if destroying the old descriptor or creating the new one fails.
void cusparse_spmvop_descr_wrapper_t::create(cusparseHandle_t handle,
                                             cusparseOperation_t opA,
                                             cusparseSpMatDescr_t matA,
                                             cusparseDnVecDescr_t vecX,
                                             cusparseDnVecDescr_t vecY,
                                             cusparseDnVecDescr_t vecZ,
                                             cudaDataType computeType,
                                             rmm::device_uvector<uint8_t>& buffer)
{
  static const auto destroy =
    dynamic_load_runtime::function<cusparseSpMVOp_destroyDescr_sig>("cusparseSpMVOp_destroyDescr");
  static const auto create =
    dynamic_load_runtime::function<cusparseSpMVOp_createDescr_sig>("cusparseSpMVOp_createDescr");
  if (need_destruction_) {
    RAFT_CUSPARSE_TRY((*destroy)(descr_));
    // The old descriptor is gone: give up ownership right away so that a failure in the creation
    // call below cannot make the destructor destroy the same descriptor a second time.
    need_destruction_ = false;
    descr_            = nullptr;
  }
  RAFT_CUSPARSE_TRY(
    (*create)(handle, &descr_, opA, matA, vecX, vecY, vecZ, computeType, buffer.data()));
  need_destruction_ = true;
}

// Implicit conversion to the raw cuSPARSE descriptor handle so the wrapper can be passed
// directly where a cusparseSpMVOpDescr_t is expected. Ownership is not transferred.
cusparse_spmvop_descr_wrapper_t::operator cusparseSpMVOpDescr_t() const
{
  return descr_;
}

cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t()
: plan_(nullptr), need_destruction_(false)
{
}

// Releases the SpMVOp plan, but only when this wrapper owns it. Non-owning wrappers
// (default-constructed, copied, or moved-from) do nothing and never touch the dlsym lookup.
// The non-throwing RAFT macro is used because this runs inside a destructor.
cusparse_spmvop_plan_wrapper_t::~cusparse_spmvop_plan_wrapper_t()
{
  if (need_destruction_) {
    static const auto destroy =
      dynamic_load_runtime::function<cusparseSpMVOp_destroyPlan_sig>("cusparseSpMVOp_destroyPlan");
    RAFT_CUSPARSE_TRY_NO_THROW((*destroy)(plan_));
  }
}

cusparse_spmvop_plan_wrapper_t::cusparse_spmvop_plan_wrapper_t(
const cusparse_spmvop_plan_wrapper_t& other)
: plan_(other.plan_), need_destruction_(false)
{
}

// Move assignment: releases the plan currently owned by this wrapper (if any), then takes over
// the handle and the ownership flag of `other`.
//
// After the call `other` still holds the raw handle but no longer owns it, so it will not
// destroy the plan.
//
// Throws (via RAFT_CUSPARSE_TRY) if destroying the previously owned plan fails.
cusparse_spmvop_plan_wrapper_t& cusparse_spmvop_plan_wrapper_t::operator=(
  cusparse_spmvop_plan_wrapper_t&& other)
{
  // Self-move must be a no-op: otherwise the owned plan would be destroyed and the ownership
  // flag cleared, leaving this wrapper with a dangling handle.
  if (this == &other) { return *this; }

  if (need_destruction_) {
    static const auto destroy =
      dynamic_load_runtime::function<cusparseSpMVOp_destroyPlan_sig>("cusparseSpMVOp_destroyPlan");
    RAFT_CUSPARSE_TRY((*destroy)(plan_));
  }
  plan_                   = other.plan_;
  need_destruction_       = other.need_destruction_;
  other.need_destruction_ = false;
  return *this;
}

// (Re)creates the SpMVOp plan held by this wrapper from an existing SpMVOp descriptor.
//
// Any plan previously owned by the wrapper is destroyed first.
//
// Throws (via RAFT_CUSPARSE_TRY) if destroying the old plan or creating the new one fails.
void cusparse_spmvop_plan_wrapper_t::create(cusparseHandle_t handle, cusparseSpMVOpDescr_t descr)
{
  static const auto destroy =
    dynamic_load_runtime::function<cusparseSpMVOp_destroyPlan_sig>("cusparseSpMVOp_destroyPlan");
  static const auto create =
    dynamic_load_runtime::function<cusparseSpMVOp_createPlan_sig>("cusparseSpMVOp_createPlan");
  if (need_destruction_) {
    RAFT_CUSPARSE_TRY((*destroy)(plan_));
    // The old plan is gone: give up ownership right away so that a failure in the creation call
    // below cannot make the destructor destroy the same plan a second time.
    need_destruction_ = false;
    plan_             = nullptr;
  }
  // cuOpt does not supply user-provided LTO IR; pass nullptr/0 so cuSPARSE JITs internally.
  RAFT_CUSPARSE_TRY((*create)(handle, descr, &plan_, /*ltoIRBuf=*/nullptr, /*ltoIRSize=*/0));
  need_destruction_ = true;
}

// Implicit conversion to the raw cuSPARSE plan handle so the wrapper can be passed directly
// where a cusparseSpMVOpPlan_t is expected. Ownership is not transferred.
cusparse_spmvop_plan_wrapper_t::operator cusparseSpMVOpPlan_t() const
{
  return plan_;
}

// Executes a previously planned SpMVOp on the given stream.
//
// handle           cuSPARSE handle; it is bound to `stream` before the operation is issued.
// plan             SpMVOp plan to execute.
// alpha, beta      forwarded unchanged to cusparseSpMVOp.
// vecX, vecY, vecZ dense vector descriptors forwarded unchanged to cusparseSpMVOp.
// stream           CUDA stream set on `handle` for this call.
//
// Throws (via RAFT_CUSPARSE_TRY) if setting the stream or the SpMVOp call fails.
// NOTE(review): the dlsym result is dereferenced without a check here; callers are presumably
// expected to have verified is_cusparse_runtime_spmvop_supported() first - confirm.
void cusparse_spmvop_run(cusparseHandle_t handle,
cusparseSpMVOpPlan_t plan,
const void* alpha,
const void* beta,
cusparseDnVecDescr_t vecX,
cusparseDnVecDescr_t vecY,
cusparseDnVecDescr_t vecZ,
cudaStream_t stream)
{
// Resolved once on first use; later calls reuse the cached lookup.
static const auto func = dynamic_load_runtime::function<cusparseSpMVOp_sig>("cusparseSpMVOp");
RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream));
RAFT_CUSPARSE_TRY((*func)(handle, plan, alpha, beta, vecX, vecY, vecZ));
}
#endif

// This constructor is used in PDHG, the step size strategy, and cuPDLPx infeasibility detection.
// A_T is owned by the scaled problem.
// It was already transposed in the scaled_problem version.
Expand Down Expand Up @@ -304,6 +451,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
A_T_indices_{op_problem_scaled.reverse_constraints},
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -717,6 +866,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
A_T_indices_{_A_T_indices},
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -926,6 +1077,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
tmp_dual(existing_cusparse_view.tmp_dual),
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -1041,6 +1194,8 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
: handle_ptr_(handle_ptr),
buffer_non_transpose{0, handle_ptr->get_stream()},
buffer_transpose{0, handle_ptr->get_stream()},
buffer_non_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_spmvop{0, handle_ptr->get_stream()},
buffer_transpose_batch{0, handle_ptr->get_stream()},
buffer_non_transpose_batch{0, handle_ptr->get_stream()},
buffer_transpose_batch_row_row_{0, handle_ptr->get_stream()},
Expand Down Expand Up @@ -1182,6 +1337,77 @@ bool is_cusparse_runtime_mixed_precision_supported()
return (major > 12) || (major == 12 && minor >= 5);
}

// Reports whether the cuSPARSE library loaded at runtime exposes the SpMVOp API.
//
// Always false when the code is built without CUDA_VER_13_2_UP. Otherwise the runtime is probed
// once (the result is cached in a function-local static) for every SpMVOp entry point this file
// resolves through dlsym. The SpMVOp code paths dereference those lookups without further
// checks, so all of them have to be present for the feature to be usable.
bool is_cusparse_runtime_spmvop_supported()
{
#if CUDA_VER_13_2_UP
  // Probe the runtime to ensure the whole cusparseSpMVOp API family is available.
  static const bool supported =
    dynamic_load_runtime::function<cusparseSpMVOp_sig>("cusparseSpMVOp").has_value() &&
    dynamic_load_runtime::function<cusparseSpMVOp_bufferSize_sig>("cusparseSpMVOp_bufferSize")
      .has_value() &&
    dynamic_load_runtime::function<cusparseSpMVOp_createDescr_sig>("cusparseSpMVOp_createDescr")
      .has_value() &&
    dynamic_load_runtime::function<cusparseSpMVOp_createPlan_sig>("cusparseSpMVOp_createPlan")
      .has_value() &&
    dynamic_load_runtime::function<cusparseSpMVOp_destroyDescr_sig>("cusparseSpMVOp_destroyDescr")
      .has_value() &&
    dynamic_load_runtime::function<cusparseSpMVOp_destroyPlan_sig>("cusparseSpMVOp_destroyPlan")
      .has_value();
  return supported;
#else
  return false;
#endif
}

// Creates SpMVOp plans. Must be called after scale_problem() so plans use the scaled matrix.
//
// Does nothing when the code is built without CUDA_VER_13_2_UP, when the runtime cuSPARSE does
// not expose SpMVOp, or when f_t is not double (the compute type below is fixed to CUDA_R_64F).
//
// The At_y plan (A_T applied to dual_solution, written to current_AtY) is always built. The A_x
// plan (A applied to reflected_primal_solution, written to dual_gradient) is built only when
// `is_reflected` is true. Both use CUSPARSE_OPERATION_NON_TRANSPOSE, A_T being stored as its own
// matrix, and pass the output vector as both vecY and vecZ.
//
// Throws (via RAFT_CUSPARSE_TRY) if any cuSPARSE call fails.
template <typename i_t, typename f_t>
void cusparse_view_t<i_t, f_t>::create_spmv_op_plans(bool is_reflected)
{
#if CUDA_VER_13_2_UP
  if (!is_cusparse_runtime_spmvop_supported() || !(std::is_same_v<f_t, double>)) { return; }
  static const auto buffer_size =
    dynamic_load_runtime::function<cusparseSpMVOp_bufferSize_sig>("cusparseSpMVOp_bufferSize");
  // Same error macro as every other cuSPARSE call in this file.
  RAFT_CUSPARSE_TRY(
    cusparseSetStream(handle_ptr_->get_cusparse_handle(), handle_ptr_->get_stream()));
  // Prepare buffers for At_y SpMVOp
  size_t buffer_size_transpose = 0;
  RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(),
                                   CUSPARSE_OPERATION_NON_TRANSPOSE,
                                   A_T,
                                   dual_solution,
                                   current_AtY,
                                   current_AtY,
                                   CUDA_R_64F,
                                   &buffer_size_transpose));
  buffer_transpose_spmvop.resize(buffer_size_transpose, handle_ptr_->get_stream());

  // The descriptor is given the work buffer that was just sized for it.
  spmv_op_descr_A_t_.create(handle_ptr_->get_cusparse_handle(),
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            A_T,
                            dual_solution,
                            current_AtY,
                            current_AtY,
                            CUDA_R_64F,
                            buffer_transpose_spmvop);

  spmv_op_plan_A_t_.create(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_t_);

  // Only prepare buffers for A_x if we are using reflected_halpern
  if (is_reflected) {
    size_t buffer_size_non_transpose = 0;
    RAFT_CUSPARSE_TRY((*buffer_size)(handle_ptr_->get_cusparse_handle(),
                                     CUSPARSE_OPERATION_NON_TRANSPOSE,
                                     A,
                                     reflected_primal_solution,
                                     dual_gradient,
                                     dual_gradient,
                                     CUDA_R_64F,
                                     &buffer_size_non_transpose));
    buffer_non_transpose_spmvop.resize(buffer_size_non_transpose, handle_ptr_->get_stream());

    spmv_op_descr_A_.create(handle_ptr_->get_cusparse_handle(),
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            A,
                            reflected_primal_solution,
                            dual_gradient,
                            dual_gradient,
                            CUDA_R_64F,
                            buffer_non_transpose_spmvop);

    spmv_op_plan_A_.create(handle_ptr_->get_cusparse_handle(), spmv_op_descr_A_);
  }
#endif
}
Comment thread
Bubullzz marked this conversation as resolved.

#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT
template class cusparse_sp_mat_descr_wrapper_t<int, float>;
template class cusparse_dn_vec_descr_wrapper_t<float>;
Expand Down
Loading
Loading